Line data Source code
1 : /*
2 : * Copyright(c) 2019 Intel Corporation
3 : * SPDX - License - Identifier: BSD - 2 - Clause - Patent
4 : */
5 :
6 : #include <time.h>
7 : #include <stdio.h>
8 :
9 : #ifdef _WIN32
10 : #include <windows.h>
11 : #else
12 : #include <stdlib.h>
13 : #include <sys/time.h>
14 : #endif
15 :
16 : #include "EbDefinitions.h"
17 : #include "EbUtility.h"
18 : #include "EbTime.h"
19 : /********************************************************************************************
20 : * faster memcopy for <= 64B blocks, great w/ inlining and size known at compile time (or w/ PGO)
21 : * THIS NEEDS TO STAY IN A HEADER FOR BEST PERFORMANCE
22 : ********************************************************************************************/
23 :
24 : #include <immintrin.h>
25 :
/*
 * Copy `size` bytes from src_ptr to dst_ptr using progressively narrower moves:
 * 16-byte SSE moves while at least 16 bytes remain, then at most one 8-byte
 * move, then single bytes. Neither pointer needs any particular alignment
 * (unaligned load/store intrinsics are used). The regions must not overlap.
 *
 * The Intel compiler identifies itself with __INTEL_COMPILER; the GCC-only
 * optimize attribute is skipped for it and "#pragma unroll" is used instead.
 */
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
__attribute__((optimize("unroll-loops")))
#endif
static void eb_memcpy_small(void* dst_ptr, void const* src_ptr, size_t size) {
    const char* src = (const char*)src_ptr;
    char*       dst = (char*)dst_ptr;
    size_t      i   = 0;

#ifdef __INTEL_COMPILER
#pragma unroll
#endif
    /* 16 bytes per iteration */
    while ((i + 16) <= size) {
        _mm_storeu_ps((float*)(dst + i), _mm_loadu_ps((const float*)(src + i)));
        i += 16;
    }

    /* at most one 8-byte move is needed once fewer than 16 bytes remain */
    if ((i + 8) <= size) {
        _mm_store_sd((double*)(dst + i), _mm_load_sd((const double*)(src + i)));
        i += 8;
    }

    /* byte-wise tail (0..7 bytes) */
    for (; i < size; ++i)
        dst[i] = src[i];
}
#define EB_MIN(a,b) (((a) < (b)) ? (a) : (b))

/*
 * Copy `size` bytes in three phases:
 *   1. a head copy that brings the destination up to the next 64-byte boundary
 *      (or copies everything when fewer bytes than that are requested),
 *   2. whole 64-byte chunks, moved as four 16-byte unaligned loads followed by
 *      four 16-byte unaligned stores,
 *   3. whatever is left (< 64 bytes) through eb_memcpy_small().
 */
static void eb_memcpy_sse(void* dst_ptr, void const* src_ptr, size_t size) {
    const char  *from = (const char*)src_ptr;
    char        *to   = (char*)dst_ptr;
    const size_t lane = sizeof(__m128);

    /* bytes up to the next 64-byte boundary; exactly 64 when already aligned */
    const size_t to_boundary = 64 - ((size_t)to & 63);
    const size_t head        = EB_MIN(to_boundary, size);

    if (head != 64) {
        eb_memcpy_small(to, from, head);
        to   += head;
        from += head;
        size -= head;
    }

    /* number of bytes covered by whole 64-byte chunks */
    const size_t bulk = size & ~(size_t)63;
    size_t       done = 0;
    while (done < bulk) {
        const char *s = from + done;
        char       *d = to + done;

        const __m128 q0 = _mm_loadu_ps((const float*)(s));
        const __m128 q1 = _mm_loadu_ps((const float*)(s + lane));
        const __m128 q2 = _mm_loadu_ps((const float*)(s + lane * 2));
        const __m128 q3 = _mm_loadu_ps((const float*)(s + lane * 3));

        _mm_storeu_ps((float*)(d), q0);
        _mm_storeu_ps((float*)(d + lane), q1);
        _mm_storeu_ps((float*)(d + lane * 2), q2);
        _mm_storeu_ps((float*)(d + lane * 3), q3);

        done += 64;
    }

    /* trailing bytes that do not fill a whole chunk */
    if (done < size)
        eb_memcpy_small(to + done, from + done, size - done);
}
/*
 * Public copy entry point: requests of up to 64 bytes go straight to
 * eb_memcpy_small(); anything larger goes through eb_memcpy_sse().
 */
void eb_memcpy(void *dst_ptr, void *src_ptr, size_t size) {
    if (size <= 64) {
        eb_memcpy_small(dst_ptr, src_ptr, size);
        return;
    }
    eb_memcpy_sse(dst_ptr, src_ptr, size);
}
92 : /*****************************************
93 : * Z-Order
94 : *****************************************/
95 : static TxSize blocksize_to_txsize[BlockSizeS_ALL] = {
96 : TX_4X4 , // BLOCK_4X4
97 : TX_4X8 , // BLOCK_4X8
98 : TX_8X4 , // BLOCK_8X4
99 : TX_8X8 , // BLOCK_8X8
100 : TX_8X16 , // BLOCK_8X16
101 : TX_16X8 , // BLOCK_16X8
102 : TX_16X16 , // BLOCK_16X16
103 : TX_16X32 , // BLOCK_16X32
104 : TX_32X16 , // BLOCK_32X16
105 : TX_32X32 , // BLOCK_32X32
106 : TX_32X64 , // BLOCK_32X64
107 : TX_64X32 , // BLOCK_64X32
108 : TX_64X64 , // BLOCK_64X64
109 : TX_64X64 , // BLOCK_64X128
110 : TX_64X64 , // BLOCK_128X64
111 : TX_64X64 , // BLOCK_128X128
112 : TX_4X16 , // BLOCK_4X16
113 : TX_16X4 , // BLOCK_16X4
114 : TX_8X32 , // BLOCK_8X32
115 : TX_32X8 , // BLOCK_32X8
116 : TX_16X64 , // BLOCK_16X64
117 : TX_64X16 // BLOCK_64X16
118 : };
119 0 : EbErrorType z_order_increment(
120 : uint32_t *x_loc, // x location, level agnostic
121 : uint32_t *y_loc) // y location, level agnostic
122 : {
123 0 : EbErrorType return_error = EB_ErrorNone;
124 : uint32_t mask;
125 :
126 : // The basic idea of this function is to increment an x,y coordinate
127 : // that has had its size removed to the next z-coding order location.
128 : //
129 : // In a four quadrant partition, the z coding order is [0,0], [1,0], [0,1], [1,1]
130 : // Some observations (only looking at one bit position or the LSB) are:
131 : // 1. X is always toggled (achieved with X ^= 0x1)
132 : // 2. Y can be toggled with (Y = Y ^ X)
133 : // 3. Recall that a value XOR'ed with 1 toggles, and XOR'ed with 0 stays the same
134 : //
135 : // Extending this logic is somewhat trickier. The two main observations to make are
136 : // 4. The LSB of X and Y are always progressed.
137 : // 5. Every other bit-position, N, other than the LSB are progressed in their state
138 : // when the N-1 bit position resets back to [0,0].
139 : //
140 : // From 5, we can infer the need of a "progression mask" of the form 0x1, 0x3, 0x7, 0xF, etc.
141 : // The first step of contructing the mask is to find which bit positions are ready to
142 : // reset (found by X & Y) and setting the LSB of the mask to 1 (the LSB always progresses).
143 : // The second step is to eliminate all ones from the mask above the lowest-ordered zero bit.
144 : // Note we can achieve more precision in the second mask step by more masking-out operations,
145 : // but for a 64 -> 4 (5 steps), the precision below is sufficient.
146 : //
147 : // Finally, X and Y are progressed only at the bit-positions in the mask.
148 :
149 0 : mask = ((*x_loc & *y_loc) << 1) | 0x1;
150 0 : mask &= (mask << 1) | 0x01;
151 0 : mask &= (mask << 2) | 0x03;
152 0 : mask &= (mask << 4) | 0x0F;
153 0 : mask &= (mask << 8) | 0xFF;
154 :
155 0 : *y_loc ^= *x_loc & mask;
156 0 : *x_loc ^= mask;
157 :
158 0 : return return_error;
159 : }
160 :
/*****************************************
 * Z-Order Increment with Level
 *   This is the main function for progressing
 *   through a treeblock's coding units. To get
 *   the true CU position, multiply x_loc and y_loc
 *   by the smallest CU size.
 *
 *   Behaviour per call:
 *     - level > 0: only descend one level; x_loc and
 *       y_loc are left untouched.
 *     - level == 0: advance (x_loc, y_loc) to the next
 *       z-order position; when the position wraps out
 *       of a completed quadrant the level is raised by
 *       1, or by 2 when more than one bit position wraps.
 *     - index is incremented on every call.
 *****************************************/
void ZOrderIncrementWithLevel(
    uint32_t *x_loc,   // x location, units of smallest block size
    uint32_t *y_loc,   // y location, units of smallest block size
    uint32_t *level,   // level, number of block size-steps from the smallest block size
                       //   (e.g. if 8x8 = level 0, 16x16 = level 1, 32x32 == level 2, 64x64 == level 3)
    uint32_t *index)   // The CU index, can be used to index a lookup table (see get_coded_unit_stats)
{
    // Progression mask: a run of low-order ones (0x1, 0x3, 0x7, ...) marking
    // the bit positions of x/y that advance. It stays zero unless we are at
    // the bottom of the tree, i.e. mask = (level == 0) ? mask : 0.
    uint32_t mask = 0;

    if (*level == 0) {
        // Seed: positions where both x and y are set are about to wrap, so
        // the next position up advances too; the LSB always advances.
        mask = ((*x_loc & *y_loc) << 1) | 0x1;

        // Drop every one that sits above the lowest zero bit.
        mask &= (mask << 1) | 0x01;
        mask &= (mask << 2) | 0x03;
        mask &= (mask << 4) | 0x0F;
        mask &= (mask << 8) | 0xFF;
    }

    // Step down one level unless already at the bottom of the tree.
    if (*level > 0)
        *level -= 1;

    // Leaving a finished quadrant: larger blocks are processed before smaller
    // ones, so climb back up. This is a coarse log2(mask >> 1) that tops out
    // at 2. With mask == 0 (not at the bottom) nothing is added.
    if (mask > 3)
        *level += 2;
    else
        *level += mask >> 1;

    // Advance the position; a zero mask leaves both coordinates unchanged.
    *y_loc ^= *x_loc & mask;
    *x_loc ^= mask;

    // The index always advances, matching the order in which leaves are
    // stored in the accompanying CU data structures.
    ++(*index);
}
242 :
243 : static CodedUnitStats CodedUnitStatsArray[] = {
244 : // Depth Size SizeLog2 OriginX OriginY cu_num_in_depth Index
245 : {0, 64, 6, 0, 0, 0 , 0 }, // 0
246 : {1, 32, 5, 0, 0, 0 , 1 }, // 1
247 : {2, 16, 4, 0, 0, 0 , 1 }, // 2
248 : {3, 8, 3, 0, 0, 0 , 1 }, // 3
249 : {3, 8, 3, 8, 0, 1 , 1 }, // 4
250 : {3, 8, 3, 0, 8, 8 , 1 }, // 5
251 : {3, 8, 3, 8, 8, 9 , 1 }, // 6
252 : {2, 16, 4, 16, 0, 1 , 1 }, // 7
253 : {3, 8, 3, 16, 0, 2 , 1 }, // 8
254 : {3, 8, 3, 24, 0, 3 , 1 }, // 9
255 : {3, 8, 3, 16, 8, 10 , 1 }, // 10
256 : {3, 8, 3, 24, 8, 11 , 1 }, // 11
257 : {2, 16, 4, 0, 16, 4 , 1 }, // 12
258 : {3, 8, 3, 0, 16, 16 , 1 }, // 13
259 : {3, 8, 3, 8, 16, 17 , 1 }, // 14
260 : {3, 8, 3, 0, 24, 24 , 1 }, // 15
261 : {3, 8, 3, 8, 24, 25 , 1 }, // 16
262 : {2, 16, 4, 16, 16, 5 , 1 }, // 17
263 : {3, 8, 3, 16, 16, 18 , 1 }, // 18
264 : {3, 8, 3, 24, 16, 19 , 1 }, // 19
265 : {3, 8, 3, 16, 24, 26 , 1 }, // 20
266 : {3, 8, 3, 24, 24, 27 , 1 }, // 21
267 : {1, 32, 5, 32, 0, 1 , 2 }, // 22
268 : {2, 16, 4, 32, 0, 2 , 2 }, // 23
269 : {3, 8, 3, 32, 0, 4 , 2 }, // 24
270 : {3, 8, 3, 40, 0, 5 , 2 }, // 25
271 : {3, 8, 3, 32, 8, 12 , 2 }, // 26
272 : {3, 8, 3, 40, 8, 13 , 2 }, // 27
273 : {2, 16, 4, 48, 0, 3 , 2 }, // 28
274 : {3, 8, 3, 48, 0, 6 , 2 }, // 29
275 : {3, 8, 3, 56, 0, 7 , 2 }, // 30
276 : {3, 8, 3, 48, 8, 14 , 2 }, // 31
277 : {3, 8, 3, 56, 8, 15 , 2 }, // 32
278 : {2, 16, 4, 32, 16, 6 , 2 }, // 33
279 : {3, 8, 3, 32, 16, 20 , 2 }, // 34
280 : {3, 8, 3, 40, 16, 21 , 2 }, // 35
281 : {3, 8, 3, 32, 24, 28 , 2 }, // 36
282 : {3, 8, 3, 40, 24, 29 , 2 }, // 37
283 : {2, 16, 4, 48, 16, 7 , 2 }, // 38
284 : {3, 8, 3, 48, 16, 22 , 2 }, // 39
285 : {3, 8, 3, 56, 16, 23 , 2 }, // 40
286 : {3, 8, 3, 48, 24, 30 , 2 }, // 41
287 : {3, 8, 3, 56, 24, 31 , 2 }, // 42
288 : {1, 32, 5, 0, 32, 2 , 3 }, // 43
289 : {2, 16, 4, 0, 32, 8 , 3 }, // 44
290 : {3, 8, 3, 0, 32, 32 , 3 }, // 45
291 : {3, 8, 3, 8, 32, 33 , 3 }, // 46
292 : {3, 8, 3, 0, 40, 40 , 3 }, // 47
293 : {3, 8, 3, 8, 40, 41 , 3 }, // 48
294 : {2, 16, 4, 16, 32, 9 , 3 }, // 49
295 : {3, 8, 3, 16, 32, 34 , 3 }, // 50
296 : {3, 8, 3, 24, 32, 35 , 3 }, // 51
297 : {3, 8, 3, 16, 40, 42 , 3 }, // 52
298 : {3, 8, 3, 24, 40, 43 , 3 }, // 53
299 : {2, 16, 4, 0, 48, 12 , 3 }, // 54
300 : {3, 8, 3, 0, 48, 48 , 3 }, // 55
301 : {3, 8, 3, 8, 48, 49 , 3 }, // 56
302 : {3, 8, 3, 0, 56, 56 , 3 }, // 57
303 : {3, 8, 3, 8, 56, 57 , 3 }, // 58
304 : {2, 16, 4, 16, 48, 13 , 3 }, // 59
305 : {3, 8, 3, 16, 48, 50 , 3 }, // 60
306 : {3, 8, 3, 24, 48, 51 , 3 }, // 61
307 : {3, 8, 3, 16, 56, 58 , 3 }, // 62
308 : {3, 8, 3, 24, 56, 59 , 3 }, // 63
309 : {1, 32, 5, 32, 32, 3 , 4 }, // 64
310 : {2, 16, 4, 32, 32, 10 , 4 }, // 65
311 : {3, 8, 3, 32, 32, 36 , 4 }, // 66
312 : {3, 8, 3, 40, 32, 37 , 4 }, // 67
313 : {3, 8, 3, 32, 40, 44 , 4 }, // 68
314 : {3, 8, 3, 40, 40, 45 , 4 }, // 69
315 : {2, 16, 4, 48, 32, 11 , 4 }, // 70
316 : {3, 8, 3, 48, 32, 38 , 4 }, // 71
317 : {3, 8, 3, 56, 32, 39 , 4 }, // 72
318 : {3, 8, 3, 48, 40, 46 , 4 }, // 73
319 : {3, 8, 3, 56, 40, 47 , 4 }, // 74
320 : {2, 16, 4, 32, 48, 14 , 4 }, // 75
321 : {3, 8, 3, 32, 48, 52 , 4 }, // 76
322 : {3, 8, 3, 40, 48, 53 , 4 }, // 77
323 : {3, 8, 3, 32, 56, 60 , 4 }, // 78
324 : {3, 8, 3, 40, 56, 61 , 4 }, // 79
325 : {2, 16, 4, 48, 48, 15 , 4 }, // 80
326 : {3, 8, 3, 48, 48, 54 , 4 }, // 81
327 : {3, 8, 3, 56, 48, 55 , 4 }, // 82
328 : {3, 8, 3, 48, 56, 62 , 4 }, // 83
329 : {3, 8, 3, 56, 56, 63 , 4 } // 84
330 : };
331 :
332 : /**************************************************************
333 : * Get Coded Unit Statistics
334 : **************************************************************/
335 852509 : const CodedUnitStats* get_coded_unit_stats(const uint32_t cuIdx)
336 : {
337 : //ASSERT(cuIdx < CU_MAX_COUNT && "get_coded_unit_stats: Out-of-range CU Idx\n");
338 852509 : if (cuIdx == 255)
339 0 : printf("Invalid CuIndex\n");
340 :
341 852566 : return &CodedUnitStatsArray[cuIdx];
342 : }
343 :
344 : static const TransformUnitStats TransformUnitStatsArray[] = {
345 : //
346 : // depth
347 : // /
348 : // / offset_x (units of the current depth)
349 : // / /
350 : // / / offset_y (units of the current depth)
351 : // / / /
352 : {0, 0, 0}, // 0
353 : {1, 0, 0}, // 1
354 : {1, 2, 0}, // 2
355 : {1, 0, 2}, // 3
356 : {1, 2, 2}, // 4
357 : {2, 0, 0}, // 5
358 : {2, 1, 0}, // 6
359 : {2, 0, 1}, // 7
360 : {2, 1, 1}, // 8
361 : {2, 2, 0}, // 9
362 : {2, 3, 0}, // 10
363 : {2, 2, 1}, // 11
364 : {2, 3, 1}, // 12
365 : { 2, 0, 2}, // 13
366 : { 2, 1, 2}, // 14
367 : { 2, 0, 3}, // 15
368 : { 2, 1, 3}, // 16
369 : { 2, 2, 2}, // 17
370 : { 2, 3, 2}, // 18
371 : { 2, 2, 3}, // 19
372 : { 2, 3, 3}, // 20
373 : {0xFF, 0xFF, 0xFF} // Invalid
374 : };
375 :
376 : /**************************************************************
377 : * Get Transform Unit Statistics
378 : **************************************************************/
379 0 : const TransformUnitStats* get_transform_unit_stats(const uint32_t tuIdx)
380 : {
381 0 : return &TransformUnitStatsArray[tuIdx];
382 : }
383 :
384 : /*****************************************
385 : * Integer Log 2
386 : * This is a quick adaptation of a Number
387 : * Leading Zeros (NLZ) algorithm to get
388 : * the log2f of an integer
389 : *****************************************/
390 : /*uint32_t Log2f(uint32_t x)
391 : {
392 : uint32_t y;
393 : int32_t n = 32, c = 16;
394 :
395 : do {
396 : y = x >> c;
397 : if (y > 0) {
398 : n -= c;
399 : x = y;
400 : }
401 : c >>= 1;
402 : } while (c > 0);
403 :
404 : return 32 - n;
405 : }*/
406 :
/*****************************************
 * Long Log 2
 *   Integer base-2 logarithm of a 64-bit value, computed by
 *   binary search on the position of the most significant
 *   set bit (a Number of Leading Zeros style algorithm).
 *
 *   Returns floor(log2(x)) for x > 0, and 0 for x == 0.
 *
 *   Defined as an ordinary external function: a bare `inline`
 *   on this definition would only yield an external definition
 *   if a separate non-inline declaration happened to be visible.
 *****************************************/
uint64_t Log2f64(uint64_t x)
{
    uint64_t log2_floor = 0;

    // Try shifts of 32, 16, 8, 4, 2, 1: whenever the upper part is non-zero
    // the MSB lies there, so keep the upper part and account for the shift.
    for (unsigned shift = 32; shift > 0; shift >>= 1) {
        const uint64_t upper = x >> shift;
        if (upper > 0) {
            log2_floor += shift;
            x = upper;
        }
    }

    return log2_floor;
}
429 :
430 : /*****************************************
431 : * Endian Swap
432 : *****************************************/
/* Reverse the byte order of a 32-bit value (0xAABBCCDD -> 0xDDCCBBAA). */
uint32_t endian_swap(uint32_t ui)
{
    const uint32_t byte0 = ui & 0xFFu;           // least significant byte
    const uint32_t byte1 = (ui >> 8) & 0xFFu;
    const uint32_t byte2 = (ui >> 16) & 0xFFu;
    const uint32_t byte3 = ui >> 24;             // most significant byte

    return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
444 :
/*
 * Fixed-point log2 approximation with `precision` fractional bits.
 *
 * The integer part is Log2f64(x). The fractional part is a linear
 * interpolation between neighbouring powers of two:
 *     (x - 2^k) / 2^k,   k = Log2f64(x),
 * scaled by 2^precision.
 *
 * x == 0 has no logarithm; 0 is returned for it. Without this guard the
 * subtraction x - 2^0 would wrap around and yield a meaningless huge value.
 *
 * The intermediate (x - 2^k) << precision is evaluated in 64 bits, so the
 * caller must keep the operand small enough for that shift not to overflow.
 */
uint64_t log2f_high_precision(uint64_t x, uint8_t precision)
{
    if (x == 0)
        return 0;

    const uint64_t msb_index = Log2f64(x);                    // k, at most 63
    const uint64_t pow2      = (uint64_t)1 << (uint8_t)msb_index;
    const uint64_t remainder = x - pow2;                      // 0 <= remainder < 2^k

    return (msb_index << precision) + ((remainder << precision) / pow2);
}
455 :
456 : // concatenate two linked list, and return the pointer to the new concatenated list
457 120 : EbLinkedListNode* concat_eb_linked_list(EbLinkedListNode* a, EbLinkedListNode* b)
458 : {
459 120 : if (a)
460 : {
461 0 : while (a->next)
462 0 : a = a->next;
463 0 : a->next = b;
464 0 : return a;
465 : }
466 : else
467 120 : return b;
468 : }
469 :
470 : // split a linked list
471 120 : EbLinkedListNode* split_eb_linked_list(EbLinkedListNode* input, EbLinkedListNode** restLL, EbBool(*predicate_func)(EbLinkedListNode*))
472 : {
473 120 : EbLinkedListNode* llTruePtr = (EbLinkedListNode *)EB_NULL; // list of nodes satifying predicate_func(node) == TRUE
474 120 : EbLinkedListNode* llRestPtr = (EbLinkedListNode *)EB_NULL; // list of nodes satifying predicate_func(node) != TRUE
475 :
476 120 : while (input)
477 : {
478 0 : EbLinkedListNode* next = input->next;
479 0 : input->next = (EbLinkedListNode *)EB_NULL;
480 0 : if (predicate_func(input))
481 0 : llTruePtr = concat_eb_linked_list(input, llTruePtr);
482 : else
483 0 : llRestPtr = concat_eb_linked_list(input, llRestPtr);
484 0 : input = next;
485 : }
486 :
487 120 : *restLL = llRestPtr;
488 120 : return llTruePtr;
489 : }
490 :
491 : static const MiniGopStats MiniGopStatsArray[] = {
492 : // hierarchical_levels start_index end_index Lenght mini_gop_index
493 : { 5, 0, 31, 32 }, // 0
494 : { 4, 0, 15, 16 }, // 1
495 : { 3, 0, 7, 8 }, // 2
496 : { 2, 0, 3, 4 }, // 3
497 : { 2, 4, 7, 4 }, // 4
498 : { 3, 8, 15, 8 }, // 5
499 : { 2, 8, 11, 4 }, // 6
500 : { 2, 12, 15, 4 }, // 7
501 : { 4, 16, 31, 16 }, // 8
502 : { 3, 16, 23, 8 }, // 9
503 : { 2, 16, 19, 4 }, // 10
504 : { 2, 20, 23, 4 }, // 11
505 : { 3, 24, 31, 8 }, // 12
506 : { 2, 24, 27, 4 }, // 13
507 : { 2, 28, 31, 4 } // 14
508 : };
509 :
510 : /**************************************************************
511 : * Get Mini GOP Statistics
512 : **************************************************************/
513 270 : const MiniGopStats* get_mini_gop_stats(const uint32_t mini_gop_index)
514 : {
515 270 : return &MiniGopStatsArray[mini_gop_index];
516 : }
517 :
/* Block origin offsets per partition, in units of a quarter of the parent
 * square size. Indexed [partition][axis][block within partition]:
 * axis 0 is the x offset and axis 1 the y offset (md_scan_all_blks adds
 * quartsize * entry to the parent's x / y). The value 9 marks an unused slot. */
uint32_t ns_quarter_off_mult[9/*Up to 9 part*/][2/*x+y*/][4/*Up to 4 ns blocks per part*/] =
{
    /*P=0*/ { /*x*/ {0,9,9,9}, /*y*/ {0,9,9,9} },
    /*P=1*/ { /*x*/ {0,0,9,9}, /*y*/ {0,2,9,9} },
    /*P=2*/ { /*x*/ {0,2,9,9}, /*y*/ {0,0,9,9} },
    /*P=3*/ { /*x*/ {0,2,0,9}, /*y*/ {0,0,2,9} },
    /*P=4*/ { /*x*/ {0,0,2,9}, /*y*/ {0,2,2,9} },
    /*P=5*/ { /*x*/ {0,0,2,9}, /*y*/ {0,2,0,9} },
    /*P=6*/ { /*x*/ {0,2,2,9}, /*y*/ {0,0,2,9} },
    /*P=7*/ { /*x*/ {0,0,0,0}, /*y*/ {0,1,2,3} },
    /*P=8*/ { /*x*/ {0,1,2,3}, /*y*/ {0,0,0,0} }
};
534 :
/* Block dimensions per partition, in units of a quarter of the parent square
 * size. Indexed [partition][axis][block within partition]: axis 0 scales the
 * block width and axis 1 the block height (md_scan_all_blks multiplies the
 * entry by quartsize). The value 9 marks an unused slot. */
uint32_t ns_quarter_size_mult[9/*Up to 9 part*/][2/*h+v*/][4/*Up to 4 ns blocks per part*/] =
{
    /*P=0*/ { /*h*/ {4,9,9,9}, /*v*/ {4,9,9,9} },
    /*P=1*/ { /*h*/ {4,4,9,9}, /*v*/ {2,2,9,9} },
    /*P=2*/ { /*h*/ {2,2,9,9}, /*v*/ {4,4,9,9} },
    /*P=3*/ { /*h*/ {2,2,4,9}, /*v*/ {2,2,2,9} },
    /*P=4*/ { /*h*/ {4,2,2,9}, /*v*/ {2,2,2,9} },
    /*P=5*/ { /*h*/ {2,2,2,9}, /*v*/ {2,2,4,9} },
    /*P=6*/ { /*h*/ {2,2,2,9}, /*v*/ {4,2,2,9} },
    /*P=7*/ { /*h*/ {4,4,4,4}, /*v*/ {1,1,1,1} },
    /*P=8*/ { /*h*/ {1,1,1,1}, /*v*/ {4,4,4,4} }
};
551 :
552 : BlockSize hvsize_to_bsize[/*H*/6][/*V*/6] =
553 : {
554 : { BLOCK_4X4, BLOCK_4X8, BLOCK_4X16, BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID },
555 : { BLOCK_8X4, BLOCK_8X8, BLOCK_8X16, BLOCK_8X32, BLOCK_INVALID, BLOCK_INVALID },
556 : { BLOCK_16X4, BLOCK_16X8, BLOCK_16X16, BLOCK_16X32, BLOCK_16X64, BLOCK_INVALID },
557 : { BLOCK_INVALID, BLOCK_32X8, BLOCK_32X16, BLOCK_32X32, BLOCK_32X64, BLOCK_INVALID },
558 : { BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X16, BLOCK_64X32, BLOCK_64X64, BLOCK_64X128 },
559 : { BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64, BLOCK_128X128 }
560 : };
561 :
562 : uint32_t max_sb = 64;
563 : uint32_t max_depth = 5;
564 : uint32_t max_part = 9;
565 : uint32_t max_num_active_blocks;
566 :
567 : //data could be organized in 2 forms: depth scan (dps) or MD scan (mds):
568 : //dps: all depth0 - all depth1 - all depth2 - all depth3.
569 : // within a depth: square blk0 in raster scan (followed by all its ns blcoks),
570 : // square blk1 in raster scan (followed by all its ns blcoks), etc
571 : //mds: top-down and Z scan.
572 : BlockGeom blk_geom_dps[MAX_NUM_BLOCKS_ALLOC]; //to access geom info of a particular block : use this table if you have the block index in depth scan
573 : BlockGeom blk_geom_mds[MAX_NUM_BLOCKS_ALLOC]; //to access geom info of a particular block : use this table if you have the block index in md scan
574 :
575 2202 : uint32_t search_matching_from_dps(
576 : uint32_t depth,
577 : uint32_t part,
578 : uint32_t x,
579 : uint32_t y)
580 : {
581 2202 : uint32_t found = 0;
582 : uint32_t it;
583 2202 : uint32_t matched = 0xFFFF;
584 2426600 : for (it = 0; it < max_num_active_blocks; it++)
585 : {
586 2424400 : if (blk_geom_dps[it].depth == depth && blk_geom_dps[it].shape == part && blk_geom_dps[it].origin_x == x && blk_geom_dps[it].origin_y == y)
587 : {
588 2202 : if (found == 0)
589 : {
590 2202 : matched = it;
591 2202 : found = 1;
592 : }
593 : else {
594 0 : matched = 0xFFFF;
595 0 : break;
596 : }
597 : }
598 : }
599 :
600 2202 : if (matched == 0xFFFF)
601 0 : printf(" \n\n PROBLEM\n\n ");
602 :
603 2202 : return matched;
604 : }
605 2202 : uint32_t search_matching_from_mds(
606 : uint32_t depth,
607 : uint32_t part,
608 : uint32_t x,
609 : uint32_t y)
610 : {
611 2202 : uint32_t found = 0;
612 : uint32_t it;
613 2202 : uint32_t matched = 0xFFFF;
614 2426600 : for (it = 0; it < max_num_active_blocks; it++)
615 : {
616 2424400 : if (blk_geom_mds[it].depth == depth && blk_geom_mds[it].shape == part && blk_geom_mds[it].origin_x == x && blk_geom_mds[it].origin_y == y)
617 : {
618 2202 : if (found == 0)
619 : {
620 2202 : matched = it;
621 2202 : found = 1;
622 : }
623 : else {
624 0 : matched = 0xFFFF;
625 0 : break;
626 : }
627 : }
628 : }
629 :
630 2202 : if (matched == 0xFFFF)
631 0 : printf(" \n\n PROBLEM\n\n ");
632 :
633 2202 : return matched;
634 : }
635 :
636 9468 : static INLINE TxSize av1_get_tx_size(
637 : BlockSize sb_type,
638 : int32_t plane/*, const MacroBlockD *xd*/) {
639 : //const MbModeInfo *mbmi = xd->mi[0];
640 : // if (xd->lossless[mbmi->segment_id]) return TX_4X4;
641 9468 : if (plane == 0) return blocksize_to_txsize[sb_type];
642 : // const MacroblockdPlane *pd = &xd->plane[plane];
643 :
644 2202 : uint32_t subsampling_x = plane > 0 ? 1 : 0;
645 2202 : uint32_t subsampling_y = plane > 0 ? 1 : 0;
646 2202 : return av1_get_max_uv_txsize(/*mbmi->*/sb_type, subsampling_x, subsampling_y);
647 : UNUSED(plane);
648 : }
649 :
650 682 : void md_scan_all_blks(uint32_t *idx_mds, uint32_t sq_size, uint32_t x, uint32_t y, int32_t is_last_quadrant, uint8_t quad_it)
651 : {
652 : //the input block is the parent square block of size sq_size located at pos (x,y)
653 :
654 : uint32_t part_it, nsq_it, d1_it, sqi_mds;
655 :
656 682 : uint32_t halfsize = sq_size / 2;
657 682 : uint32_t quartsize = sq_size / 4;
658 :
659 1364 : uint32_t max_part_updated = sq_size == 128 ? MIN(max_part, 7) :
660 1236 : sq_size == 8 ? MIN(max_part, 3) :
661 :
662 554 : sq_size == 4 ? 1 : max_part;
663 :
664 682 : d1_it = 0;
665 682 : sqi_mds = *idx_mds;
666 :
667 1956 : for (part_it = 0; part_it < max_part_updated; part_it++)
668 : {
669 1274 : uint32_t tot_num_ns_per_part =
670 1274 : part_it < 1 ? 1 :
671 : part_it < 3 ? 2 :
672 : part_it < 7 ? 3 : 4;
673 :
674 3476 : for (nsq_it = 0; nsq_it < tot_num_ns_per_part; nsq_it++)
675 : {
676 4354 : blk_geom_mds[*idx_mds].depth = sq_size == max_sb / 1 ? 0 :
677 2152 : sq_size == max_sb / 2 ? 1 :
678 1952 : sq_size == max_sb / 4 ? 2 :
679 1152 : sq_size == max_sb / 8 ? 3 :
680 512 : sq_size == max_sb / 16 ? 4 : 5;
681 :
682 2202 : blk_geom_mds[*idx_mds].sq_size = sq_size;
683 2202 : blk_geom_mds[*idx_mds].is_last_quadrant = is_last_quadrant;
684 2202 : blk_geom_mds[*idx_mds].quadi = quad_it;
685 :
686 2202 : blk_geom_mds[*idx_mds].shape = (PART)part_it;
687 2202 : blk_geom_mds[*idx_mds].origin_x = x + quartsize * ns_quarter_off_mult[part_it][0][nsq_it];
688 2202 : blk_geom_mds[*idx_mds].origin_y = y + quartsize * ns_quarter_off_mult[part_it][1][nsq_it];
689 :
690 2202 : blk_geom_mds[*idx_mds].d1i = d1_it++;
691 2202 : blk_geom_mds[*idx_mds].sqi_mds = sqi_mds;
692 2202 : blk_geom_mds[*idx_mds].totns = tot_num_ns_per_part;
693 2202 : blk_geom_mds[*idx_mds].nsi = nsq_it;
694 :
695 2202 : uint32_t matched = search_matching_from_dps(
696 2202 : blk_geom_mds[*idx_mds].depth,
697 2202 : blk_geom_mds[*idx_mds].shape,
698 2202 : blk_geom_mds[*idx_mds].origin_x,
699 2202 : blk_geom_mds[*idx_mds].origin_y);
700 :
701 2202 : blk_geom_mds[*idx_mds].blkidx_dps = blk_geom_dps[matched].blkidx_dps;
702 :
703 2202 : blk_geom_mds[*idx_mds].bwidth = quartsize * ns_quarter_size_mult[part_it][0][nsq_it];
704 2202 : blk_geom_mds[*idx_mds].bheight = quartsize * ns_quarter_size_mult[part_it][1][nsq_it];
705 2202 : blk_geom_mds[*idx_mds].bwidth_log2 = Log2f(blk_geom_mds[*idx_mds].bwidth);
706 2202 : blk_geom_mds[*idx_mds].bheight_log2 = Log2f(blk_geom_mds[*idx_mds].bheight);
707 2202 : blk_geom_mds[*idx_mds].bsize = hvsize_to_bsize[blk_geom_mds[*idx_mds].bwidth_log2 - 2][blk_geom_mds[*idx_mds].bheight_log2 - 2];
708 2202 : blk_geom_mds[*idx_mds].bwidth_uv = MAX(4, blk_geom_mds[*idx_mds].bwidth >> 1); // AMIR to clean to check for 4x4
709 2202 : blk_geom_mds[*idx_mds].bheight_uv = MAX(4, blk_geom_mds[*idx_mds].bheight >> 1);
710 2202 : blk_geom_mds[*idx_mds].has_uv = 1;
711 :
712 2202 : if (blk_geom_mds[*idx_mds].bwidth == 4 && blk_geom_mds[*idx_mds].bheight == 4)
713 512 : blk_geom_mds[*idx_mds].has_uv = is_last_quadrant ? 1 : 0;
714 :
715 : else
716 1690 : if ((blk_geom_mds[*idx_mds].bwidth >> 1) < blk_geom_mds[*idx_mds].bwidth_uv || (blk_geom_mds[*idx_mds].bheight >> 1) < blk_geom_mds[*idx_mds].bheight_uv) {
717 768 : int32_t num_blk_same_uv = 1;
718 768 : if (blk_geom_mds[*idx_mds].bwidth >> 1 < 4)
719 384 : num_blk_same_uv *= 2;
720 768 : if (blk_geom_mds[*idx_mds].bheight >> 1 < 4)
721 384 : num_blk_same_uv *= 2;
722 : //if (blk_geom_mds[*idx_mds].nsi % 2 == 0)
723 : //if (blk_geom_mds[*idx_mds].nsi != (blk_geom_mds[*idx_mds].totns-1) )
724 768 : if (blk_geom_mds[*idx_mds].nsi != (num_blk_same_uv - 1) && blk_geom_mds[*idx_mds].nsi != (2 * num_blk_same_uv - 1))
725 384 : blk_geom_mds[*idx_mds].has_uv = 0;
726 : }
727 :
728 2202 : blk_geom_mds[*idx_mds].bsize_uv = get_plane_block_size(blk_geom_mds[*idx_mds].bsize, 1, 1);
729 2202 : uint16_t txb_itr = 0;
730 : // tx_depth 1 geom settings
731 2202 : uint8_t tx_depth = 0;
732 4404 : blk_geom_mds[*idx_mds].txb_count[tx_depth] = blk_geom_mds[*idx_mds].bsize == BLOCK_128X128 ? 4 :
733 2202 : blk_geom_mds[*idx_mds].bsize == BLOCK_128X64 || blk_geom_mds[*idx_mds].bsize == BLOCK_64X128 ? 2 : 1;
734 4404 : for (txb_itr = 0; txb_itr < blk_geom_mds[*idx_mds].txb_count[tx_depth]; txb_itr++) {
735 2202 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(blk_geom_mds[*idx_mds].bsize, 0);
736 2202 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = av1_get_tx_size(blk_geom_mds[*idx_mds].bsize, 1);
737 2202 : if (blk_geom_mds[*idx_mds].bsize == BLOCK_128X128)
738 : {
739 0 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = (txb_itr == 0 || txb_itr == 2) ? blk_geom_mds[*idx_mds].origin_x : blk_geom_mds[*idx_mds].origin_x + 64;
740 0 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = (txb_itr == 0 || txb_itr == 1) ? blk_geom_mds[*idx_mds].origin_y : blk_geom_mds[*idx_mds].origin_y + 64;
741 : }
742 2202 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_128X64)
743 : {
744 0 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = (txb_itr == 0) ? blk_geom_mds[*idx_mds].origin_x : blk_geom_mds[*idx_mds].origin_x + 64;
745 0 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y;
746 : }
747 2202 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_64X128)
748 : {
749 0 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x;
750 0 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = (txb_itr == 0) ? blk_geom_mds[*idx_mds].origin_y : blk_geom_mds[*idx_mds].origin_y + 64;
751 : }
752 : else
753 : {
754 2202 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x;
755 2202 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y;
756 : }
757 : /*if (blk_geom_mds[*idx_mds].bsize == BLOCK_16X8)
758 : printf("");*/
759 2202 : blk_geom_mds[*idx_mds].tx_boff_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] - blk_geom_mds[*idx_mds].origin_x;
760 2202 : blk_geom_mds[*idx_mds].tx_boff_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] - blk_geom_mds[*idx_mds].origin_y;
761 2202 : blk_geom_mds[*idx_mds].tx_width[tx_depth][txb_itr] = tx_size_wide[blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr]];
762 2202 : blk_geom_mds[*idx_mds].tx_height[tx_depth][txb_itr] = tx_size_high[blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr]];
763 2202 : blk_geom_mds[*idx_mds].tx_width_uv[tx_depth][txb_itr] = tx_size_wide[blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr]];
764 2202 : blk_geom_mds[*idx_mds].tx_height_uv[tx_depth][txb_itr] = tx_size_high[blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr]];
765 : }
766 : // tx_depth 1 geom settings
767 2202 : tx_depth = 1;
768 4404 : blk_geom_mds[*idx_mds].txb_count[tx_depth] = blk_geom_mds[*idx_mds].bsize == BLOCK_128X128 ? 4 :
769 2202 : blk_geom_mds[*idx_mds].bsize == BLOCK_128X64 || blk_geom_mds[*idx_mds].bsize == BLOCK_64X128 ? 2 : 1;
770 :
771 2202 : if (blk_geom_mds[*idx_mds].bsize == BLOCK_64X64 ||
772 2200 : blk_geom_mds[*idx_mds].bsize == BLOCK_32X32 ||
773 2176 : blk_geom_mds[*idx_mds].bsize == BLOCK_16X16 ||
774 2080 : blk_geom_mds[*idx_mds].bsize == BLOCK_8X8)
775 : {
776 506 : blk_geom_mds[*idx_mds].txb_count[tx_depth] = 4;
777 : }
778 :
779 2202 : if (blk_geom_mds[*idx_mds].bsize == BLOCK_64X32 ||
780 2194 : blk_geom_mds[*idx_mds].bsize == BLOCK_32X64 ||
781 2186 : blk_geom_mds[*idx_mds].bsize == BLOCK_32X16 ||
782 2154 : blk_geom_mds[*idx_mds].bsize == BLOCK_16X32 ||
783 2122 : blk_geom_mds[*idx_mds].bsize == BLOCK_16X8 ||
784 1994 : blk_geom_mds[*idx_mds].bsize == BLOCK_8X16)
785 : {
786 336 : blk_geom_mds[*idx_mds].txb_count[tx_depth] = 2;
787 : }
788 2202 : if (blk_geom_mds[*idx_mds].bsize == BLOCK_64X16 ||
789 2194 : blk_geom_mds[*idx_mds].bsize == BLOCK_16X64 ||
790 2186 : blk_geom_mds[*idx_mds].bsize == BLOCK_32X8 ||
791 2154 : blk_geom_mds[*idx_mds].bsize == BLOCK_8X32 ||
792 2122 : blk_geom_mds[*idx_mds].bsize == BLOCK_16X4 ||
793 1994 : blk_geom_mds[*idx_mds].bsize == BLOCK_4X16)
794 : {
795 336 : blk_geom_mds[*idx_mds].txb_count[tx_depth] = 4;
796 : }
797 7266 : for (txb_itr = 0; txb_itr < blk_geom_mds[*idx_mds].txb_count[tx_depth]; txb_itr++) {
798 5064 : if (blk_geom_mds[*idx_mds].bsize == BLOCK_64X64)
799 : {
800 8 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_32X32, 0);
801 8 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
802 8 : uint8_t offsetx[4] = { 0,32,0,32 };
803 8 : uint8_t offsety[4] = { 0,0,32,32 };
804 : // 0 1
805 : // 2 3
806 8 : uint8_t tbx = offsetx[txb_itr];
807 8 : uint8_t tby = offsety[txb_itr];
808 :
809 8 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
810 8 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
811 : }
812 5056 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_64X32)
813 : {
814 16 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_32X32, 0);
815 16 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
816 16 : uint8_t offsetx[2] = { 0,32 };
817 16 : uint8_t offsety[2] = { 0,0 };
818 : // 0 1
819 16 : uint8_t tbx = offsetx[txb_itr];
820 16 : uint8_t tby = offsety[txb_itr];
821 :
822 16 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
823 16 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
824 : }
825 5040 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_32X64)
826 : {
827 16 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_32X32, 0);
828 16 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
829 16 : uint8_t offsetx[2] = { 0,0 };
830 16 : uint8_t offsety[2] = { 0,32 };
831 : // 0 1
832 16 : uint8_t tbx = offsetx[txb_itr];
833 16 : uint8_t tby = offsety[txb_itr];
834 :
835 16 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
836 16 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
837 : }
838 5024 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_32X32)
839 : {
840 96 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_16X16, 0);
841 96 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
842 96 : uint8_t offsetx[4] = { 0,16,0,16 };
843 96 : uint8_t offsety[4] = { 0,0,16,16 };
844 : // 0 1
845 : // 2 3
846 96 : uint8_t tbx = offsetx[txb_itr];
847 96 : uint8_t tby = offsety[txb_itr];
848 :
849 96 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
850 96 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
851 : }
852 4928 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_32X16)
853 : {
854 64 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_16X16, 0);
855 64 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
856 64 : uint8_t offsetx[2] = { 0,16 };
857 64 : uint8_t offsety[2] = { 0,0 };
858 : // 0 1
859 64 : uint8_t tbx = offsetx[txb_itr];
860 64 : uint8_t tby = offsety[txb_itr];
861 :
862 64 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
863 64 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
864 : }
865 4864 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_16X32)
866 : {
867 64 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_16X16, 0);
868 64 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
869 64 : uint8_t offsetx[2] = { 0,0 };
870 64 : uint8_t offsety[2] = { 0,16 };
871 : // 0 1
872 64 : uint8_t tbx = offsetx[txb_itr];
873 64 : uint8_t tby = offsety[txb_itr];
874 :
875 64 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
876 64 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
877 : }
878 4800 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_16X16)
879 : {
880 384 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_8X8, 0);
881 384 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
882 384 : uint8_t offsetx[4] = { 0,8,0,8 };
883 384 : uint8_t offsety[4] = { 0,0,8,8 };
884 : // 0 1
885 : // 2 3
886 384 : uint8_t tbx = offsetx[txb_itr];
887 384 : uint8_t tby = offsety[txb_itr];
888 :
889 384 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
890 384 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
891 : }
892 4416 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_16X8)
893 : {
894 256 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_8X8, 0);
895 256 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
896 256 : uint8_t offsetx[2] = { 0,8 };
897 256 : uint8_t offsety[2] = { 0,0 };
898 : // 0 1
899 256 : uint8_t tbx = offsetx[txb_itr];
900 256 : uint8_t tby = offsety[txb_itr];
901 :
902 256 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
903 256 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
904 : //printf("");
905 : }
906 4160 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_8X16)
907 : {
908 256 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_8X8, 0);
909 256 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
910 256 : uint8_t offsetx[2] = { 0,0 };
911 256 : uint8_t offsety[2] = { 0,8 };
912 : // 0 1
913 256 : uint8_t tbx = offsetx[txb_itr];
914 256 : uint8_t tby = offsety[txb_itr];
915 :
916 256 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
917 256 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
918 : }
919 3904 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_8X8)
920 : {
921 1536 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_4X4, 0);
922 1536 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
923 1536 : uint8_t offsetx[4] = { 0,4,0,4 };
924 1536 : uint8_t offsety[4] = { 0,0,4,4 };
925 : // 0 1
926 : // 2 3
927 1536 : uint8_t tbx = offsetx[txb_itr];
928 1536 : uint8_t tby = offsety[txb_itr];
929 :
930 1536 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
931 1536 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
932 : }
933 2368 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_64X16)
934 : {
935 32 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_16X16, 0);
936 32 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
937 : // 0 1 2 3
938 32 : uint8_t offsetx[4] = { 0,16, 32, 48 };
939 32 : uint8_t offsety[4] = { 0,0, 0, 0 };
940 32 : uint8_t tbx = offsetx[txb_itr];
941 32 : uint8_t tby = offsety[txb_itr];
942 :
943 32 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
944 32 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
945 : }
946 2336 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_16X64)
947 : {
948 32 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_16X16, 0);
949 32 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
950 : // 0 1 2 3
951 32 : uint8_t offsetx[4] = { 0,0, 0, 0 };
952 32 : uint8_t offsety[4] = { 0,16,32, 48 };
953 32 : uint8_t tbx = offsetx[txb_itr];
954 32 : uint8_t tby = offsety[txb_itr];
955 :
956 32 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
957 32 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
958 : }
959 2304 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_32X8)
960 : {
961 128 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_8X8, 0);
962 128 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
963 : // 0 1 2 3
964 128 : uint8_t offsetx[4] = { 0,8, 16, 24 };
965 128 : uint8_t offsety[4] = { 0,0, 0, 0 };
966 128 : uint8_t tbx = offsetx[txb_itr];
967 128 : uint8_t tby = offsety[txb_itr];
968 :
969 128 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
970 128 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
971 : }
972 2176 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_8X32)
973 : {
974 128 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_8X8, 0);
975 128 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
976 : // 0 1 2 3
977 128 : uint8_t offsetx[4] = { 0,0, 0, 0 };
978 128 : uint8_t offsety[4] = { 0,8,16, 24 };
979 128 : uint8_t tbx = offsetx[txb_itr];
980 128 : uint8_t tby = offsety[txb_itr];
981 :
982 128 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
983 128 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
984 : }
985 2048 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_16X4)
986 : {
987 512 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_4X4, 0);
988 512 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
989 : // 0 1 2 3
990 512 : uint8_t offsetx[4] = { 0,4, 8, 12 };
991 512 : uint8_t offsety[4] = { 0,0, 0, 0 };
992 512 : uint8_t tbx = offsetx[txb_itr];
993 512 : uint8_t tby = offsety[txb_itr];
994 :
995 512 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
996 512 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
997 : }
998 1536 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_4X16)
999 : {
1000 512 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_4X4, 0);
1001 512 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
1002 : // 0 1 2 3
1003 512 : uint8_t offsetx[4] = { 0,0, 0, 0 };
1004 512 : uint8_t offsety[4] = { 0,4,8, 12 };
1005 512 : uint8_t tbx = offsetx[txb_itr];
1006 512 : uint8_t tby = offsety[txb_itr];
1007 :
1008 512 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
1009 512 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
1010 : }
1011 : else
1012 : {
1013 1024 : if (blk_geom_mds[*idx_mds].bsize == BLOCK_128X128)
1014 : {
1015 0 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(blk_geom_mds[*idx_mds].bsize, 0);
1016 0 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
1017 :
1018 0 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = (txb_itr == 0 || txb_itr == 2) ? blk_geom_mds[*idx_mds].origin_x : blk_geom_mds[*idx_mds].origin_x + 64;
1019 0 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = (txb_itr == 0 || txb_itr == 1) ? blk_geom_mds[*idx_mds].origin_y : blk_geom_mds[*idx_mds].origin_y + 64;
1020 : }
1021 1024 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_128X64)
1022 : {
1023 0 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(blk_geom_mds[*idx_mds].bsize, 0);
1024 0 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
1025 :
1026 0 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = (txb_itr == 0) ? blk_geom_mds[*idx_mds].origin_x : blk_geom_mds[*idx_mds].origin_x + 64;
1027 0 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y;
1028 : }
1029 1024 : else if (blk_geom_mds[*idx_mds].bsize == BLOCK_64X128)
1030 : {
1031 0 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(blk_geom_mds[*idx_mds].bsize, 0);
1032 0 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
1033 0 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x;
1034 0 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = (txb_itr == 0) ? blk_geom_mds[*idx_mds].origin_y : blk_geom_mds[*idx_mds].origin_y + 64;
1035 : }
1036 : else
1037 : {
1038 1024 : blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(blk_geom_mds[*idx_mds].bsize, 0);
1039 1024 : blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
1040 1024 : blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x;
1041 1024 : blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y;
1042 : }
1043 : }
1044 5064 : blk_geom_mds[*idx_mds].tx_boff_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] - blk_geom_mds[*idx_mds].origin_x;
1045 5064 : blk_geom_mds[*idx_mds].tx_boff_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] - blk_geom_mds[*idx_mds].origin_y;
1046 5064 : blk_geom_mds[*idx_mds].tx_width[tx_depth][txb_itr] = tx_size_wide[blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr]];
1047 5064 : blk_geom_mds[*idx_mds].tx_height[tx_depth][txb_itr] = tx_size_high[blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr]];
1048 5064 : blk_geom_mds[*idx_mds].tx_width_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].tx_width_uv[0][0];
1049 5064 : blk_geom_mds[*idx_mds].tx_height_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].tx_height_uv[0][0];
1050 : }
1051 2202 : blk_geom_mds[*idx_mds].blkidx_mds = (*idx_mds);
1052 2202 : (*idx_mds) = (*idx_mds) + 1;
1053 : }
1054 : }
1055 :
1056 682 : uint32_t min_size = max_sb >> (max_depth - 1);
1057 682 : if (halfsize >= min_size)
1058 : {
1059 170 : md_scan_all_blks(idx_mds, halfsize, x, y, 0,0);
1060 170 : md_scan_all_blks(idx_mds, halfsize, x + halfsize, y, 0,1);
1061 170 : md_scan_all_blks(idx_mds, halfsize, x, y + halfsize, 0,2);
1062 170 : md_scan_all_blks(idx_mds, halfsize, x + halfsize, y + halfsize, 1,3);
1063 : }
1064 682 : }
1065 :
1066 2 : void depth_scan_all_blks()
1067 : {
1068 : uint32_t depth_it, sq_it_y, sq_it_x, part_it, nsq_it;
1069 : uint32_t sq_orgx, sq_orgy;
1070 2 : uint32_t depth_scan_idx = 0;
1071 :
1072 12 : for (depth_it = 0; depth_it < max_depth; depth_it++)
1073 : {
1074 10 : uint32_t tot_num_sq = 1 << depth_it;
1075 18 : uint32_t sq_size = depth_it == 0 ? max_sb :
1076 14 : depth_it == 1 ? max_sb / 2 :
1077 10 : depth_it == 2 ? max_sb / 4 :
1078 6 : depth_it == 3 ? max_sb / 8 :
1079 2 : depth_it == 4 ? max_sb / 16 : max_sb / 32;
1080 :
1081 20 : uint32_t max_part_updated = sq_size == 128 ? MIN(max_part, 7) :
1082 18 : sq_size == 8 ? MIN(max_part, 3) :
1083 8 : sq_size == 4 ? 1 : max_part;
1084 :
1085 72 : for (sq_it_y = 0; sq_it_y < tot_num_sq; sq_it_y++)
1086 : {
1087 62 : sq_orgy = sq_it_y * sq_size;
1088 :
1089 744 : for (sq_it_x = 0; sq_it_x < tot_num_sq; sq_it_x++)
1090 : {
1091 682 : sq_orgx = sq_it_x * sq_size;
1092 :
1093 1956 : for (part_it = 0; part_it < max_part_updated; part_it++)
1094 : {
1095 1274 : uint32_t tot_num_ns_per_part = part_it < 1 ? 1 :
1096 : part_it < 3 ? 2 :
1097 : part_it < 7 ? 3 : 4;
1098 :
1099 3476 : for (nsq_it = 0; nsq_it < tot_num_ns_per_part; nsq_it++)
1100 : {
1101 2202 : blk_geom_dps[depth_scan_idx].blkidx_dps = depth_scan_idx;
1102 2202 : blk_geom_dps[depth_scan_idx].depth = depth_it;
1103 2202 : blk_geom_dps[depth_scan_idx].shape = (PART)part_it;
1104 2202 : blk_geom_dps[depth_scan_idx].origin_x = sq_orgx + (sq_size / 4) *ns_quarter_off_mult[part_it][0][nsq_it];
1105 2202 : blk_geom_dps[depth_scan_idx].origin_y = sq_orgy + (sq_size / 4) *ns_quarter_off_mult[part_it][1][nsq_it];
1106 :
1107 2202 : depth_scan_idx++;
1108 : }
1109 : }
1110 : }
1111 : }
1112 : }
1113 2 : }
1114 :
1115 2 : void finish_depth_scan_all_blks()
1116 : {
1117 2 : uint32_t do_print = 0;
1118 2 : uint32_t min_size = max_sb >> (max_depth - 1);
1119 2 : FILE * fp = NULL;
1120 2 : if (do_print)
1121 0 : FOPEN(fp, "e:\\test\\data.csv", "w");
1122 :
1123 : uint32_t depth_it, sq_it_y, sq_it_x, part_it, nsq_it;
1124 :
1125 2 : uint32_t depth_scan_idx = 0;
1126 :
1127 12 : for (depth_it = 0; depth_it < max_depth; depth_it++)
1128 : {
1129 10 : uint32_t tot_num_sq = 1 << depth_it;
1130 18 : uint32_t sq_size = depth_it == 0 ? max_sb :
1131 14 : depth_it == 1 ? max_sb / 2 :
1132 10 : depth_it == 2 ? max_sb / 4 :
1133 6 : depth_it == 3 ? max_sb / 8 :
1134 2 : depth_it == 4 ? max_sb / 16 : max_sb / 32;
1135 :
1136 20 : uint32_t max_part_updated = sq_size == 128 ? MIN(max_part, 7) :
1137 18 : sq_size == 8 ? MIN(max_part, 3) :
1138 8 : sq_size == 4 ? 1 : max_part;
1139 :
1140 10 : if (do_print)
1141 : {
1142 0 : fprintf(fp, "\n\n\n");
1143 0 : printf("\n\n\n");
1144 : }
1145 :
1146 72 : for (sq_it_y = 0; sq_it_y < tot_num_sq; sq_it_y++)
1147 : {
1148 62 : if (do_print)
1149 : {
1150 0 : for (uint32_t i = 0; i < sq_size / min_size; i++)
1151 : {
1152 0 : fprintf(fp, "\n ");
1153 0 : printf("\n ");
1154 : }
1155 : }
1156 :
1157 744 : for (sq_it_x = 0; sq_it_x < tot_num_sq; sq_it_x++)
1158 : {
1159 1956 : for (part_it = 0; part_it < max_part_updated; part_it++)
1160 : {
1161 1274 : uint32_t tot_num_ns_per_part = part_it < 1 ? 1 :
1162 : part_it < 3 ? 2 :
1163 : part_it < 7 ? 3 : 4;
1164 :
1165 3476 : for (nsq_it = 0; nsq_it < tot_num_ns_per_part; nsq_it++)
1166 : {
1167 2202 : uint32_t matched = search_matching_from_mds(
1168 2202 : blk_geom_dps[depth_scan_idx].depth,
1169 2202 : blk_geom_dps[depth_scan_idx].shape,
1170 2202 : blk_geom_dps[depth_scan_idx].origin_x,
1171 2202 : blk_geom_dps[depth_scan_idx].origin_y);
1172 :
1173 2202 : blk_geom_dps[depth_scan_idx].blkidx_mds = blk_geom_mds[matched].blkidx_mds;
1174 :
1175 2202 : if (do_print && part_it == 0)
1176 : {
1177 0 : fprintf(fp, "%i", blk_geom_dps[depth_scan_idx].blkidx_mds);
1178 0 : printf("%i", blk_geom_dps[depth_scan_idx].blkidx_mds);
1179 :
1180 0 : for (uint32_t i = 0; i < sq_size / min_size; i++)
1181 : {
1182 0 : fprintf(fp, ",");
1183 0 : printf(",");
1184 : }
1185 : }
1186 2202 : depth_scan_idx++;
1187 : }
1188 : }
1189 : }
1190 : }
1191 : }
1192 :
1193 2 : if (do_print)
1194 0 : fclose(fp);
1195 2 : }
1196 :
1197 2 : uint32_t count_total_num_of_active_blks()
1198 : {
1199 : uint32_t depth_it, sq_it_y, sq_it_x, part_it, nsq_it;
1200 :
1201 2 : uint32_t depth_scan_idx = 0;
1202 :
1203 12 : for (depth_it = 0; depth_it < max_depth; depth_it++)
1204 : {
1205 10 : uint32_t tot_num_sq = 1 << depth_it;
1206 18 : uint32_t sq_size = depth_it == 0 ? max_sb :
1207 14 : depth_it == 1 ? max_sb / 2 :
1208 10 : depth_it == 2 ? max_sb / 4 :
1209 6 : depth_it == 3 ? max_sb / 8 :
1210 2 : depth_it == 4 ? max_sb / 16 : max_sb / 32;
1211 :
1212 20 : uint32_t max_part_updated = sq_size == 128 ? MIN(max_part, 7) :
1213 18 : sq_size == 8 ? MIN(max_part, 3) :
1214 8 : sq_size == 4 ? 1 : max_part;
1215 :
1216 72 : for (sq_it_y = 0; sq_it_y < tot_num_sq; sq_it_y++)
1217 : {
1218 744 : for (sq_it_x = 0; sq_it_x < tot_num_sq; sq_it_x++)
1219 : {
1220 1956 : for (part_it = 0; part_it < max_part_updated; part_it++)
1221 : {
1222 1274 : uint32_t tot_num_ns_per_part = part_it < 1 ? 1 :
1223 : part_it < 3 ? 2 :
1224 : part_it < 7 ? 3 : 4;
1225 :
1226 3476 : for (nsq_it = 0; nsq_it < tot_num_ns_per_part; nsq_it++)
1227 2202 : depth_scan_idx++;
1228 : }
1229 : }
1230 : }
1231 : }
1232 :
1233 2 : return depth_scan_idx;
1234 : }
1235 2 : void log_redundancy_similarity(uint32_t max_block_count)
1236 : {
1237 : uint32_t blk_it, s_it;
1238 :
1239 2204 : for (blk_it = 0; blk_it < max_block_count; blk_it++)
1240 : {
1241 2202 : BlockGeom * cur_geom = &blk_geom_mds[blk_it];
1242 2202 : cur_geom->similar = 0;
1243 2202 : cur_geom->redund = 0;
1244 2202 : cur_geom->redund_list.list_size = 0;
1245 2202 : cur_geom->similar_list.list_size = 0;
1246 :
1247 2426600 : for (s_it = 0; s_it < max_block_count; s_it++)
1248 : {
1249 2424400 : BlockGeom * search_geom = &blk_geom_mds[s_it];
1250 :
1251 2424400 : if (cur_geom->bsize == search_geom->bsize && cur_geom->origin_x == search_geom->origin_x &&
1252 31274 : cur_geom->origin_y == search_geom->origin_y && s_it != blk_it)
1253 : {
1254 : //one block could have similar and redundant blocks
1255 1344 : cur_geom->similar = 1;
1256 1344 : cur_geom->similar_list.blk_mds_table[cur_geom->similar_list.list_size] = search_geom->blkidx_mds;
1257 1344 : cur_geom->similar_list.list_size++;
1258 1344 : if (cur_geom->nsi == 0 && search_geom->nsi==0)
1259 : {
1260 420 : cur_geom->redund = 1;
1261 420 : cur_geom->redund_list.blk_mds_table[cur_geom->redund_list.list_size] = search_geom->blkidx_mds;
1262 420 : cur_geom->redund_list.list_size++;
1263 : }
1264 : }
1265 : }
1266 : }
1267 2 : }
1268 2 : void build_blk_geom(int32_t use_128x128)
1269 : {
1270 2 : max_sb = use_128x128 ? 128 : 64;
1271 2 : max_depth = use_128x128 ? 6 : 5;
1272 2 : uint32_t max_block_count = use_128x128 ? BLOCK_MAX_COUNT_SB_128 : BLOCK_MAX_COUNT_SB_64;
1273 :
1274 : //(0)compute total number of blocks using the information provided
1275 2 : max_num_active_blocks = count_total_num_of_active_blks();
1276 2 : if (max_num_active_blocks != max_block_count)
1277 0 : printf(" \n\n Error %i blocks\n\n ", max_num_active_blocks);
1278 :
1279 : //(1) Construct depth scan blk_geom_dps
1280 2 : depth_scan_all_blks();
1281 :
1282 : //(2) Construct md scan blk_geom_mds: use info from dps
1283 2 : uint32_t idx_mds = 0;
1284 2 : md_scan_all_blks(&idx_mds, max_sb, 0, 0, 0,0);
1285 :
1286 : //(3) Fill more info from mds to dps - print using dps
1287 2 : finish_depth_scan_all_blks();
1288 :
1289 2 : log_redundancy_similarity(max_block_count);
1290 2 : }
1291 :
1292 : //need to finish filling dps by inherting data from mds
1293 0 : const BlockGeom * Get_blk_geom_dps(uint32_t bidx_dps)
1294 : {
1295 0 : return &blk_geom_dps[bidx_dps];
1296 : }
1297 351924000 : const BlockGeom * get_blk_geom_mds(uint32_t bidx_mds)
1298 : {
1299 351924000 : return &blk_geom_mds[bidx_mds];
1300 : }
1301 :
1302 46068 : uint32_t get_mds_idx(uint32_t orgx, uint32_t orgy, uint32_t size, uint32_t use_128x128)
1303 : {
1304 46068 : uint32_t max_block_count = use_128x128 ? BLOCK_MAX_COUNT_SB_128 : BLOCK_MAX_COUNT_SB_64;
1305 46068 : uint32_t mds = 0;
1306 :
1307 24972700 : for (uint32_t blk_it = 0; blk_it < max_block_count; blk_it++){
1308 24972700 : BlockGeom * cur_geom = &blk_geom_mds[blk_it];
1309 :
1310 24972700 : if ((uint32_t)cur_geom->sq_size == size && cur_geom->origin_x == orgx &&
1311 1150580 : cur_geom->origin_y == orgy && cur_geom->shape == PART_N) {
1312 46066 : mds = cur_geom->blkidx_mds;
1313 46066 : break;
1314 : }
1315 : }
1316 46068 : return mds;
1317 : }
|