LCOV - code coverage report
Current view: top level - Codec - EbUtility.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 460 555 82.9 %
Date: 2019-11-25 17:38:06 Functions: 18 25 72.0 %

          Line data    Source code
       1             : /*
       2             : * Copyright(c) 2019 Intel Corporation
       3             : * SPDX - License - Identifier: BSD - 2 - Clause - Patent
       4             : */
       5             : 
       6             : #include <time.h>
       7             : #include <stdio.h>
       8             : 
       9             : #ifdef _WIN32
      10             : #include <windows.h>
      11             : #else
      12             : #include <stdlib.h>
      13             : #include <sys/time.h>
      14             : #endif
      15             : 
      16             : #include "EbDefinitions.h"
      17             : #include "EbUtility.h"
      18             : #include "EbTime.h"
      19             : /********************************************************************************************
      20             : * faster memcopy for <= 64B blocks, great w/ inlining and size known at compile time (or w/ PGO)
      21             : * THIS NEEDS TO STAY IN A HEADER FOR BEST PERFORMANCE
      22             : ********************************************************************************************/
      23             : 
      24             : #include <immintrin.h>
      25             : 
      26             : #if defined(__GNUC__) && !defined(__clang__) && !defined(__ICC__)
      27             : __attribute__((optimize("unroll-loops")))
      28             : #endif
      29    41219600 : static void eb_memcpy_small(void* dst_ptr, void const* src_ptr, size_t size) {
      30    41219600 :     const char* src = (const char*)src_ptr;
      31    41219600 :     char*       dst = (char*)dst_ptr;
      32    41219600 :     size_t      i = 0;
      33             : 
      34             : #ifdef _INTEL_COMPILER
      35             : #pragma unroll
      36             : #endif
      37    58065300 :     while ((i + 16) <= size)
      38             :     {
      39    33691300 :         _mm_storeu_ps((float*)(dst + i), _mm_loadu_ps((const float*)(src + i)));
      40    16845700 :         i += 16;
      41             :     }
      42             : 
      43    41219600 :     if ((i + 8) <= size)
      44             :     {
      45    22896700 :         _mm_store_sd((double*)(dst + i), _mm_load_sd((const double*)(src + i)));
      46    11448400 :         i += 8;
      47             :     }
      48             : 
      49   125654000 :     for (; i < size; ++i)
      50    84433900 :         dst[i] = src[i];
      51    41219600 : }
      52             : #define EB_MIN(a,b)             (((a) < (b)) ? (a) : (b))
      53      599669 : static void eb_memcpy_sse(void* dst_ptr, void const* src_ptr, size_t size) {
      54      599669 :     const char* src = (const char*)src_ptr;
      55      599669 :     char*       dst = (char*)dst_ptr;
      56      599669 :     size_t      i = 0;
      57      599669 :     size_t align_cnt = EB_MIN((64 - ((size_t)dst & 63)), size);
      58             : 
      59             :     // align dest to a $line
      60      599669 :     if (align_cnt != 64) {
      61      488674 :         eb_memcpy_small(dst, src, align_cnt);
      62      488705 :         dst += align_cnt;
      63      488705 :         src += align_cnt;
      64      488705 :         size -= align_cnt;
      65             :     }
      66             : 
      67             :     // copy a $line at a time
      68             :     // dst aligned to a $line
      69      599700 :     size_t cline_cnt = (size & ~(size_t)63);
      70     4933790 :     for (i = 0; i < cline_cnt; i += 64) {
      71     4334090 :         __m128 c0 = _mm_loadu_ps((const float*)(src + i));
      72     4334090 :         __m128 c1 = _mm_loadu_ps((const float*)(src + i + sizeof(c0)));
      73     4334090 :         __m128 c2 = _mm_loadu_ps((const float*)(src + i + sizeof(c0) * 2));
      74     4334090 :         __m128 c3 = _mm_loadu_ps((const float*)(src + i + sizeof(c0) * 3));
      75             : 
      76     4334090 :         _mm_storeu_ps((float*)(dst + i), c0);
      77     4334090 :         _mm_storeu_ps((float*)(dst + i + sizeof(c0)), c1);
      78     4334090 :         _mm_storeu_ps((float*)(dst + i + sizeof(c0) * 2), c2);
      79     4334090 :         _mm_storeu_ps((float*)(dst + i + sizeof(c0) * 3), c3);
      80             :     }
      81             : 
      82             :     // copy the remainder
      83      599700 :     if (i < size)
      84      486248 :         eb_memcpy_small(dst + i, src + i, size - i);
      85      599640 : }
      86    40845900 : void eb_memcpy(void  *dst_ptr, void  *src_ptr, size_t size) {
      87    40845900 :     if (size > 64)
      88      599702 :         eb_memcpy_sse(dst_ptr, src_ptr, size);
      89             :     else
      90    40246200 :         eb_memcpy_small(dst_ptr, src_ptr, size);
      91    40846900 : }
      92             : /*****************************************
      93             :  * Z-Order
      94             :  *****************************************/
      95             : static TxSize blocksize_to_txsize[BlockSizeS_ALL] = {
      96             :       TX_4X4    ,      // BLOCK_4X4
      97             :       TX_4X8    ,      // BLOCK_4X8
      98             :       TX_8X4    ,      // BLOCK_8X4
      99             :       TX_8X8    ,      // BLOCK_8X8
     100             :       TX_8X16   ,      // BLOCK_8X16
     101             :       TX_16X8   ,      // BLOCK_16X8
     102             :       TX_16X16  ,      // BLOCK_16X16
     103             :       TX_16X32  ,      // BLOCK_16X32
     104             :       TX_32X16  ,      // BLOCK_32X16
     105             :       TX_32X32  ,      // BLOCK_32X32
     106             :       TX_32X64  ,      // BLOCK_32X64
     107             :       TX_64X32  ,      // BLOCK_64X32
     108             :       TX_64X64  ,      // BLOCK_64X64
     109             :       TX_64X64  ,      // BLOCK_64X128
     110             :       TX_64X64  ,      // BLOCK_128X64
     111             :       TX_64X64  ,      // BLOCK_128X128
     112             :       TX_4X16   ,      // BLOCK_4X16
     113             :       TX_16X4   ,      // BLOCK_16X4
     114             :       TX_8X32   ,      // BLOCK_8X32
     115             :       TX_32X8   ,      // BLOCK_32X8
     116             :       TX_16X64  ,      // BLOCK_16X64
     117             :       TX_64X16         // BLOCK_64X16
     118             : };
     119           0 : EbErrorType z_order_increment(
     120             :     uint32_t *x_loc,   // x location, level agnostic
     121             :     uint32_t *y_loc)   // y location, level agnostic
     122             : {
     123           0 :     EbErrorType return_error = EB_ErrorNone;
     124             :     uint32_t mask;
     125             : 
     126             :     // The basic idea of this function is to increment an x,y coordinate
     127             :     // that has had its size removed to the next z-coding order location.
     128             :     //
     129             :     // In a four quadrant partition, the z coding order is [0,0], [1,0], [0,1], [1,1]
     130             :     // Some observations (only looking at one bit position or the LSB) are:
     131             :     //  1. X is always toggled (achieved with X ^= 0x1)
     132             :     //  2. Y can be toggled with (Y = Y ^ X)
     133             :     //  3. Recall that a value XOR'ed with 1 toggles, and XOR'ed with 0 stays the same
     134             :     //
     135             :     //  Extending this logic is somewhat trickier. The two main observations to make are
     136             :     //  4. The LSB of X and Y are always progressed.
     137             :     //  5. Every other bit-position, N, other than the LSB are progressed in their state
     138             :     //     when the N-1 bit position resets back to [0,0].
     139             :     //
     140             :     //  From 5, we can infer the need of a "progression mask" of the form 0x1, 0x3, 0x7, 0xF, etc.
     141             :     //  The first step of contructing the mask is to find which bit positions are ready to
     142             :     //  reset (found by X & Y) and setting the LSB of the mask to 1 (the LSB always progresses).
     143             :     //  The second step is to eliminate all ones from the mask above the lowest-ordered zero bit.
     144             :     //  Note we can achieve more precision in the second mask step by more masking-out operations,
     145             :     //  but for a 64 -> 4 (5 steps), the precision below is sufficient.
     146             :     //
     147             :     //  Finally, X and Y are progressed only at the bit-positions in the mask.
     148             : 
     149           0 :     mask = ((*x_loc & *y_loc) << 1) | 0x1;
     150           0 :     mask &= (mask << 1) | 0x01;
     151           0 :     mask &= (mask << 2) | 0x03;
     152           0 :     mask &= (mask << 4) | 0x0F;
     153           0 :     mask &= (mask << 8) | 0xFF;
     154             : 
     155           0 :     *y_loc ^= *x_loc & mask;
     156           0 :     *x_loc ^= mask;
     157             : 
     158           0 :     return return_error;
     159             : }
     160             : 
     161             : /*****************************************
     162             :  * Z-Order Increment with Level
     163             :  *   This is the main function for progressing
     164             :  *   through a treeblock's coding units. To get
     165             :  *   the true CU size, multiple the x_loc, y_loc
     166             :  *   by the smallest CU size.
     167             :  *****************************************/
     168           0 : void ZOrderIncrementWithLevel(
     169             :     uint32_t *x_loc,   // x location, units of smallest block size
     170             :     uint32_t *y_loc,   // y location, units of smallest block size
     171             :     uint32_t *level,  // level, number of block size-steps from the smallest block size
     172             :     //   (e.g. if 8x8 = level 0, 16x16 = level 1, 32x32 == level 2, 64x64 == level 3)
     173             :     uint32_t *index)  // The CU index, can be used to index a lookup table (see get_coded_unit_stats)
     174             : {
     175             :     uint32_t mask;
     176             : 
     177             :     // The basic idea of this function is to increment an x,y coordinate
     178             :     // that has had its size removed to the next z-coding order location.
     179             :     //
     180             :     // In a four quadrant partition, the z coding order is [0,0], [1,0], [0,1], [1,1]
     181             :     // Some observations (only looking at one bit position or the LSB) are:
     182             :     //  1. X is always toggled (achieved with X ^= 0x1)
     183             :     //  2. Y can be toggled with (Y = Y ^ X)
     184             :     //  3. Recall that a value XOR'ed with 1 toggles, and XOR'ed with 0 stays the same
     185             :     //
     186             :     //  Extending this logic is somewhat trickier. The two main observations to make are
     187             :     //  4. The LSB of X and Y are always progressed.
     188             :     //  5. Every other bit-position, N, other than the LSB are progressed in their state
     189             :     //     when the N-1 bit position resets back to [0,0].
     190             :     //
     191             :     //  From 5, we can infer the need of a "progression mask" of the form 0x1, 0x3, 0x7, 0xF, etc.
     192             :     //  The first step of contructing the mask is to find which bit positions are ready to
     193             :     //  reset (found by X & Y) and setting the LSB of the mask to 1 (the LSB always progresses).
     194             :     //  The second step is to eliminate all ones from the mask above the lowest-ordered zero bit.
     195             :     //  Note we can achieve more precision in the second mask step by more masking-out operations,
     196             :     //  but for a 64 -> 4 (5 steps), the precision below is sufficient.
     197             :     //
     198             :     //  Finally, X and Y are progressed only at the bit-positions in the mask.
     199             : 
     200             :     // Seed the mask
     201           0 :     mask = ((*x_loc & *y_loc) << 1) | 0x1;
     202             : 
     203             :     // This step zero-outs the mask if level is not zero.
     204             :     //   The purpose of this is step further down the tree
     205             :     //   if not already at the bottom of the tree
     206             :     //   Equivalent to: mask = (level > 0) ? mask : 0;
     207           0 :     mask &= (uint32_t)(-(*level == 0));
     208             : 
     209             :     // Construct the mask
     210           0 :     mask &= (mask << 1) | 0x01;
     211           0 :     mask &= (mask << 2) | 0x03;
     212           0 :     mask &= (mask << 4) | 0x0F;
     213           0 :     mask &= (mask << 8) | 0xFF;
     214             : 
     215             :     // Decrement the level if not already at the bottom of the tree
     216             :     //  Equivalent to level = (level > 0) ? level - 1 : 0;
     217           0 :     *level = (*level - 1) & -(*level > 0);
     218             : 
     219             :     // If at one of the "corner" positions where the mask > 1, we
     220             :     //   need to increase the level since larger blocks are processed
     221             :     //   before smaller blocks.  Note that by using mask, we are protected
     222             :     //   against inadvertently incrementing the level if not already at
     223             :     //   the bottom of the tree.  The level increment should really be
     224             :     //   Log2f(mask >> 1), but since there are only 3 valid positions,
     225             :     //   we are using a cheesy Log2f approximation
     226             :     //   Equivalent to: level += (mask > 3) ? 2 : mask >> 1;
     227             : 
     228           0 :     *level += ((2 ^ (mask >> 1)) & -(mask > 3)) ^ (mask >> 1);
     229             : 
     230             :     // Increment the x_loc, y_loc.  Note that this only occurs when
     231             :     //   we are at the bottom of the tree.
     232           0 :     *y_loc ^= *x_loc & mask;
     233           0 :     *x_loc ^= mask;
     234             : 
     235             :     // Increment the index. Note that the natural progression of this
     236             :     //   block aligns with how leafs are stored in the accompanying
     237             :     //   CU data structures.
     238           0 :     ++(*index);
     239             : 
     240           0 :     return;
     241             : }
     242             : 
     243             : static CodedUnitStats CodedUnitStatsArray[] = {
     244             :     //   Depth       Size      SizeLog2     OriginX    OriginY   cu_num_in_depth   Index
     245             :         {0,           64,         6,           0,         0,        0     ,   0    },   // 0
     246             :         {1,           32,         5,           0,         0,        0     ,   1    },   // 1
     247             :         {2,           16,         4,           0,         0,        0     ,   1    },   // 2
     248             :         {3,            8,         3,           0,         0,        0     ,   1    },   // 3
     249             :         {3,            8,         3,           8,         0,        1     ,   1    },   // 4
     250             :         {3,            8,         3,           0,         8,        8     ,   1    },   // 5
     251             :         {3,            8,         3,           8,         8,        9     ,   1    },   // 6
     252             :         {2,           16,         4,          16,         0,        1     ,   1    },   // 7
     253             :         {3,            8,         3,          16,         0,        2     ,   1    },   // 8
     254             :         {3,            8,         3,          24,         0,        3     ,   1    },   // 9
     255             :         {3,            8,         3,          16,         8,        10    ,   1     },  // 10
     256             :         {3,            8,         3,          24,         8,        11    ,   1     },  // 11
     257             :         {2,           16,         4,           0,        16,        4     ,   1    },   // 12
     258             :         {3,            8,         3,           0,        16,        16    ,   1     },  // 13
     259             :         {3,            8,         3,           8,        16,        17    ,   1     },  // 14
     260             :         {3,            8,         3,           0,        24,        24    ,   1     },  // 15
     261             :         {3,            8,         3,           8,        24,        25    ,   1     },  // 16
     262             :         {2,           16,         4,          16,        16,        5     ,   1    },   // 17
     263             :         {3,            8,         3,          16,        16,        18    ,   1     },  // 18
     264             :         {3,            8,         3,          24,        16,        19    ,   1     },  // 19
     265             :         {3,            8,         3,          16,        24,        26    ,   1     },  // 20
     266             :         {3,            8,         3,          24,        24,        27    ,   1     },  // 21
     267             :         {1,           32,         5,          32,         0,        1     ,   2    },   // 22
     268             :         {2,           16,         4,          32,         0,        2     ,   2    },   // 23
     269             :         {3,            8,         3,          32,         0,        4     ,   2    },   // 24
     270             :         {3,            8,         3,          40,         0,        5     ,   2    },   // 25
     271             :         {3,            8,         3,          32,         8,        12    ,   2     },  // 26
     272             :         {3,            8,         3,          40,         8,        13    ,   2     },  // 27
     273             :         {2,           16,         4,          48,         0,        3     ,   2    },   // 28
     274             :         {3,            8,         3,          48,         0,        6     ,   2    },   // 29
     275             :         {3,            8,         3,          56,         0,        7     ,   2    },   // 30
     276             :         {3,            8,         3,          48,         8,        14    ,   2     },  // 31
     277             :         {3,            8,         3,          56,         8,        15    ,   2     },  // 32
     278             :         {2,           16,         4,          32,        16,        6     ,   2    },   // 33
     279             :         {3,            8,         3,          32,        16,        20    ,   2     },  // 34
     280             :         {3,            8,         3,          40,        16,        21    ,   2     },  // 35
     281             :         {3,            8,         3,          32,        24,        28    ,   2     },  // 36
     282             :         {3,            8,         3,          40,        24,        29    ,   2     },  // 37
     283             :         {2,           16,         4,          48,        16,        7     ,   2    },   // 38
     284             :         {3,            8,         3,          48,        16,        22    ,   2     },  // 39
     285             :         {3,            8,         3,          56,        16,        23    ,   2     },  // 40
     286             :         {3,            8,         3,          48,        24,        30    ,   2     },  // 41
     287             :         {3,            8,         3,          56,        24,        31    ,   2     },  // 42
     288             :         {1,           32,         5,           0,        32,        2     ,   3    },   // 43
     289             :         {2,           16,         4,           0,        32,        8     ,   3    },   // 44
     290             :         {3,            8,         3,           0,        32,        32    ,   3     },  // 45
     291             :         {3,            8,         3,           8,        32,        33    ,   3     },  // 46
     292             :         {3,            8,         3,           0,        40,        40    ,   3     },  // 47
     293             :         {3,            8,         3,           8,        40,        41    ,   3     },  // 48
     294             :         {2,           16,         4,          16,        32,        9     ,   3    },   // 49
     295             :         {3,            8,         3,          16,        32,        34    ,   3     },  // 50
     296             :         {3,            8,         3,          24,        32,        35    ,   3     },  // 51
     297             :         {3,            8,         3,          16,        40,        42    ,   3     },  // 52
     298             :         {3,            8,         3,          24,        40,        43    ,   3     },  // 53
     299             :         {2,           16,         4,           0,        48,        12    ,   3     },  // 54
     300             :         {3,            8,         3,           0,        48,        48    ,   3     },  // 55
     301             :         {3,            8,         3,           8,        48,        49    ,   3     },  // 56
     302             :         {3,            8,         3,           0,        56,        56    ,   3     },  // 57
     303             :         {3,            8,         3,           8,        56,        57    ,   3     },  // 58
     304             :         {2,           16,         4,          16,        48,        13    ,   3     },  // 59
     305             :         {3,            8,         3,          16,        48,        50    ,   3     },  // 60
     306             :         {3,            8,         3,          24,        48,        51    ,   3     },  // 61
     307             :         {3,            8,         3,          16,        56,        58    ,   3     },  // 62
     308             :         {3,            8,         3,          24,        56,        59    ,   3     },  // 63
     309             :         {1,           32,         5,          32,        32,        3     ,   4     },  // 64
     310             :         {2,           16,         4,          32,        32,        10    ,   4     },  // 65
     311             :         {3,            8,         3,          32,        32,        36    ,   4     },  // 66
     312             :         {3,            8,         3,          40,        32,        37    ,   4     },  // 67
     313             :         {3,            8,         3,          32,        40,        44    ,   4     },  // 68
     314             :         {3,            8,         3,          40,        40,        45    ,   4     },  // 69
     315             :         {2,           16,         4,          48,        32,        11    ,   4     },  // 70
     316             :         {3,            8,         3,          48,        32,        38    ,   4     },  // 71
     317             :         {3,            8,         3,          56,        32,        39    ,   4     },  // 72
     318             :         {3,            8,         3,          48,        40,        46    ,   4     },  // 73
     319             :         {3,            8,         3,          56,        40,        47    ,   4     },  // 74
     320             :         {2,           16,         4,          32,        48,        14    ,   4     },  // 75
     321             :         {3,            8,         3,          32,        48,        52    ,   4     },  // 76
     322             :         {3,            8,         3,          40,        48,        53    ,   4     },  // 77
     323             :         {3,            8,         3,          32,        56,        60    ,   4     },  // 78
     324             :         {3,            8,         3,          40,        56,        61    ,   4     },  // 79
     325             :         {2,           16,         4,          48,        48,        15    ,   4     },  // 80
     326             :         {3,            8,         3,          48,        48,        54    ,   4     },  // 81
     327             :         {3,            8,         3,          56,        48,        55    ,   4     },  // 82
     328             :         {3,            8,         3,          48,        56,        62    ,   4     },  // 83
     329             :         {3,            8,         3,          56,        56,        63    ,   4     }   // 84
     330             : };
     331             : 
     332             : /**************************************************************
     333             :  * Get Coded Unit Statistics
     334             :  **************************************************************/
     335      852509 : const CodedUnitStats* get_coded_unit_stats(const uint32_t cuIdx)
     336             : {
     337             :     //ASSERT(cuIdx < CU_MAX_COUNT && "get_coded_unit_stats: Out-of-range CU Idx\n");
     338      852509 :     if (cuIdx == 255)
     339           0 :         printf("Invalid CuIndex\n");
     340             : 
     341      852566 :     return &CodedUnitStatsArray[cuIdx];
     342             : }
     343             : 
     344             : static const TransformUnitStats TransformUnitStatsArray[] = {
     345             :     //
     346             :     //        depth
     347             :     //       /
     348             :     //      /       offset_x (units of the current depth)
     349             :     //     /       /
     350             :     //    /       /       offset_y (units of the current depth)
     351             :     //   /       /       /
     352             :     {0,     0,      0},     // 0
     353             :     {1,     0,      0},     // 1
     354             :     {1,     2,      0},     // 2
     355             :     {1,     0,      2},     // 3
     356             :     {1,     2,      2},     // 4
     357             :     {2,     0,      0},     // 5
     358             :     {2,     1,      0},     // 6
     359             :     {2,     0,      1},     // 7
     360             :     {2,     1,      1},     // 8
     361             :     {2,     2,      0},     // 9
     362             :     {2,     3,      0},     // 10
     363             :     {2,     2,      1},     // 11
     364             :     {2,     3,      1},     // 12
     365             :     { 2,    0,        2},     // 13
     366             :     { 2,    1,        2},     // 14
     367             :     { 2,    0,        3},     // 15
     368             :     { 2,    1,        3},     // 16
     369             :     { 2,    2,        2},     // 17
     370             :     { 2,    3,        2},     // 18
     371             :     { 2,    2,        3},     // 19
     372             :     { 2,    3,        3},    // 20
     373             :     {0xFF,  0xFF,   0xFF}   // Invalid
     374             : };
     375             : 
     376             : /**************************************************************
     377             :  * Get Transform Unit Statistics
     378             :  **************************************************************/
     379           0 : const TransformUnitStats* get_transform_unit_stats(const uint32_t tuIdx)
     380             : {
     381           0 :     return &TransformUnitStatsArray[tuIdx];
     382             : }
     383             : 
     384             : /*****************************************
     385             :  * Integer Log 2
     386             :  *  This is a quick adaptation of a Number
     387             :  *  Leading Zeros (NLZ) algorithm to get
     388             :  *  the log2f of an integer
     389             :  *****************************************/
     390             :  /*uint32_t Log2f(uint32_t x)
     391             :  {
     392             :      uint32_t y;
     393             :      int32_t n = 32, c = 16;
     394             : 
     395             :      do {
     396             :          y = x >> c;
     397             :          if (y > 0) {
     398             :              n -= c;
     399             :              x = y;
     400             :          }
     401             :          c >>= 1;
     402             :      } while (c > 0);
     403             : 
     404             :      return 32 - n;
     405             :  }*/
     406             : 
     407             :  /*****************************************
     408             :   * Long Log 2
     409             :   *  This is a quick adaptation of a Number
     410             :   *  Leading Zeros (NLZ) algorithm to get
     411             :   *  the log2f of a 64-bit number
     412             :   *****************************************/
     413           0 : inline uint64_t Log2f64(uint64_t x)
     414             : {
     415             :     uint64_t y;
     416           0 :     int64_t n = 64, c = 32;
     417             : 
     418             :     do {
     419           0 :         y = x >> c;
     420           0 :         if (y > 0) {
     421           0 :             n -= c;
     422           0 :             x = y;
     423             :         }
     424           0 :         c >>= 1;
     425           0 :     } while (c > 0);
     426             : 
     427           0 :     return 64 - n;
     428             : }
     429             : 
     430             : /*****************************************
     431             :  * Endian Swap
     432             :  *****************************************/
     433           0 : uint32_t endian_swap(uint32_t ui)
     434             : {
     435             :     uint32_t ul2;
     436             : 
     437           0 :     ul2 = ui >> 24;
     438           0 :     ul2 |= (ui >> 8) & 0x0000ff00;
     439           0 :     ul2 |= (ui << 8) & 0x00ff0000;
     440           0 :     ul2 |= ui << 24;
     441             : 
     442           0 :     return ul2;
     443             : }
     444             : 
     445           0 : uint64_t log2f_high_precision(uint64_t x, uint8_t precision)
     446             : {
     447           0 :     uint64_t sigBitLocation = Log2f64(x);
     448           0 :     uint64_t Remainder = x - ((uint64_t)1 << (uint8_t)sigBitLocation);
     449             :     uint64_t result;
     450             : 
     451           0 :     result = (sigBitLocation << precision) + ((Remainder << precision) / ((uint64_t)1 << (uint8_t)sigBitLocation));
     452             : 
     453           0 :     return result;
     454             : }
     455             : 
     456             : // Link list b after the last node of list a. Returns b when a is NULL; otherwise returns the node b was attached to.
     457         120 : EbLinkedListNode* concat_eb_linked_list(EbLinkedListNode* a, EbLinkedListNode* b)
     458             : {
     459         120 :     if (a)
     460             :     {
     461           0 :         while (a->next) // walk to the last node of a
     462           0 :             a = a->next;
     463           0 :         a->next = b;
     464           0 :         return a; // NOTE(review): a has been advanced, so this is the last node of the original list, not its head; equal to the head only for a single-node a
     465             :     }
     466             :     else
     467         120 :         return b;
     468             : }
     469             : 
     470             : // Split a linked list in two: nodes for which predicate_func() is true are returned, all other nodes are stored in *restLL.
     471         120 : EbLinkedListNode* split_eb_linked_list(EbLinkedListNode* input, EbLinkedListNode** restLL, EbBool(*predicate_func)(EbLinkedListNode*))
     472             : {
     473         120 :     EbLinkedListNode* llTruePtr = (EbLinkedListNode *)EB_NULL;    // list of nodes satisfying predicate_func(node) == TRUE
     474         120 :     EbLinkedListNode* llRestPtr = (EbLinkedListNode *)EB_NULL;    // list of nodes satisfying predicate_func(node) != TRUE
     475             : 
     476         120 :     while (input)
     477             :     {
     478           0 :         EbLinkedListNode* next = input->next; // save the successor before the node is unlinked
     479           0 :         input->next = (EbLinkedListNode *)EB_NULL; // detach: input is now a single-node list
     480           0 :         if (predicate_func(input))
     481           0 :             llTruePtr = concat_eb_linked_list(input, llTruePtr); // the single node is placed in front, so each output list is in reverse input order
     482             :         else
     483           0 :             llRestPtr = concat_eb_linked_list(input, llRestPtr);
     484           0 :         input = next;
     485             :     }
     486             : 
     487         120 :     *restLL = llRestPtr;
     488         120 :     return llTruePtr;
     489             : }
     490             : 
     491             : static const MiniGopStats MiniGopStatsArray[] = {
     492             :     //    hierarchical_levels    start_index    end_index    Lenght    mini_gop_index (row number, shown in the trailing comment of each row)
     493             :     { 5,  0, 31, 32 },    // 0
     494             :     { 4,  0, 15, 16 },    // 1
     495             :     { 3,  0,  7,  8 },    // 2
     496             :     { 2,  0,  3,  4 },    // 3
     497             :     { 2,  4,  7,  4 },    // 4
     498             :     { 3,  8, 15,  8 },    // 5
     499             :     { 2,  8, 11,  4 },    // 6
     500             :     { 2, 12, 15,  4 },    // 7
     501             :     { 4, 16, 31, 16 },    // 8
     502             :     { 3, 16, 23,  8 },    // 9
     503             :     { 2, 16, 19,  4 },    // 10
     504             :     { 2, 20, 23,  4 },    // 11
     505             :     { 3, 24, 31,  8 },    // 12
     506             :     { 2, 24, 27,  4 },    // 13
     507             :     { 2, 28, 31,  4 }    // 14
     508             : };
     509             : 
     510             : /**************************************************************
     511             : * Get Mini GOP Statistics: returns a pointer to row `mini_gop_index` of MiniGopStatsArray
     512             : **************************************************************/
     513         270 : const MiniGopStats* get_mini_gop_stats(const uint32_t mini_gop_index)
     514             : {
     515         270 :     return &MiniGopStatsArray[mini_gop_index]; // NOTE(review): no range check; the table has 15 rows (0..14)
     516             : }
     517             : 
     518             : uint32_t ns_quarter_off_mult[9/*Up to 9 part*/][2/*x+y*/][4/*Up to 4 ns blocks per part*/] =
     519             : {
     520             :     // x/y origin offset of each block of a partition, in units of a quarter of the parent square size (md_scan_all_blks multiplies by sq_size / 4). 9 means not used.
     521             : 
     522             :     //          |   x   |     |   y   |
     523             : 
     524             :     /*P=0*/  {  {0,9,9,9}  ,  {0,9,9,9}  },
     525             :     /*P=1*/  {  {0,0,9,9}  ,  {0,2,9,9}  },
     526             :     /*P=2*/  {  {0,2,9,9}  ,  {0,0,9,9}  },
     527             :     /*P=3*/  {  {0,2,0,9}  ,  {0,0,2,9}  },
     528             :     /*P=4*/  {  {0,0,2,9}  ,  {0,2,2,9}  },
     529             :     /*P=5*/  {  {0,0,2,9}  ,  {0,2,0,9}  },
     530             :     /*P=6*/  {  {0,2,2,9}  ,  {0,0,2,9}  },
     531             :     /*P=7*/  {  {0,0,0,0}  ,  {0,1,2,3}  },
     532             :     /*P=8*/  {  {0,1,2,3}  ,  {0,0,0,0}  }
     533             : };
     534             : 
     535             : uint32_t ns_quarter_size_mult[9/*Up to 9 part*/][2/*h+v*/][4/*Up to 4 ns blocks per part*/] =
     536             : {
     537             :     // Size of each block of a partition, in units of a quarter of the parent square size: md_scan_all_blks uses [..][0] for bwidth and [..][1] for bheight. 9 means not used.
     538             : 
     539             :     //          |   h   |     |   v   |
     540             : 
     541             :     /*P=0*/  {  {4,9,9,9}  ,  {4,9,9,9}  },
     542             :     /*P=1*/  {  {4,4,9,9}  ,  {2,2,9,9}  },
     543             :     /*P=2*/  {  {2,2,9,9}  ,  {4,4,9,9}  },
     544             :     /*P=3*/  {  {2,2,4,9}  ,  {2,2,2,9}  },
     545             :     /*P=4*/  {  {4,2,2,9}  ,  {2,2,2,9}  },
     546             :     /*P=5*/  {  {2,2,2,9}  ,  {2,2,4,9}  },
     547             :     /*P=6*/  {  {2,2,2,9}  ,  {4,2,2,9}  },
     548             :     /*P=7*/  {  {4,4,4,4}  ,  {1,1,1,1}  },
     549             :     /*P=8*/  {  {1,1,1,1}  ,  {4,4,4,4}  }
     550             : };
     551             : 
     552             : BlockSize hvsize_to_bsize[/*H*/6][/*V*/6] = // md_scan_all_blks indexes this as [bwidth_log2 - 2][bheight_log2 - 2]; BLOCK_INVALID marks unsupported width/height pairs
     553             : {
     554             :     {  BLOCK_4X4,       BLOCK_4X8,     BLOCK_4X16,      BLOCK_INVALID,   BLOCK_INVALID,   BLOCK_INVALID      }, // width 4
     555             :     {  BLOCK_8X4,       BLOCK_8X8,     BLOCK_8X16,      BLOCK_8X32,      BLOCK_INVALID,   BLOCK_INVALID      }, // width 8
     556             :     {  BLOCK_16X4,      BLOCK_16X8,    BLOCK_16X16,     BLOCK_16X32,     BLOCK_16X64,     BLOCK_INVALID   }, // width 16
     557             :     {  BLOCK_INVALID,   BLOCK_32X8,    BLOCK_32X16,     BLOCK_32X32,     BLOCK_32X64,     BLOCK_INVALID   }, // width 32
     558             :     {  BLOCK_INVALID,   BLOCK_INVALID, BLOCK_64X16,     BLOCK_64X32,     BLOCK_64X64,     BLOCK_64X128    }, // width 64
     559             :     {  BLOCK_INVALID,   BLOCK_INVALID, BLOCK_INVALID,   BLOCK_INVALID,   BLOCK_128X64,    BLOCK_128X128   } // width 128
     560             : };
     561             : 
     562             : uint32_t  max_sb = 64; // square size that md_scan_all_blks maps to depth 0; each halving of the size adds one depth level
     563             : uint32_t  max_depth = 5; // presumably the number of depth levels; not referenced in this part of the file - confirm
     564             : uint32_t  max_part = 9; // upper bound on partition shapes scanned per square; md_scan_all_blks lowers it for square sizes 128, 8 and 4
     565             : uint32_t  max_num_active_blocks; // number of blk_geom_dps / blk_geom_mds entries scanned by the search_matching_from_* helpers; not assigned in this part of the file
     566             : 
     567             : //data can be organized in 2 forms: depth scan (dps) or MD scan (mds):
     568             : //dps: all depth0 - all depth1 - all depth2 - all depth3.
     569             : //     within a depth: square blk0 in raster scan (followed by all its ns blocks),
     570             : //     square blk1 in raster scan (followed by all its ns blocks), etc
     571             : //mds: top-down and Z scan.
     572             : BlockGeom blk_geom_dps[MAX_NUM_BLOCKS_ALLOC];  //to access geom info of a particular block : use this table if you have the block index in depth scan
     573             : BlockGeom blk_geom_mds[MAX_NUM_BLOCKS_ALLOC];  //to access geom info of a particular block : use this table if you have the block index in md    scan
     574             : 
     575        2202 : uint32_t search_matching_from_dps( // returns the blk_geom_dps index of the single entry matching (depth, part, x, y), or 0xFFFF when none or more than one matches
     576             :     uint32_t depth,
     577             :     uint32_t part,
     578             :     uint32_t x,
     579             :     uint32_t y)
     580             : {
     581        2202 :     uint32_t found = 0;
     582             :     uint32_t it;
     583        2202 :     uint32_t matched = 0xFFFF; // sentinel meaning "no unique match"
     584     2426600 :     for (it = 0; it < max_num_active_blocks; it++) // keeps scanning after the first hit so that a duplicate can be detected
     585             :     {
     586     2424400 :         if (blk_geom_dps[it].depth == depth && blk_geom_dps[it].shape == part && blk_geom_dps[it].origin_x == x && blk_geom_dps[it].origin_y == y)
     587             :         {
     588        2202 :             if (found == 0)
     589             :             {
     590        2202 :                 matched = it;
     591        2202 :                 found = 1;
     592             :             }
     593             :             else {
     594           0 :                 matched = 0xFFFF; // second hit: the key is ambiguous, report failure
     595           0 :                 break;
     596             :             }
     597             :         }
     598             :     }
     599             : 
     600        2202 :     if (matched == 0xFFFF)
     601           0 :         printf(" \n\n PROBLEM\n\n ");
     602             : 
     603        2202 :     return matched; // NOTE(review): md_scan_all_blks uses this value as an index into blk_geom_dps without checking for 0xFFFF
     604             : }
     605        2202 : uint32_t search_matching_from_mds( // returns the blk_geom_mds index of the single entry matching (depth, part, x, y), or 0xFFFF when none or more than one matches
     606             :     uint32_t depth,
     607             :     uint32_t part,
     608             :     uint32_t x,
     609             :     uint32_t y)
     610             : {
     611        2202 :     uint32_t found = 0;
     612             :     uint32_t it;
     613        2202 :     uint32_t matched = 0xFFFF; // sentinel meaning "no unique match"
     614     2426600 :     for (it = 0; it < max_num_active_blocks; it++) // keeps scanning after the first hit so that a duplicate can be detected
     615             :     {
     616     2424400 :         if (blk_geom_mds[it].depth == depth && blk_geom_mds[it].shape == part && blk_geom_mds[it].origin_x == x && blk_geom_mds[it].origin_y == y)
     617             :         {
     618        2202 :             if (found == 0)
     619             :             {
     620        2202 :                 matched = it;
     621        2202 :                 found = 1;
     622             :             }
     623             :             else {
     624           0 :                 matched = 0xFFFF; // second hit: the key is ambiguous, report failure
     625           0 :                 break;
     626             :             }
     627             :         }
     628             :     }
     629             : 
     630        2202 :     if (matched == 0xFFFF)
     631           0 :         printf(" \n\n PROBLEM\n\n ");
     632             : 
     633        2202 :     return matched; // callers must check for 0xFFFF before using the value as an index
     634             : }
     635             : 
     636        9468 : static INLINE TxSize av1_get_tx_size( // transform size for block size sb_type: table lookup for plane 0, av1_get_max_uv_txsize() for any other plane
     637             :     BlockSize  sb_type,
     638             :     int32_t plane/*, const MacroBlockD *xd*/) {
     639             :     //const MbModeInfo *mbmi = xd->mi[0];
     640             :     // if (xd->lossless[mbmi->segment_id]) return TX_4X4;
     641        9468 :     if (plane == 0) return blocksize_to_txsize[sb_type]; // plane 0 is resolved directly from the lookup table
     642             :     // const MacroblockdPlane *pd = &xd->plane[plane];
     643             : 
     644        2202 :     uint32_t subsampling_x = plane > 0 ? 1 : 0; // plane 0 has already returned, so this is 1 for every positive plane and 0 only for a negative one
     645        2202 :     uint32_t subsampling_y = plane > 0 ? 1 : 0;
     646        2202 :     return av1_get_max_uv_txsize(/*mbmi->*/sb_type, subsampling_x, subsampling_y);
     647             :     UNUSED(plane); // unreachable: placed after the return statement
     648             : }
     649             : 
     650         682 : void md_scan_all_blks(uint32_t *idx_mds, uint32_t sq_size, uint32_t x, uint32_t y, int32_t is_last_quadrant, uint8_t quad_it)
     651             : {
     652             :     //the input block is the parent square block of size sq_size located at pos (x,y)
     653             : 
     654             :     uint32_t part_it, nsq_it, d1_it, sqi_mds;
     655             : 
     656         682 :     uint32_t halfsize = sq_size / 2;
     657         682 :     uint32_t quartsize = sq_size / 4;
     658             : 
     659        1364 :     uint32_t max_part_updated = sq_size == 128 ? MIN(max_part, 7) :
     660        1236 :         sq_size == 8 ? MIN(max_part, 3) :
     661             : 
     662         554 :         sq_size == 4 ? 1 : max_part;
     663             : 
     664         682 :     d1_it = 0;
     665         682 :     sqi_mds = *idx_mds;
     666             : 
     667        1956 :     for (part_it = 0; part_it < max_part_updated; part_it++)
     668             :     {
     669        1274 :         uint32_t tot_num_ns_per_part =
     670        1274 :             part_it < 1 ? 1 :
     671             :             part_it < 3 ? 2 :
     672             :             part_it < 7 ? 3 : 4;
     673             : 
     674        3476 :         for (nsq_it = 0; nsq_it < tot_num_ns_per_part; nsq_it++)
     675             :         {
     676        4354 :             blk_geom_mds[*idx_mds].depth = sq_size == max_sb / 1 ? 0 :
     677        2152 :                 sq_size == max_sb / 2 ? 1 :
     678        1952 :                 sq_size == max_sb / 4 ? 2 :
     679        1152 :                 sq_size == max_sb / 8 ? 3 :
     680         512 :                 sq_size == max_sb / 16 ? 4 : 5;
     681             : 
     682        2202 :             blk_geom_mds[*idx_mds].sq_size = sq_size;
     683        2202 :             blk_geom_mds[*idx_mds].is_last_quadrant = is_last_quadrant;
     684        2202 :             blk_geom_mds[*idx_mds].quadi = quad_it;
     685             : 
     686        2202 :             blk_geom_mds[*idx_mds].shape = (PART)part_it;
     687        2202 :             blk_geom_mds[*idx_mds].origin_x = x + quartsize * ns_quarter_off_mult[part_it][0][nsq_it];
     688        2202 :             blk_geom_mds[*idx_mds].origin_y = y + quartsize * ns_quarter_off_mult[part_it][1][nsq_it];
     689             : 
     690        2202 :             blk_geom_mds[*idx_mds].d1i = d1_it++;
     691        2202 :             blk_geom_mds[*idx_mds].sqi_mds = sqi_mds;
     692        2202 :             blk_geom_mds[*idx_mds].totns = tot_num_ns_per_part;
     693        2202 :             blk_geom_mds[*idx_mds].nsi = nsq_it;
     694             : 
     695        2202 :             uint32_t matched = search_matching_from_dps(
     696        2202 :                 blk_geom_mds[*idx_mds].depth,
     697        2202 :                 blk_geom_mds[*idx_mds].shape,
     698        2202 :                 blk_geom_mds[*idx_mds].origin_x,
     699        2202 :                 blk_geom_mds[*idx_mds].origin_y);
     700             : 
     701        2202 :             blk_geom_mds[*idx_mds].blkidx_dps = blk_geom_dps[matched].blkidx_dps;
     702             : 
     703        2202 :             blk_geom_mds[*idx_mds].bwidth = quartsize * ns_quarter_size_mult[part_it][0][nsq_it];
     704        2202 :             blk_geom_mds[*idx_mds].bheight = quartsize * ns_quarter_size_mult[part_it][1][nsq_it];
     705        2202 :             blk_geom_mds[*idx_mds].bwidth_log2 = Log2f(blk_geom_mds[*idx_mds].bwidth);
     706        2202 :             blk_geom_mds[*idx_mds].bheight_log2 = Log2f(blk_geom_mds[*idx_mds].bheight);
     707        2202 :             blk_geom_mds[*idx_mds].bsize = hvsize_to_bsize[blk_geom_mds[*idx_mds].bwidth_log2 - 2][blk_geom_mds[*idx_mds].bheight_log2 - 2];
     708        2202 :             blk_geom_mds[*idx_mds].bwidth_uv = MAX(4, blk_geom_mds[*idx_mds].bwidth >> 1); // AMIR to clean to check for 4x4
     709        2202 :             blk_geom_mds[*idx_mds].bheight_uv = MAX(4, blk_geom_mds[*idx_mds].bheight >> 1);
     710        2202 :             blk_geom_mds[*idx_mds].has_uv = 1;
     711             : 
     712        2202 :             if (blk_geom_mds[*idx_mds].bwidth == 4 && blk_geom_mds[*idx_mds].bheight == 4)
     713         512 :                 blk_geom_mds[*idx_mds].has_uv = is_last_quadrant ? 1 : 0;
     714             : 
     715             :             else
     716        1690 :                 if ((blk_geom_mds[*idx_mds].bwidth >> 1) < blk_geom_mds[*idx_mds].bwidth_uv || (blk_geom_mds[*idx_mds].bheight >> 1) < blk_geom_mds[*idx_mds].bheight_uv) {
     717         768 :                     int32_t num_blk_same_uv = 1;
     718         768 :                     if (blk_geom_mds[*idx_mds].bwidth >> 1 < 4)
     719         384 :                         num_blk_same_uv *= 2;
     720         768 :                     if (blk_geom_mds[*idx_mds].bheight >> 1 < 4)
     721         384 :                         num_blk_same_uv *= 2;
     722             :                     //if (blk_geom_mds[*idx_mds].nsi % 2 == 0)
     723             :                     //if (blk_geom_mds[*idx_mds].nsi != (blk_geom_mds[*idx_mds].totns-1) )
     724         768 :                     if (blk_geom_mds[*idx_mds].nsi != (num_blk_same_uv - 1) && blk_geom_mds[*idx_mds].nsi != (2 * num_blk_same_uv - 1))
     725         384 :                         blk_geom_mds[*idx_mds].has_uv = 0;
     726             :                 }
     727             : 
     728        2202 :             blk_geom_mds[*idx_mds].bsize_uv = get_plane_block_size(blk_geom_mds[*idx_mds].bsize, 1, 1);
     729        2202 :             uint16_t   txb_itr = 0;
     730             :             // tx_depth 1 geom settings
     731        2202 :             uint8_t tx_depth = 0;
     732        4404 :             blk_geom_mds[*idx_mds].txb_count[tx_depth] = blk_geom_mds[*idx_mds].bsize == BLOCK_128X128 ? 4 :
     733        2202 :                 blk_geom_mds[*idx_mds].bsize == BLOCK_128X64 || blk_geom_mds[*idx_mds].bsize == BLOCK_64X128 ? 2 : 1;
     734        4404 :             for (txb_itr = 0; txb_itr < blk_geom_mds[*idx_mds].txb_count[tx_depth]; txb_itr++) {
     735        2202 :                 blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(blk_geom_mds[*idx_mds].bsize, 0);
     736        2202 :                 blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = av1_get_tx_size(blk_geom_mds[*idx_mds].bsize, 1);
     737        2202 :                 if (blk_geom_mds[*idx_mds].bsize == BLOCK_128X128)
     738             :                 {
     739           0 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = (txb_itr == 0 || txb_itr == 2) ? blk_geom_mds[*idx_mds].origin_x : blk_geom_mds[*idx_mds].origin_x + 64;
     740           0 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = (txb_itr == 0 || txb_itr == 1) ? blk_geom_mds[*idx_mds].origin_y : blk_geom_mds[*idx_mds].origin_y + 64;
     741             :                 }
     742        2202 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_128X64)
     743             :                 {
     744           0 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = (txb_itr == 0) ? blk_geom_mds[*idx_mds].origin_x : blk_geom_mds[*idx_mds].origin_x + 64;
     745           0 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y;
     746             :                 }
     747        2202 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_64X128)
     748             :                 {
     749           0 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x;
     750           0 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = (txb_itr == 0) ? blk_geom_mds[*idx_mds].origin_y : blk_geom_mds[*idx_mds].origin_y + 64;
     751             :                 }
     752             :                 else
     753             :                 {
     754        2202 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x;
     755        2202 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y;
     756             :                 }
     757             :                 /*if (blk_geom_mds[*idx_mds].bsize == BLOCK_16X8)
     758             :                     printf("");*/
     759        2202 :                 blk_geom_mds[*idx_mds].tx_boff_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] - blk_geom_mds[*idx_mds].origin_x;
     760        2202 :                 blk_geom_mds[*idx_mds].tx_boff_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] - blk_geom_mds[*idx_mds].origin_y;
     761        2202 :                 blk_geom_mds[*idx_mds].tx_width[tx_depth][txb_itr] = tx_size_wide[blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr]];
     762        2202 :                 blk_geom_mds[*idx_mds].tx_height[tx_depth][txb_itr] = tx_size_high[blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr]];
     763        2202 :                 blk_geom_mds[*idx_mds].tx_width_uv[tx_depth][txb_itr] = tx_size_wide[blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr]];
     764        2202 :                 blk_geom_mds[*idx_mds].tx_height_uv[tx_depth][txb_itr] = tx_size_high[blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr]];
     765             :             }
     766             :             // tx_depth 1 geom settings
     767        2202 :             tx_depth = 1;
     768        4404 :             blk_geom_mds[*idx_mds].txb_count[tx_depth] = blk_geom_mds[*idx_mds].bsize == BLOCK_128X128 ? 4 :
     769        2202 :                 blk_geom_mds[*idx_mds].bsize == BLOCK_128X64 || blk_geom_mds[*idx_mds].bsize == BLOCK_64X128 ? 2 : 1;
     770             : 
     771        2202 :             if (blk_geom_mds[*idx_mds].bsize == BLOCK_64X64 ||
     772        2200 :                 blk_geom_mds[*idx_mds].bsize == BLOCK_32X32 ||
     773        2176 :                 blk_geom_mds[*idx_mds].bsize == BLOCK_16X16 ||
     774        2080 :                 blk_geom_mds[*idx_mds].bsize == BLOCK_8X8)
     775             :             {
     776         506 :                 blk_geom_mds[*idx_mds].txb_count[tx_depth] = 4;
     777             :             }
     778             : 
     779        2202 :             if (blk_geom_mds[*idx_mds].bsize == BLOCK_64X32 ||
     780        2194 :                 blk_geom_mds[*idx_mds].bsize == BLOCK_32X64 ||
     781        2186 :                 blk_geom_mds[*idx_mds].bsize == BLOCK_32X16 ||
     782        2154 :                 blk_geom_mds[*idx_mds].bsize == BLOCK_16X32 ||
     783        2122 :                 blk_geom_mds[*idx_mds].bsize == BLOCK_16X8 ||
     784        1994 :                 blk_geom_mds[*idx_mds].bsize == BLOCK_8X16)
     785             :             {
     786         336 :                 blk_geom_mds[*idx_mds].txb_count[tx_depth] = 2;
     787             :             }
     788        2202 :             if (blk_geom_mds[*idx_mds].bsize == BLOCK_64X16 ||
     789        2194 :                 blk_geom_mds[*idx_mds].bsize == BLOCK_16X64 ||
     790        2186 :                 blk_geom_mds[*idx_mds].bsize == BLOCK_32X8 ||
     791        2154 :                 blk_geom_mds[*idx_mds].bsize == BLOCK_8X32 ||
     792        2122 :                 blk_geom_mds[*idx_mds].bsize == BLOCK_16X4 ||
     793        1994 :                 blk_geom_mds[*idx_mds].bsize == BLOCK_4X16)
     794             :             {
     795         336 :                 blk_geom_mds[*idx_mds].txb_count[tx_depth] = 4;
     796             :             }
     797        7266 :             for (txb_itr = 0; txb_itr < blk_geom_mds[*idx_mds].txb_count[tx_depth]; txb_itr++) {
     798        5064 :                 if (blk_geom_mds[*idx_mds].bsize == BLOCK_64X64)
     799             :                 {
     800           8 :                     blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_32X32, 0);
     801           8 :                     blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
     802           8 :                     uint8_t offsetx[4] = { 0,32,0,32 };
     803           8 :                     uint8_t offsety[4] = { 0,0,32,32 };
     804             :                     //   0  1
     805             :                     //   2  3
     806           8 :                     uint8_t tbx = offsetx[txb_itr];
     807           8 :                     uint8_t tby = offsety[txb_itr];
     808             : 
     809           8 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
     810           8 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
     811             :                 }
     812        5056 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_64X32)
     813             :                 {
     814          16 :                     blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_32X32, 0);
     815          16 :                     blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
     816          16 :                     uint8_t offsetx[2] = { 0,32 };
     817          16 :                     uint8_t offsety[2] = { 0,0 };
     818             :                     //   0  1
     819          16 :                     uint8_t tbx = offsetx[txb_itr];
     820          16 :                     uint8_t tby = offsety[txb_itr];
     821             : 
     822          16 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
     823          16 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
     824             :                 }
     825        5040 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_32X64)
     826             :                 {
     827          16 :                     blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_32X32, 0);
     828          16 :                     blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
     829          16 :                     uint8_t offsetx[2] = { 0,0 };
     830          16 :                     uint8_t offsety[2] = { 0,32 };
     831             :                     //   0  1
     832          16 :                     uint8_t tbx = offsetx[txb_itr];
     833          16 :                     uint8_t tby = offsety[txb_itr];
     834             : 
     835          16 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
     836          16 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
     837             :                 }
     838        5024 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_32X32)
     839             :                 {
     840          96 :                     blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_16X16, 0);
     841          96 :                     blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
     842          96 :                     uint8_t offsetx[4] = { 0,16,0,16 };
     843          96 :                     uint8_t offsety[4] = { 0,0,16,16 };
     844             :                     //   0  1
     845             :                     //   2  3
     846          96 :                     uint8_t tbx = offsetx[txb_itr];
     847          96 :                     uint8_t tby = offsety[txb_itr];
     848             : 
     849          96 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
     850          96 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
     851             :                 }
     852        4928 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_32X16)
     853             :                 {
     854          64 :                     blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_16X16, 0);
     855          64 :                     blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
     856          64 :                     uint8_t offsetx[2] = { 0,16 };
     857          64 :                     uint8_t offsety[2] = { 0,0 };
     858             :                     //   0  1
     859          64 :                     uint8_t tbx = offsetx[txb_itr];
     860          64 :                     uint8_t tby = offsety[txb_itr];
     861             : 
     862          64 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
     863          64 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
     864             :                 }
     865        4864 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_16X32)
     866             :                 {
     867          64 :                     blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_16X16, 0);
     868          64 :                     blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
     869          64 :                     uint8_t offsetx[2] = { 0,0 };
     870          64 :                     uint8_t offsety[2] = { 0,16 };
     871             :                     //   0  1
     872          64 :                     uint8_t tbx = offsetx[txb_itr];
     873          64 :                     uint8_t tby = offsety[txb_itr];
     874             : 
     875          64 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
     876          64 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
     877             :                 }
     878        4800 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_16X16)
     879             :                 {
     880         384 :                     blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_8X8, 0);
     881         384 :                     blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
     882         384 :                     uint8_t offsetx[4] = { 0,8,0,8 };
     883         384 :                     uint8_t offsety[4] = { 0,0,8,8 };
     884             :                     //   0  1
     885             :                     //   2  3
     886         384 :                     uint8_t tbx = offsetx[txb_itr];
     887         384 :                     uint8_t tby = offsety[txb_itr];
     888             : 
     889         384 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
     890         384 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
     891             :                 }
     892        4416 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_16X8)
     893             :                 {
     894         256 :                     blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_8X8, 0);
     895         256 :                     blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
     896         256 :                     uint8_t offsetx[2] = { 0,8 };
     897         256 :                     uint8_t offsety[2] = { 0,0 };
     898             :                     //   0  1
     899         256 :                     uint8_t tbx = offsetx[txb_itr];
     900         256 :                     uint8_t tby = offsety[txb_itr];
     901             : 
     902         256 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
     903         256 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
     904             :                     //printf("");
     905             :                 }
     906        4160 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_8X16)
     907             :                 {
     908         256 :                     blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_8X8, 0);
     909         256 :                     blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
     910         256 :                     uint8_t offsetx[2] = { 0,0 };
     911         256 :                     uint8_t offsety[2] = { 0,8 };
     912             :                     //   0  1
     913         256 :                     uint8_t tbx = offsetx[txb_itr];
     914         256 :                     uint8_t tby = offsety[txb_itr];
     915             : 
     916         256 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
     917         256 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
     918             :                 }
     919        3904 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_8X8)
     920             :                 {
     921        1536 :                     blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_4X4, 0);
     922        1536 :                     blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
     923        1536 :                     uint8_t offsetx[4] = { 0,4,0,4 };
     924        1536 :                     uint8_t offsety[4] = { 0,0,4,4 };
     925             :                     //   0  1
     926             :                     //   2  3
     927        1536 :                     uint8_t tbx = offsetx[txb_itr];
     928        1536 :                     uint8_t tby = offsety[txb_itr];
     929             : 
     930        1536 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
     931        1536 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
     932             :                 }
     933        2368 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_64X16)
     934             :                 {
     935          32 :                     blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_16X16, 0);
     936          32 :                     blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
     937             :                     //   0  1 2 3
     938          32 :                     uint8_t offsetx[4] = { 0,16, 32, 48 };
     939          32 :                     uint8_t offsety[4] = { 0,0,  0,   0 };
     940          32 :                     uint8_t tbx = offsetx[txb_itr];
     941          32 :                     uint8_t tby = offsety[txb_itr];
     942             : 
     943          32 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
     944          32 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
     945             :                 }
     946        2336 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_16X64)
     947             :                 {
     948          32 :                     blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_16X16, 0);
     949          32 :                     blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
     950             :                     //   0  1 2 3
     951          32 :                     uint8_t offsetx[4] = { 0,0,  0, 0 };
     952          32 :                     uint8_t offsety[4] = { 0,16,32, 48 };
     953          32 :                     uint8_t tbx = offsetx[txb_itr];
     954          32 :                     uint8_t tby = offsety[txb_itr];
     955             : 
     956          32 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
     957          32 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
     958             :                 }
     959        2304 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_32X8)
     960             :                 {
     961         128 :                     blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_8X8, 0);
     962         128 :                     blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
     963             :                     //   0  1 2 3
     964         128 :                     uint8_t offsetx[4] = { 0,8, 16, 24 };
     965         128 :                     uint8_t offsety[4] = { 0,0,  0,   0 };
     966         128 :                     uint8_t tbx = offsetx[txb_itr];
     967         128 :                     uint8_t tby = offsety[txb_itr];
     968             : 
     969         128 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
     970         128 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
     971             :                 }
     972        2176 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_8X32)
     973             :                 {
     974         128 :                     blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_8X8, 0);
     975         128 :                     blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
     976             :                     //   0  1 2 3
     977         128 :                     uint8_t offsetx[4] = { 0,0,  0, 0 };
     978         128 :                     uint8_t offsety[4] = { 0,8,16, 24 };
     979         128 :                     uint8_t tbx = offsetx[txb_itr];
     980         128 :                     uint8_t tby = offsety[txb_itr];
     981             : 
     982         128 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
     983         128 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
     984             :                 }
     985        2048 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_16X4)
     986             :                 {
     987         512 :                     blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_4X4, 0);
     988         512 :                     blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
     989             :                     //   0  1 2 3
     990         512 :                     uint8_t offsetx[4] = { 0,4, 8, 12 };
     991         512 :                     uint8_t offsety[4] = { 0,0,  0,   0 };
     992         512 :                     uint8_t tbx = offsetx[txb_itr];
     993         512 :                     uint8_t tby = offsety[txb_itr];
     994             : 
     995         512 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
     996         512 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
     997             :                 }
     998        1536 :                 else if (blk_geom_mds[*idx_mds].bsize == BLOCK_4X16)
     999             :                 {
    1000         512 :                     blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(BLOCK_4X4, 0);
    1001         512 :                     blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
    1002             :                     //   0  1 2 3
    1003         512 :                     uint8_t offsetx[4] = { 0,0,  0, 0 };
    1004         512 :                     uint8_t offsety[4] = { 0,4,8, 12 };
    1005         512 :                     uint8_t tbx = offsetx[txb_itr];
    1006         512 :                     uint8_t tby = offsety[txb_itr];
    1007             : 
    1008         512 :                     blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x + tbx;
    1009         512 :                     blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y + tby;
    1010             :                 }
    1011             :                 else
    1012             :                 {
    1013        1024 :                     if (blk_geom_mds[*idx_mds].bsize == BLOCK_128X128)
    1014             :                     {
    1015           0 :                         blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(blk_geom_mds[*idx_mds].bsize, 0);
    1016           0 :                         blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
    1017             : 
    1018           0 :                         blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = (txb_itr == 0 || txb_itr == 2) ? blk_geom_mds[*idx_mds].origin_x : blk_geom_mds[*idx_mds].origin_x + 64;
    1019           0 :                         blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = (txb_itr == 0 || txb_itr == 1) ? blk_geom_mds[*idx_mds].origin_y : blk_geom_mds[*idx_mds].origin_y + 64;
    1020             :                     }
    1021        1024 :                     else if (blk_geom_mds[*idx_mds].bsize == BLOCK_128X64)
    1022             :                     {
    1023           0 :                         blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(blk_geom_mds[*idx_mds].bsize, 0);
    1024           0 :                         blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
    1025             : 
    1026           0 :                         blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = (txb_itr == 0) ? blk_geom_mds[*idx_mds].origin_x : blk_geom_mds[*idx_mds].origin_x + 64;
    1027           0 :                         blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y;
    1028             :                     }
    1029        1024 :                     else if (blk_geom_mds[*idx_mds].bsize == BLOCK_64X128)
    1030             :                     {
    1031           0 :                         blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(blk_geom_mds[*idx_mds].bsize, 0);
    1032           0 :                         blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
    1033           0 :                         blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x;
    1034           0 :                         blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = (txb_itr == 0) ? blk_geom_mds[*idx_mds].origin_y : blk_geom_mds[*idx_mds].origin_y + 64;
    1035             :                     }
    1036             :                     else
    1037             :                     {
    1038        1024 :                         blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr] = av1_get_tx_size(blk_geom_mds[*idx_mds].bsize, 0);
    1039        1024 :                         blk_geom_mds[*idx_mds].txsize_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].txsize_uv[0][0];
    1040        1024 :                         blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_x;
    1041        1024 :                         blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].origin_y;
    1042             :                     }
    1043             :                 }
    1044        5064 :                 blk_geom_mds[*idx_mds].tx_boff_x[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].tx_org_x[tx_depth][txb_itr] - blk_geom_mds[*idx_mds].origin_x;
    1045        5064 :                 blk_geom_mds[*idx_mds].tx_boff_y[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].tx_org_y[tx_depth][txb_itr] - blk_geom_mds[*idx_mds].origin_y;
    1046        5064 :                 blk_geom_mds[*idx_mds].tx_width[tx_depth][txb_itr] = tx_size_wide[blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr]];
    1047        5064 :                 blk_geom_mds[*idx_mds].tx_height[tx_depth][txb_itr] = tx_size_high[blk_geom_mds[*idx_mds].txsize[tx_depth][txb_itr]];
    1048        5064 :                 blk_geom_mds[*idx_mds].tx_width_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].tx_width_uv[0][0];
    1049        5064 :                 blk_geom_mds[*idx_mds].tx_height_uv[tx_depth][txb_itr] = blk_geom_mds[*idx_mds].tx_height_uv[0][0];
    1050             :             }
    1051        2202 :             blk_geom_mds[*idx_mds].blkidx_mds = (*idx_mds);
    1052        2202 :             (*idx_mds) = (*idx_mds) + 1;
    1053             :         }
    1054             :     }
    1055             : 
    1056         682 :     uint32_t min_size = max_sb >> (max_depth - 1);
    1057         682 :     if (halfsize >= min_size)
    1058             :     {
    1059         170 :         md_scan_all_blks(idx_mds, halfsize, x, y, 0,0);
    1060         170 :         md_scan_all_blks(idx_mds, halfsize, x + halfsize, y, 0,1);
    1061         170 :         md_scan_all_blks(idx_mds, halfsize, x, y + halfsize, 0,2);
    1062         170 :         md_scan_all_blks(idx_mds, halfsize, x + halfsize, y + halfsize, 1,3);
    1063             :     }
    1064         682 : }
    1065             : 
    /*
     * depth_scan_all_blks
     *
     * Fills blk_geom_dps[] in depth-scan order: depth by depth, then square
     * by square (rows outer, columns inner), then partition by partition,
     * then sub-block by sub-block.
     *
     * For every entry it writes blkidx_dps, depth, shape, origin_x and
     * origin_y. Reads the file-level max_depth, max_sb, max_part and
     * ns_quarter_off_mult (all declared outside this view).
     *
     * The traversal here must enumerate entries in exactly the same order as
     * count_total_num_of_active_blks() and finish_depth_scan_all_blks(),
     * which repeat the same loop nest.
     */
    1066           2 : void depth_scan_all_blks()
    1067             : {
    1068             :     uint32_t depth_it, sq_it_y, sq_it_x, part_it, nsq_it;
    1069             :     uint32_t sq_orgx, sq_orgy;
    1070           2 :     uint32_t  depth_scan_idx = 0;
    1071             : 
    1072          12 :     for (depth_it = 0; depth_it < max_depth; depth_it++)
    1073             :     {
                               // Squares per row/column at this depth, and their edge length
                               // (max_sb halved once per depth level).
    1074          10 :         uint32_t  tot_num_sq = 1 << depth_it;
    1075          18 :         uint32_t  sq_size = depth_it == 0 ? max_sb :
    1076          14 :             depth_it == 1 ? max_sb / 2 :
    1077          10 :             depth_it == 2 ? max_sb / 4 :
    1078           6 :             depth_it == 3 ? max_sb / 8 :
    1079           2 :             depth_it == 4 ? max_sb / 16 : max_sb / 32;
    1080             : 
                               // Number of partition shapes enumerated for this square size:
                               // capped at 7 for 128, at 3 for 8, exactly 1 for 4, max_part otherwise.
    1081          20 :         uint32_t max_part_updated = sq_size == 128 ? MIN(max_part, 7) :
    1082          18 :             sq_size == 8 ? MIN(max_part, 3) :
    1083           8 :             sq_size == 4 ? 1 : max_part;
    1084             : 
    1085          72 :         for (sq_it_y = 0; sq_it_y < tot_num_sq; sq_it_y++)
    1086             :         {
    1087          62 :             sq_orgy = sq_it_y * sq_size;
    1088             : 
    1089         744 :             for (sq_it_x = 0; sq_it_x < tot_num_sq; sq_it_x++)
    1090             :             {
    1091         682 :                 sq_orgx = sq_it_x * sq_size;
    1092             : 
    1093        1956 :                 for (part_it = 0; part_it < max_part_updated; part_it++)
    1094             :                 {
                                           // Sub-blocks per partition index: 1 for index 0,
                                           // 2 for 1..2, 3 for 3..6, 4 for 7 and above.
    1095        1274 :                     uint32_t tot_num_ns_per_part = part_it < 1 ? 1 :
    1096             :                         part_it < 3 ? 2 :
    1097             :                         part_it < 7 ? 3 : 4;
    1098             : 
    1099        3476 :                     for (nsq_it = 0; nsq_it < tot_num_ns_per_part; nsq_it++)
    1100             :                     {
    1101        2202 :                         blk_geom_dps[depth_scan_idx].blkidx_dps = depth_scan_idx;
    1102        2202 :                         blk_geom_dps[depth_scan_idx].depth = depth_it;
    1103        2202 :                         blk_geom_dps[depth_scan_idx].shape = (PART)part_it;
                                               // Origin = square origin + a multiple of a quarter of the
                                               // square size; row [0] of the table is used for x, row [1] for y.
    1104        2202 :                         blk_geom_dps[depth_scan_idx].origin_x = sq_orgx + (sq_size / 4) *ns_quarter_off_mult[part_it][0][nsq_it];
    1105        2202 :                         blk_geom_dps[depth_scan_idx].origin_y = sq_orgy + (sq_size / 4) *ns_quarter_off_mult[part_it][1][nsq_it];
    1106             : 
    1107        2202 :                         depth_scan_idx++;
    1108             :                     }
    1109             :                 }
    1110             :             }
    1111             :         }
    1112             :     }
    1113           2 : }
    1114             : 
    /*
     * finish_depth_scan_all_blks
     *
     * Second pass over blk_geom_dps[]: walks the same depth/square/partition/
     * sub-block loop nest as depth_scan_all_blks(), and for each entry calls
     * search_matching_from_mds() with the entry's depth, shape, origin_x and
     * origin_y. The returned value is used as an index into blk_geom_mds[],
     * and that element's blkidx_mds is stored in the dps entry.
     *
     * The function also contains an optional dump of the indices to a CSV
     * file and to stdout, guarded by the local do_print, which is set to 0
     * here, so none of the printing code runs.
     *
     * NOTE(review): if do_print were enabled, fp is used by fprintf()/fclose()
     * without a NULL check, and the output path is a fixed Windows path.
     * FOPEN is a macro defined outside this view; its failure behaviour
     * should be confirmed before enabling the dump.
     */
    1115           2 : void finish_depth_scan_all_blks()
    1116             : {
    1117           2 :     uint32_t do_print = 0;
                           // Smallest square size; only used to scale the debug output below.
    1118           2 :     uint32_t min_size = max_sb >> (max_depth - 1);
    1119           2 :     FILE * fp = NULL;
    1120           2 :     if (do_print)
    1121           0 :         FOPEN(fp, "e:\\test\\data.csv", "w");
    1122             : 
    1123             :     uint32_t depth_it, sq_it_y, sq_it_x, part_it, nsq_it;
    1124             : 
    1125           2 :     uint32_t  depth_scan_idx = 0;
    1126             : 
    1127          12 :     for (depth_it = 0; depth_it < max_depth; depth_it++)
    1128             :     {
                               // Same per-depth parameters as in depth_scan_all_blks().
    1129          10 :         uint32_t  tot_num_sq = 1 << depth_it;
    1130          18 :         uint32_t  sq_size = depth_it == 0 ? max_sb :
    1131          14 :             depth_it == 1 ? max_sb / 2 :
    1132          10 :             depth_it == 2 ? max_sb / 4 :
    1133           6 :             depth_it == 3 ? max_sb / 8 :
    1134           2 :             depth_it == 4 ? max_sb / 16 : max_sb / 32;
    1135             : 
    1136          20 :         uint32_t max_part_updated = sq_size == 128 ? MIN(max_part, 7) :
    1137          18 :             sq_size == 8 ? MIN(max_part, 3) :
    1138           8 :             sq_size == 4 ? 1 : max_part;
    1139             : 
    1140          10 :         if (do_print)
    1141             :         {
    1142           0 :             fprintf(fp, "\n\n\n");
    1143           0 :             printf("\n\n\n");
    1144             :         }
    1145             : 
    1146          72 :         for (sq_it_y = 0; sq_it_y < tot_num_sq; sq_it_y++)
    1147             :         {
    1148          62 :             if (do_print)
    1149             :             {
    1150           0 :                 for (uint32_t i = 0; i < sq_size / min_size; i++)
    1151             :                 {
    1152           0 :                     fprintf(fp, "\n ");
    1153           0 :                     printf("\n ");
    1154             :                 }
    1155             :             }
    1156             : 
    1157         744 :             for (sq_it_x = 0; sq_it_x < tot_num_sq; sq_it_x++)
    1158             :             {
    1159        1956 :                 for (part_it = 0; part_it < max_part_updated; part_it++)
    1160             :                 {
    1161        1274 :                     uint32_t tot_num_ns_per_part = part_it < 1 ? 1 :
    1162             :                         part_it < 3 ? 2 :
    1163             :                         part_it < 7 ? 3 : 4;
    1164             : 
    1165        3476 :                     for (nsq_it = 0; nsq_it < tot_num_ns_per_part; nsq_it++)
    1166             :                     {
                                               // Look up the md-scan entry describing the same block.
    1167        2202 :                         uint32_t matched = search_matching_from_mds(
    1168        2202 :                             blk_geom_dps[depth_scan_idx].depth,
    1169        2202 :                             blk_geom_dps[depth_scan_idx].shape,
    1170        2202 :                             blk_geom_dps[depth_scan_idx].origin_x,
    1171        2202 :                             blk_geom_dps[depth_scan_idx].origin_y);
    1172             : 
    1173        2202 :                         blk_geom_dps[depth_scan_idx].blkidx_mds = blk_geom_mds[matched].blkidx_mds;
    1174             : 
                                               // Debug dump: only the first partition of each square is printed.
    1175        2202 :                         if (do_print && part_it == 0)
    1176             :                         {
    1177           0 :                             fprintf(fp, "%i", blk_geom_dps[depth_scan_idx].blkidx_mds);
    1178           0 :                             printf("%i", blk_geom_dps[depth_scan_idx].blkidx_mds);
    1179             : 
    1180           0 :                             for (uint32_t i = 0; i < sq_size / min_size; i++)
    1181             :                             {
    1182           0 :                                 fprintf(fp, ",");
    1183           0 :                                 printf(",");
    1184             :                             }
    1185             :                         }
    1186        2202 :                         depth_scan_idx++;
    1187             :                     }
    1188             :                 }
    1189             :             }
    1190             :         }
    1191             :     }
    1192             : 
    1193           2 :     if (do_print)
    1194           0 :         fclose(fp);
    1195           2 : }
    1196             : 
    /*
     * count_total_num_of_active_blks
     *
     * Counts how many blocks the depth scan enumerates for the current
     * file-level max_depth, max_sb and max_part. It runs the same
     * depth/square/partition/sub-block loop nest as depth_scan_all_blks()
     * but only increments a counter.
     *
     * Returns the total number of enumerated blocks.
     */
    1197           2 : uint32_t count_total_num_of_active_blks()
    1198             : {
    1199             :     uint32_t depth_it, sq_it_y, sq_it_x, part_it, nsq_it;
    1200             : 
    1201           2 :     uint32_t  depth_scan_idx = 0;
    1202             : 
    1203          12 :     for (depth_it = 0; depth_it < max_depth; depth_it++)
    1204             :     {
                               // Squares per row/column at this depth and their edge length.
    1205          10 :         uint32_t  tot_num_sq = 1 << depth_it;
    1206          18 :         uint32_t  sq_size = depth_it == 0 ? max_sb :
    1207          14 :             depth_it == 1 ? max_sb / 2 :
    1208          10 :             depth_it == 2 ? max_sb / 4 :
    1209           6 :             depth_it == 3 ? max_sb / 8 :
    1210           2 :             depth_it == 4 ? max_sb / 16 : max_sb / 32;
    1211             : 
                               // Partition count for this square size: capped at 7 for 128,
                               // at 3 for 8, exactly 1 for 4, max_part otherwise.
    1212          20 :         uint32_t max_part_updated = sq_size == 128 ? MIN(max_part, 7) :
    1213          18 :             sq_size == 8 ? MIN(max_part, 3) :
    1214           8 :             sq_size == 4 ? 1 : max_part;
    1215             : 
    1216          72 :         for (sq_it_y = 0; sq_it_y < tot_num_sq; sq_it_y++)
    1217             :         {
    1218         744 :             for (sq_it_x = 0; sq_it_x < tot_num_sq; sq_it_x++)
    1219             :             {
    1220        1956 :                 for (part_it = 0; part_it < max_part_updated; part_it++)
    1221             :                 {
                                           // Sub-blocks per partition index: 1, 2 (1..2), 3 (3..6) or 4 (7+).
    1222        1274 :                     uint32_t tot_num_ns_per_part = part_it < 1 ? 1 :
    1223             :                         part_it < 3 ? 2 :
    1224             :                         part_it < 7 ? 3 : 4;
    1225             : 
    1226        3476 :                     for (nsq_it = 0; nsq_it < tot_num_ns_per_part; nsq_it++)
    1227        2202 :                         depth_scan_idx++;
    1228             :                 }
    1229             :             }
    1230             :         }
    1231             :     }
    1232             : 
    1233           2 :     return depth_scan_idx;
    1234             : }
    /*
     * log_redundancy_similarity
     *
     * For each of the first max_block_count entries of blk_geom_mds[], resets
     * its similar/redund flags and both list sizes, then compares it against
     * every other entry in the same range.
     *
     * An entry with the same bsize, origin_x and origin_y (and a different
     * index) is recorded as "similar": its blkidx_mds is appended to
     * similar_list. If, in addition, both entries have nsi == 0, it is also
     * recorded as "redundant" and appended to redund_list.
     *
     * max_block_count: number of blk_geom_mds[] entries to process.
     *
     * NOTE(review): the appends do not check list_size against the capacity
     * of blk_mds_table (capacity is declared outside this view) - confirm it
     * is large enough for the worst case.
     */
    1235           2 : void log_redundancy_similarity(uint32_t  max_block_count)
    1236             : {
    1237             :     uint32_t blk_it, s_it;
    1238             : 
    1239        2204 :     for (blk_it = 0; blk_it < max_block_count; blk_it++)
    1240             :     {
    1241        2202 :         BlockGeom * cur_geom = &blk_geom_mds[blk_it];
    1242        2202 :         cur_geom->similar = 0;
    1243        2202 :         cur_geom->redund = 0;
    1244        2202 :         cur_geom->redund_list.list_size = 0;
    1245        2202 :         cur_geom->similar_list.list_size = 0;
    1246             : 
                               // Full scan of the table for each entry (every ordered pair is visited).
    1247     2426600 :         for (s_it = 0; s_it < max_block_count; s_it++)
    1248             :         {
    1249     2424400 :             BlockGeom * search_geom = &blk_geom_mds[s_it];
    1250             : 
                                   // Same block size at the same position, excluding the entry itself.
    1251     2424400 :             if (cur_geom->bsize == search_geom->bsize  && cur_geom->origin_x == search_geom->origin_x &&
    1252       31274 :                 cur_geom->origin_y == search_geom->origin_y && s_it != blk_it)
    1253             :             {
    1254             :                 //one block could have similar and redundant blocks
    1255        1344 :                 cur_geom->similar = 1;
    1256        1344 :                 cur_geom->similar_list.blk_mds_table[cur_geom->similar_list.list_size] = search_geom->blkidx_mds;
    1257        1344 :                 cur_geom->similar_list.list_size++;
    1258        1344 :                 if (cur_geom->nsi == 0 && search_geom->nsi==0)
    1259             :                 {
    1260         420 :                     cur_geom->redund = 1;
    1261         420 :                     cur_geom->redund_list.blk_mds_table[cur_geom->redund_list.list_size] = search_geom->blkidx_mds;
    1262         420 :                     cur_geom->redund_list.list_size++;
    1263             :                 }
    1264             :             }
    1265             :         }
    1266             :     }
    1267           2 : }
    /*
     * build_blk_geom
     *
     * Builds the block-geometry tables for one super-block size.
     *
     * use_128x128: non-zero selects max_sb = 128 and max_depth = 6;
     *              zero selects max_sb = 64 and max_depth = 5.
     *
     * Steps, in order:
     *   (0) count the blocks the depth scan will enumerate and compare the
     *       result with BLOCK_MAX_COUNT_SB_128 / BLOCK_MAX_COUNT_SB_64;
     *   (1) fill blk_geom_dps via depth_scan_all_blks();
     *   (2) fill blk_geom_mds via md_scan_all_blks();
     *   (3) link dps entries to mds entries via finish_depth_scan_all_blks();
     *   then flag similar/redundant blocks via log_redundancy_similarity().
     *
     * Writes the file-level max_sb, max_depth and max_num_active_blocks.
     *
     * NOTE(review): a count mismatch in step (0) is only printed; the build
     * continues regardless. The message uses %i with a uint32_t argument.
     */
    1268           2 : void build_blk_geom(int32_t use_128x128)
    1269             : {
    1270           2 :     max_sb = use_128x128 ? 128 : 64;
    1271           2 :     max_depth = use_128x128 ? 6 : 5;
    1272           2 :     uint32_t  max_block_count = use_128x128 ? BLOCK_MAX_COUNT_SB_128 : BLOCK_MAX_COUNT_SB_64;
    1273             : 
    1274             :     //(0)compute total number of blocks using the information provided
    1275           2 :     max_num_active_blocks = count_total_num_of_active_blks();
    1276           2 :     if (max_num_active_blocks != max_block_count)
    1277           0 :         printf(" \n\n Error %i blocks\n\n ", max_num_active_blocks);
    1278             : 
    1279             :     //(1) Construct depth scan blk_geom_dps
    1280           2 :     depth_scan_all_blks();
    1281             : 
    1282             :     //(2) Construct md scan blk_geom_mds:  use info from dps
    1283           2 :     uint32_t idx_mds = 0;
    1284           2 :     md_scan_all_blks(&idx_mds, max_sb, 0, 0, 0,0);
    1285             : 
    1286             :     //(3) Fill more info from mds to dps - print using dps
    1287           2 :     finish_depth_scan_all_blks();
    1288             : 
                           // Uses the expected count (max_block_count), not the counted value.
    1289           2 :     log_redundancy_similarity(max_block_count);
    1290           2 : }
    1291             : 
    /*
     * Get_blk_geom_dps
     *
     * Returns a read-only pointer to entry bidx_dps of blk_geom_dps[].
     * The index is not range-checked here.
     */
    1292             : //need to finish filling dps by inheriting data from mds
    1293           0 : const BlockGeom * Get_blk_geom_dps(uint32_t bidx_dps)
    1294             : {
    1295           0 :     return &blk_geom_dps[bidx_dps];
    1296             : }
    /*
     * get_blk_geom_mds
     *
     * Returns a read-only pointer to entry bidx_mds of blk_geom_mds[].
     * The index is not range-checked here.
     */
    1297   351924000 : const BlockGeom * get_blk_geom_mds(uint32_t bidx_mds)
    1298             : {
    1299   351924000 :     return &blk_geom_mds[bidx_mds];
    1300             : }
    1301             : 
    /*
     * get_mds_idx
     *
     * Linear search of blk_geom_mds[] for the entry whose sq_size equals
     * size, whose origin is (orgx, orgy) and whose shape is PART_N.
     *
     * orgx, orgy:  origin to match against origin_x / origin_y.
     * size:        value to match against sq_size.
     * use_128x128: non-zero searches BLOCK_MAX_COUNT_SB_128 entries,
     *              zero searches BLOCK_MAX_COUNT_SB_64 entries.
     *
     * Returns the blkidx_mds of the first matching entry, or 0 when no entry
     * matches.
     *
     * NOTE(review): the "not found" result 0 cannot be told apart from a
     * match whose blkidx_mds is 0 - callers should be checked.
     */
    1302       46068 : uint32_t get_mds_idx(uint32_t orgx, uint32_t orgy, uint32_t size, uint32_t use_128x128)
    1303             : {
    1304       46068 :     uint32_t max_block_count = use_128x128 ? BLOCK_MAX_COUNT_SB_128 : BLOCK_MAX_COUNT_SB_64;
    1305       46068 :     uint32_t mds = 0;
    1306             : 
    1307    24972700 :     for (uint32_t blk_it = 0; blk_it < max_block_count; blk_it++){
    1308    24972700 :         BlockGeom * cur_geom = &blk_geom_mds[blk_it];
    1309             : 
    1310    24972700 :         if ((uint32_t)cur_geom->sq_size == size && cur_geom->origin_x == orgx &&
    1311     1150580 :             cur_geom->origin_y == orgy && cur_geom->shape == PART_N) {
    1312       46066 :             mds = cur_geom->blkidx_mds;
    1313       46066 :             break;
    1314             :         }
    1315             :     }
    1316       46068 :     return mds;
    1317             : }

Generated by: LCOV version 1.14