LCOV - code coverage report
Current view: top level - Codec - EbMotionEstimation.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 7750 0.0 %
Date: 2019-11-25 17:38:06 Functions: 0 59 0.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright(c) 2019 Intel Corporation
       3             :  * SPDX-License-Identifier: BSD-2-Clause-Patent
       4             :  */
       5             : /*
       6             :  * Copyright(c) 2019 Netflix, Inc.
       7             :  * SPDX-License-Identifier: BSD-2-Clause-Patent
       8             :  */
       9             : 
      10             : #include <stdio.h>
      11             : 
      12             : #include "aom_dsp_rtcd.h"
      13             : #include "EbDefinitions.h"
      14             : 
      15             : #include "EbPictureControlSet.h"
      16             : #include "EbSequenceControlSet.h"
      17             : #include "EbMotionEstimation.h"
      18             : #include "EbUtility.h"
      19             : 
      20             : #include "EbComputeSAD.h"
      21             : #include "EbReferenceObject.h"
      22             : #include "EbMeSadCalculation.h"
      23             : 
      24             : #include "EbIntraPrediction.h"
      25             : #include "EbLambdaRateTables.h"
      26             : #include "EbPictureOperators.h"
      27             : #define OIS_TH_COUNT 4
      28             : 
      29             : int32_t OisPointTh[3][MAX_TEMPORAL_LAYERS][OIS_TH_COUNT] = {
      30             :     {// Light OIS
      31             :      {-20, 50, 150, 200},
      32             :      {-20, 50, 150, 200},
      33             :      {-20, 50, 100, 150},
      34             :      {-20, 50, 200, 300},
      35             :      {-20, 50, 200, 300},
      36             :      {-20, 50, 200, 300}},
      37             :     {// Default OIS
      38             :      {-150, 0, 150, 200},
      39             :      {-150, 0, 150, 200},
      40             :      {-125, 0, 100, 150},
      41             :      {-50, 50, 200, 300},
      42             :      {-50, 50, 200, 300},
      43             :      {-50, 50, 200, 300}},
      44             :     {// Heavy OIS
      45             :      {-400, -300, -200, 0},
      46             :      {-400, -300, -200, 0},
      47             :      {-400, -300, -200, 0},
      48             :      {-400, -300, -200, 0},
      49             :      {-400, -300, -200, 0},
      50             :      {-400, -300, -200, 0}}};
      51             : 
      52             : #define AVCCODEL
      53             : /********************************************
      54             :  * Constants
      55             :  ********************************************/
      56             : 
      57             : #define MAX_INTRA_IN_MD 9
      58             : #define REFERENCE_PIC_LIST_0 0
      59             : #define REFERENCE_PIC_LIST_1 1
      60             : 
      61             : /*******************************************
      62             :  * compute8x4_sad_kernel_c
      63             :  *   Unoptimized 8x4 SAD
      64             :  *******************************************/
      65           0 : uint32_t compute8x4_sad_kernel_c(
      66             :     uint8_t *src,         // input parameter, source samples Ptr
      67             :     uint32_t src_stride,  // input parameter, source stride
      68             :     uint8_t *ref,         // input parameter, reference samples Ptr
      69             :     uint32_t ref_stride)  // input parameter, reference stride
      70             : {
      71             :     uint32_t rowNumberInBlock8x4;
      72           0 :     uint32_t sadBlock8x4 = 0;
      73             : 
      74           0 :     for (rowNumberInBlock8x4 = 0; rowNumberInBlock8x4 < 4;
      75           0 :          ++rowNumberInBlock8x4) {
      76           0 :         sadBlock8x4 += EB_ABS_DIFF(src[0x00], ref[0x00]);
      77           0 :         sadBlock8x4 += EB_ABS_DIFF(src[0x01], ref[0x01]);
      78           0 :         sadBlock8x4 += EB_ABS_DIFF(src[0x02], ref[0x02]);
      79           0 :         sadBlock8x4 += EB_ABS_DIFF(src[0x03], ref[0x03]);
      80           0 :         sadBlock8x4 += EB_ABS_DIFF(src[0x04], ref[0x04]);
      81           0 :         sadBlock8x4 += EB_ABS_DIFF(src[0x05], ref[0x05]);
      82           0 :         sadBlock8x4 += EB_ABS_DIFF(src[0x06], ref[0x06]);
      83           0 :         sadBlock8x4 += EB_ABS_DIFF(src[0x07], ref[0x07]);
      84           0 :         src += src_stride;
      85           0 :         ref += ref_stride;
      86             :     }
      87             : 
      88           0 :     return sadBlock8x4;
      89             : }
      90             : /*******************************************
      91             :  * compute8x8_sad_kernel_c
      92             :  *   Unoptimized 8x8 SAD
      93             :  *******************************************/
      94           0 : uint32_t compute8x8_sad_kernel_c(
      95             :     uint8_t *src,         // input parameter, source samples Ptr
      96             :     uint32_t src_stride,  // input parameter, source stride
      97             :     uint8_t *ref,         // input parameter, reference samples Ptr
      98             :     uint32_t ref_stride)  // input parameter, reference stride
      99             : {
     100             :     uint32_t rowNumberInBlock8x8;
     101           0 :     uint32_t sadBlock8x8 = 0;
     102             : 
     103           0 :     for (rowNumberInBlock8x8 = 0; rowNumberInBlock8x8 < 8;
     104           0 :          ++rowNumberInBlock8x8) {
     105           0 :         sadBlock8x8 += EB_ABS_DIFF(src[0x00], ref[0x00]);
     106           0 :         sadBlock8x8 += EB_ABS_DIFF(src[0x01], ref[0x01]);
     107           0 :         sadBlock8x8 += EB_ABS_DIFF(src[0x02], ref[0x02]);
     108           0 :         sadBlock8x8 += EB_ABS_DIFF(src[0x03], ref[0x03]);
     109           0 :         sadBlock8x8 += EB_ABS_DIFF(src[0x04], ref[0x04]);
     110           0 :         sadBlock8x8 += EB_ABS_DIFF(src[0x05], ref[0x05]);
     111           0 :         sadBlock8x8 += EB_ABS_DIFF(src[0x06], ref[0x06]);
     112           0 :         sadBlock8x8 += EB_ABS_DIFF(src[0x07], ref[0x07]);
     113           0 :         src += src_stride;
     114           0 :         ref += ref_stride;
     115             :     }
     116             : 
     117           0 :     return sadBlock8x8;
     118             : }
     119             : 
     120             : /*******************************************
     121             : Calculate SAD for 16x16 and its 8x8 subblocks
     122             : and check if there is improvement; if yes, keep
     123             : the best SAD+MV
     124             : *******************************************/
     125           0 : void ext_sad_calculation_8x8_16x16_c(
     126             :     uint8_t *src, uint32_t src_stride, uint8_t *ref, uint32_t ref_stride,
     127             :     uint32_t *p_best_sad8x8, uint32_t *p_best_sad16x16, uint32_t *p_best_mv8x8,
     128             :     uint32_t *p_best_mv16x16, uint32_t mv, uint32_t *p_sad16x16,
     129             :     uint32_t *p_sad8x8, EbBool sub_sad) {
     130             :     uint32_t sad16x16;
     131             : 
     132           0 :     if (sub_sad) {
     133           0 :         p_sad8x8[0] = (compute8x4_sad_kernel_c(src + 0 * src_stride + 0,
     134             :                                                      2 * src_stride,
     135             :                                                      ref + 0 * ref_stride + 0,
     136             :                                                      2 * ref_stride))
     137           0 :                       << 1;
     138           0 :         p_sad8x8[1] = (compute8x4_sad_kernel_c(src + 0 * src_stride + 8,
     139             :                                                      2 * src_stride,
     140             :                                                      ref + 0 * ref_stride + 8,
     141             :                                                      2 * ref_stride))
     142           0 :                       << 1;
     143           0 :         p_sad8x8[2] = (compute8x4_sad_kernel_c(src + 8 * src_stride + 0,
     144             :                                                      2 * src_stride,
     145           0 :                                                      ref + 8 * ref_stride + 0,
     146             :                                                      2 * ref_stride))
     147           0 :                       << 1;
     148           0 :         p_sad8x8[3] = (compute8x4_sad_kernel_c(src + 8 * src_stride + 8,
     149             :                                                      2 * src_stride,
     150           0 :                                                      ref + 8 * ref_stride + 8,
     151             :                                                      2 * ref_stride))
     152           0 :                       << 1;
     153             :     } else {
     154           0 :         p_sad8x8[0] = compute8x8_sad_kernel_c(src + 0 * src_stride + 0,
     155             :                                             src_stride,
     156             :                                             ref + 0 * ref_stride + 0,
     157             :                                             ref_stride);
     158           0 :         p_sad8x8[1] = compute8x8_sad_kernel_c(src + 0 * src_stride + 8,
     159             :                                             src_stride,
     160             :                                             ref + 0 * ref_stride + 8,
     161             :                                             ref_stride);
     162           0 :         p_sad8x8[2] = compute8x8_sad_kernel_c(src + 8 * src_stride + 0,
     163             :                                             src_stride,
     164           0 :                                             ref + 8 * ref_stride + 0,
     165             :                                             ref_stride);
     166           0 :         p_sad8x8[3] = compute8x8_sad_kernel_c(src + 8 * src_stride + 8,
     167             :                                             src_stride,
     168           0 :                                             ref + 8 * ref_stride + 8,
     169             :                                             ref_stride);
     170             :     }
     171             : 
     172           0 :     if (p_sad8x8[0] < p_best_sad8x8[0]) {
     173           0 :         p_best_sad8x8[0] = (uint32_t)p_sad8x8[0];
     174           0 :         p_best_mv8x8[0] = mv;
     175             :     }
     176             : 
     177           0 :     if (p_sad8x8[1] < p_best_sad8x8[1]) {
     178           0 :         p_best_sad8x8[1] = (uint32_t)p_sad8x8[1];
     179           0 :         p_best_mv8x8[1] = mv;
     180             :     }
     181             : 
     182           0 :     if (p_sad8x8[2] < p_best_sad8x8[2]) {
     183           0 :         p_best_sad8x8[2] = (uint32_t)p_sad8x8[2];
     184           0 :         p_best_mv8x8[2] = mv;
     185             :     }
     186             : 
     187           0 :     if (p_sad8x8[3] < p_best_sad8x8[3]) {
     188           0 :         p_best_sad8x8[3] = (uint32_t)p_sad8x8[3];
     189           0 :         p_best_mv8x8[3] = mv;
     190             :     }
     191             : 
     192           0 :     sad16x16 = p_sad8x8[0] + p_sad8x8[1] + p_sad8x8[2] + p_sad8x8[3];
     193           0 :     if (sad16x16 < p_best_sad16x16[0]) {
     194           0 :         p_best_sad16x16[0] = (uint32_t)sad16x16;
     195           0 :         p_best_mv16x16[0] = mv;
     196             :     }
     197             : 
     198           0 :     *p_sad16x16 = (uint32_t)sad16x16;
     199           0 : }
     200             : 
     201             : /*******************************************
     202             : Calculate SAD for 32x32, 64x64 from 16x16
     203             : and check if there is improvement; if yes, keep
     204             : the best SAD+MV
     205             : *******************************************/
     206           0 : void ext_sad_calculation_32x32_64x64_c(uint32_t *p_sad16x16,
     207             :                                      uint32_t *p_best_sad32x32,
     208             :                                      uint32_t *p_best_sad64x64,
     209             :                                      uint32_t *p_best_mv32x32,
     210             :                                      uint32_t *p_best_mv64x64, uint32_t mv,
     211             :                                      uint32_t *p_sad32x32) {
     212             :     uint32_t sad32x32_0, sad32x32_1, sad32x32_2, sad32x32_3, sad64x64;
     213             : 
     214           0 :     p_sad32x32[0] = sad32x32_0 =
     215           0 :         p_sad16x16[0] + p_sad16x16[1] + p_sad16x16[2] + p_sad16x16[3];
     216           0 :     if (sad32x32_0 < p_best_sad32x32[0]) {
     217           0 :         p_best_sad32x32[0] = sad32x32_0;
     218           0 :         p_best_mv32x32[0] = mv;
     219             :     }
     220             : 
     221           0 :     p_sad32x32[1] = sad32x32_1 =
     222           0 :         p_sad16x16[4] + p_sad16x16[5] + p_sad16x16[6] + p_sad16x16[7];
     223           0 :     if (sad32x32_1 < p_best_sad32x32[1]) {
     224           0 :         p_best_sad32x32[1] = sad32x32_1;
     225           0 :         p_best_mv32x32[1] = mv;
     226             :     }
     227             : 
     228           0 :     p_sad32x32[2] = sad32x32_2 =
     229           0 :         p_sad16x16[8] + p_sad16x16[9] + p_sad16x16[10] + p_sad16x16[11];
     230           0 :     if (sad32x32_2 < p_best_sad32x32[2]) {
     231           0 :         p_best_sad32x32[2] = sad32x32_2;
     232           0 :         p_best_mv32x32[2] = mv;
     233             :     }
     234             : 
     235           0 :     p_sad32x32[3] = sad32x32_3 =
     236           0 :         p_sad16x16[12] + p_sad16x16[13] + p_sad16x16[14] + p_sad16x16[15];
     237           0 :     if (sad32x32_3 < p_best_sad32x32[3]) {
     238           0 :         p_best_sad32x32[3] = sad32x32_3;
     239           0 :         p_best_mv32x32[3] = mv;
     240             :     }
     241           0 :     sad64x64 = sad32x32_0 + sad32x32_1 + sad32x32_2 + sad32x32_3;
     242           0 :     if (sad64x64 < p_best_sad64x64[0]) {
     243           0 :         p_best_sad64x64[0] = sad64x64;
     244           0 :         p_best_mv64x64[0] = mv;
     245             :     }
     246           0 : }
     247             : 
     248             : /*******************************************
     249             :  * get_eight_horizontal_search_point_results_8x8_16x16_pu_c
     250             :  *******************************************/
     251           0 : void get_eight_horizontal_search_point_results_8x8_16x16_pu_c(
     252             :     uint8_t *src, uint32_t src_stride, uint8_t *ref, uint32_t ref_stride,
     253             :     uint32_t *p_best_sad8x8, uint32_t *p_best_mv8x8, uint32_t *p_best_sad16x16,
     254             :     uint32_t *p_best_mv16x16, uint32_t mv, uint16_t *p_sad16x16,
     255             :     EbBool sub_sad) {
     256             :     uint32_t xSearchIndex;
     257             :     int16_t xMv, yMv;
     258             :     uint32_t sad8x8[4];
     259             :     uint16_t sad16x16;
     260             : 
     261             :     /*
     262             :     -------------------------------------   -----------------------------------
     263             :     | 8x8_00 | 8x8_01 | 8x8_04 | 8x8_05 |   8x8_16 | 8x8_17 | 8x8_20 | 8x8_21 |
     264             :     -------------------------------------   -----------------------------------
     265             :     | 8x8_02 | 8x8_03 | 8x8_06 | 8x8_07 |   8x8_18 | 8x8_19 | 8x8_22 | 8x8_23 |
     266             :     -----------------------   -----------   ----------------------   ----------
     267             :     | 8x8_08 | 8x8_09 | 8x8_12 | 8x8_13 |   8x8_24 | 8x8_25 | 8x8_28 | 8x8_29 |
     268             :     ----------------------    -----------   ---------------------    ----------
     269             :     | 8x8_10 | 8x8_11 | 8x8_14 | 8x8_15 |   8x8_26 | 8x8_27 | 8x8_30 | 8x8_31 |
     270             :     -------------------------------------   -----------------------------------
     271             : 
     272             :     -------------------------------------   -----------------------------------
     273             :     | 8x8_32 | 8x8_33 | 8x8_36 | 8x8_37 |   8x8_48 | 8x8_49 | 8x8_52 | 8x8_53 |
     274             :     -------------------------------------   -----------------------------------
     275             :     | 8x8_34 | 8x8_35 | 8x8_38 | 8x8_39 |   8x8_50 | 8x8_51 | 8x8_54 | 8x8_55 |
     276             :     -----------------------   -----------   ----------------------   ----------
     277             :     | 8x8_40 | 8x8_41 | 8x8_44 | 8x8_45 |   8x8_56 | 8x8_57 | 8x8_60 | 8x8_61 |
     278             :     ----------------------    -----------   ---------------------    ----------
     279             :     | 8x8_42 | 8x8_43 | 8x8_46 | 8x8_47 |   8x8_58 | 8x8_59 | 8x8_62 | 8x8_63 |
     280             :     -------------------------------------   -----------------------------------
     281             :     */
     282             : 
     283             :     /*
     284             :     ----------------------    ----------------------
     285             :     |  16x16_0  |  16x16_1  |  16x16_4  |  16x16_5  |
     286             :     ----------------------    ----------------------
     287             :     |  16x16_2  |  16x16_3  |  16x16_6  |  16x16_7  |
     288             :     -----------------------   -----------------------
     289             :     |  16x16_8  |  16x16_9  |  16x16_12 |  16x16_13 |
     290             :     ----------------------    ----------------------
     291             :     |  16x16_10 |  16x16_11 |  16x16_14 |  16x16_15 |
     292             :     -----------------------   -----------------------
     293             :     */
     294             : 
     295           0 :     for (xSearchIndex = 0; xSearchIndex < 8; xSearchIndex++) {
     296           0 :         if (sub_sad) {
     297           0 :             sad8x8[0] = compute8x4_sad_kernel_c(
     298             :                             src + 0 * src_stride + 0,
     299             :                             2 * src_stride,
     300           0 :                             ref + 0 * ref_stride + 0 + xSearchIndex,
     301             :                             2 * ref_stride)
     302           0 :                         << 1;
     303           0 :             sad8x8[1] = compute8x4_sad_kernel_c(
     304             :                             src + 0 * src_stride + 8,
     305             :                             2 * src_stride,
     306           0 :                             ref + 0 * ref_stride + 8 + xSearchIndex,
     307             :                             2 * ref_stride)
     308           0 :                         << 1;
     309           0 :             sad8x8[2] = compute8x4_sad_kernel_c(
     310           0 :                             src + 8 * src_stride + 0,
     311             :                             2 * src_stride,
     312           0 :                             ref + 8 * ref_stride + 0 + xSearchIndex,
     313             :                             2 * ref_stride)
     314           0 :                         << 1;
     315           0 :             sad8x8[3] = compute8x4_sad_kernel_c(
     316           0 :                             src + 8 * src_stride + 8,
     317             :                             2 * src_stride,
     318           0 :                             ref + 8 * ref_stride + 8 + xSearchIndex,
     319             :                             2 * ref_stride)
     320           0 :                         << 1;
     321             :         } else {
     322           0 :             sad8x8[0] =
     323           0 :                 compute8x8_sad_kernel_c(src + 0 * src_stride + 0,
     324             :                                       src_stride,
     325           0 :                                       ref + 0 * ref_stride + 0 + xSearchIndex,
     326             :                                       ref_stride);
     327           0 :             sad8x8[1] =
     328           0 :                 compute8x8_sad_kernel_c(src + 0 * src_stride + 8,
     329             :                                       src_stride,
     330           0 :                                       ref + 0 * ref_stride + 8 + xSearchIndex,
     331             :                                       ref_stride);
     332           0 :             sad8x8[2] =
     333           0 :                 compute8x8_sad_kernel_c(src + 8 * src_stride + 0,
     334             :                                       src_stride,
     335           0 :                                       ref + 8 * ref_stride + 0 + xSearchIndex,
     336             :                                       ref_stride);
     337           0 :             sad8x8[3] =
     338           0 :                 compute8x8_sad_kernel_c(src + 8 * src_stride + 8,
     339             :                                       src_stride,
     340           0 :                                       ref + 8 * ref_stride + 8 + xSearchIndex,
     341             :                                       ref_stride);
     342             :         }
     343             : 
     344             :         // 8x8_0
     345           0 :         if (sad8x8[0] < p_best_sad8x8[0]) {
     346           0 :             p_best_sad8x8[0] = sad8x8[0];
     347           0 :             xMv = _MVXT(mv) + (int16_t)xSearchIndex * 4;
     348           0 :             yMv = _MVYT(mv);
     349           0 :             p_best_mv8x8[0] = ((uint16_t)yMv << 16) | ((uint16_t)xMv);
     350             :         }
     351             : 
     352             :         // 8x8_1
     353           0 :         if (sad8x8[1] < p_best_sad8x8[1]) {
     354           0 :             p_best_sad8x8[1] = sad8x8[1];
     355           0 :             xMv = _MVXT(mv) + (int16_t)xSearchIndex * 4;
     356           0 :             yMv = _MVYT(mv);
     357           0 :             p_best_mv8x8[1] = ((uint16_t)yMv << 16) | ((uint16_t)xMv);
     358             :         }
     359             : 
     360             :         // 8x8_2
     361           0 :         if (sad8x8[2] < p_best_sad8x8[2]) {
     362           0 :             p_best_sad8x8[2] = sad8x8[2];
     363           0 :             xMv = _MVXT(mv) + (int16_t)xSearchIndex * 4;
     364           0 :             yMv = _MVYT(mv);
     365           0 :             p_best_mv8x8[2] = ((uint16_t)yMv << 16) | ((uint16_t)xMv);
     366             :         }
     367             : 
     368             :         // 8x8_3
     369           0 :         if (sad8x8[3] < p_best_sad8x8[3]) {
     370           0 :             p_best_sad8x8[3] = sad8x8[3];
     371           0 :             xMv = _MVXT(mv) + (int16_t)xSearchIndex * 4;
     372           0 :             yMv = _MVYT(mv);
     373           0 :             p_best_mv8x8[3] = ((uint16_t)yMv << 16) | ((uint16_t)xMv);
     374             :         }
     375             : 
     376             :         // 16x16
     377           0 :         sad16x16 = (uint16_t)(sad8x8[0] + sad8x8[1] + sad8x8[2] + sad8x8[3]);
     378           0 :         p_sad16x16[xSearchIndex] =
     379             :             sad16x16;  // store the intermediate 16x16 SAD for 32x32.
     380           0 :         if ((uint32_t)(sad16x16) < p_best_sad16x16[0]) {
     381           0 :             p_best_sad16x16[0] = sad16x16;
     382           0 :             xMv = _MVXT(mv) + (int16_t)xSearchIndex * 4;
     383           0 :             yMv = _MVYT(mv);
     384           0 :             p_best_mv16x16[0] = ((uint16_t)yMv << 16) | ((uint16_t)xMv);
     385             :         }
     386             :     }
     387           0 : }
     388             : 
     389             : /*******************************************
     390             : Calculate SAD for 32x32, 64x64 from 16x16
     391             : and check if there is improvement, if yes keep
     392             : the best SAD+MV
     393             : *******************************************/
     394           0 : void get_eight_horizontal_search_point_results_32x32_64x64_pu_c(
     395             :     uint16_t *p_sad16x16, uint32_t *p_best_sad32x32, uint32_t *p_best_sad64x64,
     396             :     uint32_t *p_best_mv32x32, uint32_t *p_best_mv64x64, uint32_t mv) {
     397             :     int16_t xMv, yMv;
     398             :     uint32_t sad32x32_0, sad32x32_1, sad32x32_2, sad32x32_3, sad64x64;
     399             :     uint32_t xSearchIndex;
     400             : 
     401             :     /*--------------------
     402             :     |  32x32_0  |  32x32_1
     403             :     ----------------------
     404             :     |  32x32_2  |  32x32_3
     405             :     ----------------------*/
     406             : 
     407             :     /*  data ordering in p_sad16x16 buffer
     408             : 
     409             :     Search    Search            Search
     410             :     Point 0   Point 1           Point 7
     411             :     ---------------------------------------
     412             :     16x16_0    |    x    |    x    | ...... |    x    |
     413             :     ---------------------------------------
     414             :     16x16_1    |    x    |    x    | ...... |    x    |
     415             : 
     416             :     16x16_n    |    x    |    x    | ...... |    x    |
     417             : 
     418             :     ---------------------------------------
     419             :     16x16_15   |    x    |    x    | ...... |    x    |
     420             :     ---------------------------------------
     421             :     */
     422             : 
     423           0 :     for (xSearchIndex = 0; xSearchIndex < 8; xSearchIndex++) {
     424             :         // 32x32_0
     425           0 :         sad32x32_0 = p_sad16x16[0 * 8 + xSearchIndex] +
     426           0 :                      p_sad16x16[1 * 8 + xSearchIndex] +
     427           0 :                      p_sad16x16[2 * 8 + xSearchIndex] +
     428           0 :                      p_sad16x16[3 * 8 + xSearchIndex];
     429             : 
     430           0 :         if (sad32x32_0 < p_best_sad32x32[0]) {
     431           0 :             p_best_sad32x32[0] = sad32x32_0;
     432           0 :             xMv = _MVXT(mv) + (int16_t)xSearchIndex * 4;
     433           0 :             yMv = _MVYT(mv);
     434           0 :             p_best_mv32x32[0] = ((uint16_t)yMv << 16) | ((uint16_t)xMv);
     435             :         }
     436             : 
     437             :         // 32x32_1
     438           0 :         sad32x32_1 = p_sad16x16[4 * 8 + xSearchIndex] +
     439           0 :                      p_sad16x16[5 * 8 + xSearchIndex] +
     440           0 :                      p_sad16x16[6 * 8 + xSearchIndex] +
     441           0 :                      p_sad16x16[7 * 8 + xSearchIndex];
     442             : 
     443           0 :         if (sad32x32_1 < p_best_sad32x32[1]) {
     444           0 :             p_best_sad32x32[1] = sad32x32_1;
     445           0 :             xMv = _MVXT(mv) + (int16_t)xSearchIndex * 4;
     446           0 :             yMv = _MVYT(mv);
     447           0 :             p_best_mv32x32[1] = ((uint16_t)yMv << 16) | ((uint16_t)xMv);
     448             :         }
     449             : 
     450             :         // 32x32_2
     451           0 :         sad32x32_2 = p_sad16x16[8 * 8 + xSearchIndex] +
     452           0 :                      p_sad16x16[9 * 8 + xSearchIndex] +
     453           0 :                      p_sad16x16[10 * 8 + xSearchIndex] +
     454           0 :                      p_sad16x16[11 * 8 + xSearchIndex];
     455             : 
     456           0 :         if (sad32x32_2 < p_best_sad32x32[2]) {
     457           0 :             p_best_sad32x32[2] = sad32x32_2;
     458           0 :             xMv = _MVXT(mv) + (int16_t)xSearchIndex * 4;
     459           0 :             yMv = _MVYT(mv);
     460           0 :             p_best_mv32x32[2] = ((uint16_t)yMv << 16) | ((uint16_t)xMv);
     461             :         }
     462             : 
     463             :         // 32x32_3
     464           0 :         sad32x32_3 = p_sad16x16[12 * 8 + xSearchIndex] +
     465           0 :                      p_sad16x16[13 * 8 + xSearchIndex] +
     466           0 :                      p_sad16x16[14 * 8 + xSearchIndex] +
     467           0 :                      p_sad16x16[15 * 8 + xSearchIndex];
     468             : 
     469           0 :         if (sad32x32_3 < p_best_sad32x32[3]) {
     470           0 :             p_best_sad32x32[3] = sad32x32_3;
     471           0 :             xMv = _MVXT(mv) + (int16_t)xSearchIndex * 4;
     472           0 :             yMv = _MVYT(mv);
     473           0 :             p_best_mv32x32[3] = ((uint16_t)yMv << 16) | ((uint16_t)xMv);
     474             :         }
     475             : 
     476             :         // 64x64
     477           0 :         sad64x64 = sad32x32_0 + sad32x32_1 + sad32x32_2 + sad32x32_3;
     478           0 :         if (sad64x64 < p_best_sad64x64[0]) {
     479           0 :             p_best_sad64x64[0] = sad64x64;
     480           0 :             xMv = _MVXT(mv) + (int16_t)xSearchIndex * 4;
     481           0 :             yMv = _MVYT(mv);
     482           0 :             p_best_mv64x64[0] = ((uint16_t)yMv << 16) | ((uint16_t)xMv);
     483             :         }
     484             :     }
     485           0 : }
     486             : 
     487             : /*******************************************
     488             : Calculate SAD for 16x16 and its 8x8 subblocks
     489             : and check if there is improvement; if yes, keep
     490             : the best SAD+MV
     491             : *******************************************/
     492           0 : void sad_calculation_8x8_16x16_c(uint8_t *src, uint32_t src_stride, uint8_t *ref,
     493             :                                uint32_t ref_stride, uint32_t *p_best_sad8x8,
     494             :                                uint32_t *p_best_sad16x16,
     495             :                                uint32_t *p_best_mv8x8, uint32_t *p_best_mv16x16,
     496             :                                uint32_t mv, uint32_t *p_sad16x16,
     497             :                                EbBool sub_sad) {
     498             :     uint64_t sad8x8[4];
     499             :     uint64_t sad16x16;
     500             : 
     501           0 :     if (sub_sad) {
     502           0 :         sad8x8[0] = (compute8x4_sad_kernel_c(src + 0 * src_stride + 0,
     503             :                                            2 * src_stride,
     504             :                                            ref + 0 * ref_stride + 0,
     505             :                                            2 * ref_stride))
     506           0 :                     << 1;
     507           0 :         sad8x8[1] = (compute8x4_sad_kernel_c(src + 0 * src_stride + 8,
     508             :                                            2 * src_stride,
     509             :                                            ref + 0 * ref_stride + 8,
     510             :                                            2 * ref_stride))
     511           0 :                     << 1;
     512           0 :         sad8x8[2] = (compute8x4_sad_kernel_c(src + 8 * src_stride + 0,
     513             :                                            2 * src_stride,
     514           0 :                                            ref + 8 * ref_stride + 0,
     515             :                                            2 * ref_stride))
     516           0 :                     << 1;
     517           0 :         sad8x8[3] = (compute8x4_sad_kernel_c(src + 8 * src_stride + 8,
     518             :                                            2 * src_stride,
     519           0 :                                            ref + 8 * ref_stride + 8,
     520             :                                            2 * ref_stride))
     521           0 :                     << 1;
     522             :     } else {
     523           0 :         sad8x8[0] = compute8x8_sad_kernel_c(src + 0 * src_stride + 0,
     524             :                                           src_stride,
     525             :                                           ref + 0 * ref_stride + 0,
     526             :                                           ref_stride);
     527           0 :         sad8x8[1] = compute8x8_sad_kernel_c(src + 0 * src_stride + 8,
     528             :                                           src_stride,
     529             :                                           ref + 0 * ref_stride + 8,
     530             :                                           ref_stride);
     531           0 :         sad8x8[2] = compute8x8_sad_kernel_c(src + 8 * src_stride + 0,
     532             :                                           src_stride,
     533           0 :                                           ref + 8 * ref_stride + 0,
     534             :                                           ref_stride);
     535           0 :         sad8x8[3] = compute8x8_sad_kernel_c(src + 8 * src_stride + 8,
     536             :                                           src_stride,
     537           0 :                                           ref + 8 * ref_stride + 8,
     538             :                                           ref_stride);
     539             :     }
     540             : 
     541           0 :     if (sad8x8[0] < p_best_sad8x8[0]) {
     542           0 :         p_best_sad8x8[0] = (uint32_t)sad8x8[0];
     543           0 :         p_best_mv8x8[0] = mv;
     544             :     }
     545             : 
     546           0 :     if (sad8x8[1] < p_best_sad8x8[1]) {
     547           0 :         p_best_sad8x8[1] = (uint32_t)sad8x8[1];
     548           0 :         p_best_mv8x8[1] = mv;
     549             :     }
     550             : 
     551           0 :     if (sad8x8[2] < p_best_sad8x8[2]) {
     552           0 :         p_best_sad8x8[2] = (uint32_t)sad8x8[2];
     553           0 :         p_best_mv8x8[2] = mv;
     554             :     }
     555             : 
     556           0 :     if (sad8x8[3] < p_best_sad8x8[3]) {
     557           0 :         p_best_sad8x8[3] = (uint32_t)sad8x8[3];
     558           0 :         p_best_mv8x8[3] = mv;
     559             :     }
     560             : 
     561           0 :     sad16x16 = sad8x8[0] + sad8x8[1] + sad8x8[2] + sad8x8[3];
     562           0 :     if (sad16x16 < p_best_sad16x16[0]) {
     563           0 :         p_best_sad16x16[0] = (uint32_t)sad16x16;
     564           0 :         p_best_mv16x16[0] = mv;
     565             :     }
     566             : 
     567           0 :     *p_sad16x16 = (uint32_t)sad16x16;
     568           0 : }
     569             : 
     570             : /*******************************************
     571             : Calcualte SAD for 32x32,64x64 from 16x16
     572             : and check if there is improvment, if yes keep
     573             : the best SAD+MV
     574             : *******************************************/
     575           0 : void sad_calculation_32x32_64x64_c(uint32_t *p_sad16x16,
     576             :                                  uint32_t *p_best_sad32x32,
     577             :                                  uint32_t *p_best_sad64x64,
     578             :                                  uint32_t *p_best_mv32x32,
     579             :                                  uint32_t *p_best_mv64x64, uint32_t mv) {
     580             :     uint32_t sad32x32_0, sad32x32_1, sad32x32_2, sad32x32_3, sad64x64;
     581             : 
     582           0 :     sad32x32_0 = p_sad16x16[0] + p_sad16x16[1] + p_sad16x16[2] + p_sad16x16[3];
     583           0 :     if (sad32x32_0 < p_best_sad32x32[0]) {
     584           0 :         p_best_sad32x32[0] = sad32x32_0;
     585           0 :         p_best_mv32x32[0] = mv;
     586             :     }
     587             : 
     588           0 :     sad32x32_1 = p_sad16x16[4] + p_sad16x16[5] + p_sad16x16[6] + p_sad16x16[7];
     589           0 :     if (sad32x32_1 < p_best_sad32x32[1]) {
     590           0 :         p_best_sad32x32[1] = sad32x32_1;
     591           0 :         p_best_mv32x32[1] = mv;
     592             :     }
     593             : 
     594           0 :     sad32x32_2 =
     595           0 :         p_sad16x16[8] + p_sad16x16[9] + p_sad16x16[10] + p_sad16x16[11];
     596           0 :     if (sad32x32_2 < p_best_sad32x32[2]) {
     597           0 :         p_best_sad32x32[2] = sad32x32_2;
     598           0 :         p_best_mv32x32[2] = mv;
     599             :     }
     600             : 
     601           0 :     sad32x32_3 =
     602           0 :         p_sad16x16[12] + p_sad16x16[13] + p_sad16x16[14] + p_sad16x16[15];
     603           0 :     if (sad32x32_3 < p_best_sad32x32[3]) {
     604           0 :         p_best_sad32x32[3] = sad32x32_3;
     605           0 :         p_best_mv32x32[3] = mv;
     606             :     }
     607             : 
     608           0 :     sad64x64 = sad32x32_0 + sad32x32_1 + sad32x32_2 + sad32x32_3;
     609           0 :     if (sad64x64 < p_best_sad64x64[0]) {
     610           0 :         p_best_sad64x64[0] = sad64x64;
     611           0 :         p_best_mv64x64[0] = mv;
     612             :     }
     613           0 : }
     614             : 
     615             : #define BLK_NUM 5
     616             : /**********************************************************
     617             : Calcualte the best SAD from Rect H, V and H4, V4 partitions
     618             : 
     619             : and return the best partition index
     620             : ***********************************************************/
     621           0 : void nsq_me_analysis(uint32_t *p_sad64x32, uint32_t *p_sad32x16,
     622             :                      uint32_t *p_sad16x8, uint32_t *p_sad32x64,
     623             :                      uint32_t *p_sad16x32, uint32_t *p_sad8x16,
     624             :                      uint32_t *p_sad32x8, uint32_t *p_sad8x32,
     625             :                      uint32_t *p_sad64x16, uint32_t *p_sad16x64,
     626             :                      uint8_t *p_nsq_64x64, uint8_t *p_nsq_32x32,
     627             :                      uint8_t *p_nsq_16x16, uint8_t *p_nsq_8x8) {
     628             :     uint32_t sad[BLK_NUM];  // sad_N, sad_H, sad_V, sad_H4, sad_V4, sad_S;
     629             :     uint32_t best_nsq_sad;
     630             :     uint8_t nsq_index;
     631             :     /*64x64*/
     632             :     // sad[0] = p_sad64x64;
     633           0 :     sad[1] = p_sad64x32[0] + p_sad64x32[1];
     634           0 :     sad[2] = p_sad32x64[0] + p_sad32x64[1];
     635           0 :     sad[3] = p_sad64x16[0] + p_sad64x16[1] + p_sad64x16[2] + p_sad64x16[3];
     636           0 :     sad[4] = p_sad16x64[0] + p_sad16x64[1] + p_sad16x64[2] + p_sad16x64[3];
     637             :     // sad[5] = p_sad32x32[0] + p_sad32x32[1] + p_sad32x32[2] + p_sad32x32[3];
     638           0 :     best_nsq_sad = MAX_SAD_VALUE;
     639           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     640           0 :         if (sad[nsq_index] < best_nsq_sad) {
     641           0 :             best_nsq_sad = sad[nsq_index];
     642           0 :             *p_nsq_64x64 = nsq_index;
     643             :         }
     644             :     }
     645             :     /*32x32*/
     646             :     // 32x32_0
     647             :     // sad[0] = p_sad32x32[0];
     648           0 :     sad[1] = p_sad32x16[0] + p_sad32x16[1];
     649           0 :     sad[2] = p_sad16x32[0] + p_sad16x32[1];
     650           0 :     sad[3] = p_sad32x8[0] + p_sad32x8[1] + p_sad32x8[2] + p_sad32x8[3];
     651           0 :     sad[4] = p_sad8x32[0] + p_sad8x32[1] + p_sad8x32[2] + p_sad8x32[3];
     652             :     // sad[5] = p_sad16x16[0] + p_sad16x16[1] + p_sad16x16[2] + p_sad16x16[3];
     653           0 :     best_nsq_sad = MAX_SAD_VALUE;
     654           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     655           0 :         if (sad[nsq_index] < best_nsq_sad) {
     656           0 :             best_nsq_sad = sad[nsq_index];
     657           0 :             p_nsq_32x32[0] = nsq_index;
     658             :         }
     659             :     }
     660             :     // 32x32_1
     661             :     // sad[0] = p_sad32x32[1];
     662           0 :     sad[1] = p_sad32x16[2] + p_sad32x16[3];
     663           0 :     sad[2] = p_sad16x32[2] + p_sad16x32[3];
     664           0 :     sad[3] = p_sad32x8[4] + p_sad32x8[5] + p_sad32x8[6] + p_sad32x8[7];
     665           0 :     sad[4] = p_sad8x32[4] + p_sad8x32[5] + p_sad8x32[6] + p_sad8x32[7];
     666             :     // sad[5] = p_sad16x16[4] + p_sad16x16[5] + p_sad16x16[6] + p_sad16x16[7];
     667           0 :     best_nsq_sad = MAX_SAD_VALUE;
     668           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     669           0 :         if (sad[nsq_index] < best_nsq_sad) {
     670           0 :             best_nsq_sad = sad[nsq_index];
     671           0 :             p_nsq_32x32[1] = nsq_index;
     672             :         }
     673             :     }
     674             :     // 32x32_2
     675             :     // sad[0] = p_sad32x32[2];
     676           0 :     sad[1] = p_sad32x16[4] + p_sad32x16[5];
     677           0 :     sad[2] = p_sad16x32[4] + p_sad16x32[5];
     678           0 :     sad[3] = p_sad32x8[8] + p_sad32x8[9] + p_sad32x8[10] + p_sad32x8[11];
     679           0 :     sad[4] = p_sad8x32[8] + p_sad8x32[9] + p_sad8x32[10] + p_sad8x32[11];
     680             :     // sad[5] = p_sad16x16[8] + p_sad16x16[9] + p_sad16x16[10] + p_sad16x16[11];
     681           0 :     best_nsq_sad = MAX_SAD_VALUE;
     682           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     683           0 :         if (sad[nsq_index] < best_nsq_sad) {
     684           0 :             best_nsq_sad = sad[nsq_index];
     685           0 :             p_nsq_32x32[2] = nsq_index;
     686             :         }
     687             :     }
     688             :     // 32x32_3
     689             :     // sad[0] = p_sad32x32[3];
     690           0 :     sad[1] = p_sad32x16[6] + p_sad32x16[7];
     691           0 :     sad[2] = p_sad16x32[6] + p_sad16x32[7];
     692           0 :     sad[3] = p_sad32x8[12] + p_sad32x8[13] + p_sad32x8[14] + p_sad32x8[15];
     693           0 :     sad[4] = p_sad8x32[12] + p_sad8x32[13] + p_sad8x32[14] + p_sad8x32[15];
     694             :     // sad[5] = p_sad16x16[12] + p_sad16x16[13] + p_sad16x16[14] +
     695             :     // p_sad16x16[15];
     696           0 :     best_nsq_sad = MAX_SAD_VALUE;
     697           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     698           0 :         if (sad[nsq_index] < best_nsq_sad) {
     699           0 :             best_nsq_sad = sad[nsq_index];
     700           0 :             p_nsq_32x32[3] = nsq_index;
     701             :         }
     702             :     }
     703             :     /*16x16*/
     704             :     // 16x16_0
     705             :     // sad[0] = p_sad16x16[0];
     706           0 :     sad[1] = p_sad16x8[0] + p_sad16x8[1];
     707           0 :     sad[2] = p_sad8x16[0] + p_sad8x16[1];
     708           0 :     sad[3] = MAX_SAD_VALUE;
     709           0 :     sad[4] = MAX_SAD_VALUE;
     710             :     // sad[5] = p_sad8x8[0] + p_sad8x8[1] + p_sad8x8[2] + p_sad8x8[3];
     711           0 :     best_nsq_sad = MAX_SAD_VALUE;
     712           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     713           0 :         if (sad[nsq_index] < best_nsq_sad) {
     714           0 :             best_nsq_sad = sad[nsq_index];
     715           0 :             p_nsq_16x16[0] = nsq_index;
     716             :         }
     717             :     }
     718           0 :     p_nsq_8x8[0] = p_nsq_8x8[1] = p_nsq_8x8[2] = p_nsq_8x8[3] = p_nsq_16x16[0];
     719             :     // 16x16_1
     720             :     // sad[0] = p_sad16x16[1];
     721           0 :     sad[1] = p_sad16x8[2] + p_sad16x8[3];
     722           0 :     sad[2] = p_sad8x16[2] + p_sad8x16[3];
     723           0 :     sad[3] = MAX_SAD_VALUE;
     724           0 :     sad[4] = MAX_SAD_VALUE;
     725             :     // sad[5] = p_sad8x8[4] + p_sad8x8[5] + p_sad8x8[6] + p_sad8x8[7];
     726           0 :     best_nsq_sad = MAX_SAD_VALUE;
     727           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     728           0 :         if (sad[nsq_index] < best_nsq_sad) {
     729           0 :             best_nsq_sad = sad[nsq_index];
     730           0 :             p_nsq_16x16[1] = nsq_index;
     731             :         }
     732             :     }
     733           0 :     p_nsq_8x8[4] = p_nsq_8x8[5] = p_nsq_8x8[6] = p_nsq_8x8[7] = p_nsq_16x16[1];
     734             :     // 16x16_2
     735             :     // sad[0] = p_sad16x16[2];
     736           0 :     sad[1] = p_sad16x8[4] + p_sad16x8[5];
     737           0 :     sad[2] = p_sad8x16[4] + p_sad8x16[5];
     738           0 :     sad[3] = MAX_SAD_VALUE;
     739           0 :     sad[4] = MAX_SAD_VALUE;
     740             :     // sad[5] = p_sad8x8[8] + p_sad8x8[9] + p_sad8x8[10] + p_sad8x8[11];
     741           0 :     best_nsq_sad = MAX_SAD_VALUE;
     742           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     743           0 :         if (sad[nsq_index] < best_nsq_sad) {
     744           0 :             best_nsq_sad = sad[nsq_index];
     745           0 :             p_nsq_16x16[2] = nsq_index;
     746             :         }
     747             :     }
     748           0 :     p_nsq_8x8[8] = p_nsq_8x8[9] = p_nsq_8x8[10] = p_nsq_8x8[11] =
     749             :         p_nsq_16x16[2];
     750             :     // 16x16_3
     751             :     // sad[0] = p_sad16x16[3];
     752           0 :     sad[1] = p_sad16x8[6] + p_sad16x8[7];
     753           0 :     sad[2] = p_sad8x16[6] + p_sad8x16[7];
     754           0 :     sad[3] = MAX_SAD_VALUE;
     755           0 :     sad[4] = MAX_SAD_VALUE;
     756             :     // sad[5] = p_sad8x8[12] + p_sad8x8[13] + p_sad8x8[14] + p_sad8x8[15];
     757           0 :     best_nsq_sad = MAX_SAD_VALUE;
     758           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     759           0 :         if (sad[nsq_index] < best_nsq_sad) {
     760           0 :             best_nsq_sad = sad[nsq_index];
     761           0 :             p_nsq_16x16[3] = nsq_index;
     762             :         }
     763             :     }
     764           0 :     p_nsq_8x8[12] = p_nsq_8x8[13] = p_nsq_8x8[14] = p_nsq_8x8[15] =
     765             :         p_nsq_16x16[3];
     766             :     // 16x16_4
     767             :     // sad[0] = p_sad16x16[4];
     768           0 :     sad[1] = p_sad16x8[8] + p_sad16x8[9];
     769           0 :     sad[2] = p_sad8x16[8] + p_sad8x16[9];
     770           0 :     sad[3] = MAX_SAD_VALUE;
     771           0 :     sad[4] = MAX_SAD_VALUE;
     772             :     // sad[5] = p_sad8x8[16] + p_sad8x8[17] + p_sad8x8[18] + p_sad8x8[19];
     773           0 :     best_nsq_sad = MAX_SAD_VALUE;
     774           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     775           0 :         if (sad[nsq_index] < best_nsq_sad) {
     776           0 :             best_nsq_sad = sad[nsq_index];
     777           0 :             p_nsq_16x16[4] = nsq_index;
     778             :         }
     779             :     }
     780           0 :     p_nsq_8x8[16] = p_nsq_8x8[17] = p_nsq_8x8[18] = p_nsq_8x8[19] =
     781             :         p_nsq_16x16[4];
     782             :     // 16x16_5
     783             :     // sad[0] = p_sad16x16[5];
     784           0 :     sad[1] = p_sad16x8[10] + p_sad16x8[11];
     785           0 :     sad[2] = p_sad8x16[10] + p_sad8x16[11];
     786           0 :     sad[3] = MAX_SAD_VALUE;
     787           0 :     sad[4] = MAX_SAD_VALUE;
     788             :     // sad[5] = p_sad8x8[20] + p_sad8x8[21] + p_sad8x8[22] + p_sad8x8[23];
     789           0 :     best_nsq_sad = MAX_SAD_VALUE;
     790           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     791           0 :         if (sad[nsq_index] < best_nsq_sad) {
     792           0 :             best_nsq_sad = sad[nsq_index];
     793           0 :             p_nsq_16x16[5] = nsq_index;
     794             :         }
     795             :     }
     796           0 :     p_nsq_8x8[20] = p_nsq_8x8[21] = p_nsq_8x8[22] = p_nsq_8x8[23] =
     797             :         p_nsq_16x16[5];
     798             :     // 16x16_6
     799             :     // sad[0] = p_sad16x16[6];
     800           0 :     sad[1] = p_sad16x8[12] + p_sad16x8[13];
     801           0 :     sad[2] = p_sad8x16[12] + p_sad8x16[13];
     802           0 :     sad[3] = MAX_SAD_VALUE;
     803           0 :     sad[4] = MAX_SAD_VALUE;
     804             :     // sad[5] = p_sad8x8[24] + p_sad8x8[25] + p_sad8x8[26] + p_sad8x8[27];
     805           0 :     best_nsq_sad = MAX_SAD_VALUE;
     806           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     807           0 :         if (sad[nsq_index] < best_nsq_sad) {
     808           0 :             best_nsq_sad = sad[nsq_index];
     809           0 :             p_nsq_16x16[6] = nsq_index;
     810             :         }
     811             :     }
     812           0 :     p_nsq_8x8[24] = p_nsq_8x8[25] = p_nsq_8x8[26] = p_nsq_8x8[27] =
     813             :         p_nsq_16x16[6];
     814             :     // 16x16_7
     815             :     // sad[0] = p_sad16x16[7];
     816           0 :     sad[1] = p_sad16x8[14] + p_sad16x8[15];
     817           0 :     sad[2] = p_sad8x16[14] + p_sad8x16[15];
     818           0 :     sad[3] = MAX_SAD_VALUE;
     819           0 :     sad[4] = MAX_SAD_VALUE;
     820             :     // sad[5] = p_sad8x8[28] + p_sad8x8[29] + p_sad8x8[30] + p_sad8x8[31];
     821           0 :     best_nsq_sad = MAX_SAD_VALUE;
     822           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     823           0 :         if (sad[nsq_index] < best_nsq_sad) {
     824           0 :             best_nsq_sad = sad[nsq_index];
     825           0 :             p_nsq_16x16[7] = nsq_index;
     826             :         }
     827             :     }
     828           0 :     p_nsq_8x8[28] = p_nsq_8x8[29] = p_nsq_8x8[30] = p_nsq_8x8[31] =
     829             :         p_nsq_16x16[7];
     830             :     // 16x16_8
     831             :     // sad[0] = p_sad16x16[8];
     832           0 :     sad[1] = p_sad16x8[16] + p_sad16x8[17];
     833           0 :     sad[2] = p_sad8x16[16] + p_sad8x16[17];
     834           0 :     sad[3] = MAX_SAD_VALUE;
     835           0 :     sad[4] = MAX_SAD_VALUE;
     836             :     // sad[5] = p_sad8x8[32] + p_sad8x8[33] + p_sad8x8[34] + p_sad8x8[35];
     837           0 :     best_nsq_sad = MAX_SAD_VALUE;
     838           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     839           0 :         if (sad[nsq_index] < best_nsq_sad) {
     840           0 :             best_nsq_sad = sad[nsq_index];
     841           0 :             p_nsq_16x16[8] = nsq_index;
     842             :         }
     843             :     }
     844           0 :     p_nsq_8x8[32] = p_nsq_8x8[33] = p_nsq_8x8[34] = p_nsq_8x8[35] =
     845             :         p_nsq_16x16[8];
     846             :     // 16x16_9
     847             :     // sad[0] = p_sad16x16[9];
     848           0 :     sad[1] = p_sad16x8[18] + p_sad16x8[19];
     849           0 :     sad[2] = p_sad8x16[18] + p_sad8x16[19];
     850           0 :     sad[3] = MAX_SAD_VALUE;
     851           0 :     sad[4] = MAX_SAD_VALUE;
     852             :     // sad[5] = p_sad8x8[36] + p_sad8x8[37] + p_sad8x8[38] + p_sad8x8[39];
     853           0 :     best_nsq_sad = MAX_SAD_VALUE;
     854           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     855           0 :         if (sad[nsq_index] < best_nsq_sad) {
     856           0 :             best_nsq_sad = sad[nsq_index];
     857           0 :             p_nsq_16x16[9] = nsq_index;
     858             :         }
     859             :     }
     860           0 :     p_nsq_8x8[36] = p_nsq_8x8[37] = p_nsq_8x8[38] = p_nsq_8x8[39] =
     861             :         p_nsq_16x16[9];
     862             :     // 16x16_10
     863             :     // sad[0] = p_sad16x16[10];
     864           0 :     sad[1] = p_sad16x8[20] + p_sad16x8[21];
     865           0 :     sad[2] = p_sad8x16[20] + p_sad8x16[21];
     866           0 :     sad[3] = MAX_SAD_VALUE;
     867           0 :     sad[4] = MAX_SAD_VALUE;
     868             :     // sad[5] = p_sad8x8[40] + p_sad8x8[41] + p_sad8x8[42] + p_sad8x8[43];
     869           0 :     best_nsq_sad = MAX_SAD_VALUE;
     870           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     871           0 :         if (sad[nsq_index] < best_nsq_sad) {
     872           0 :             best_nsq_sad = sad[nsq_index];
     873           0 :             p_nsq_16x16[10] = nsq_index;
     874             :         }
     875             :     }
     876           0 :     p_nsq_8x8[40] = p_nsq_8x8[41] = p_nsq_8x8[42] = p_nsq_8x8[43] =
     877             :         p_nsq_16x16[10];
     878             :     // 16x16_11
     879             :     // sad[0] = p_sad16x16[11];
     880           0 :     sad[1] = p_sad16x8[22] + p_sad16x8[23];
     881           0 :     sad[2] = p_sad8x16[22] + p_sad8x16[23];
     882           0 :     sad[3] = MAX_SAD_VALUE;
     883           0 :     sad[4] = MAX_SAD_VALUE;
     884             :     // sad[5] = p_sad8x8[44] + p_sad8x8[45] + p_sad8x8[46] + p_sad8x8[47];
     885           0 :     best_nsq_sad = MAX_SAD_VALUE;
     886           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     887           0 :         if (sad[nsq_index] < best_nsq_sad) {
     888           0 :             best_nsq_sad = sad[nsq_index];
     889           0 :             p_nsq_16x16[11] = nsq_index;
     890             :         }
     891             :     }
     892           0 :     p_nsq_8x8[44] = p_nsq_8x8[45] = p_nsq_8x8[46] = p_nsq_8x8[47] =
     893             :         p_nsq_16x16[11];
     894             :     // 16x16_12
     895             :     // sad[0] = p_sad16x16[12];
     896           0 :     sad[1] = p_sad16x8[24] + p_sad16x8[25];
     897           0 :     sad[2] = p_sad8x16[24] + p_sad8x16[25];
     898           0 :     sad[3] = MAX_SAD_VALUE;
     899           0 :     sad[4] = MAX_SAD_VALUE;
     900             :     // sad[5] = p_sad8x8[48] + p_sad8x8[49] + p_sad8x8[50] + p_sad8x8[51];
     901           0 :     best_nsq_sad = MAX_SAD_VALUE;
     902           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     903           0 :         if (sad[nsq_index] < best_nsq_sad) {
     904           0 :             best_nsq_sad = sad[nsq_index];
     905           0 :             p_nsq_16x16[12] = nsq_index;
     906             :         }
     907             :     }
     908           0 :     p_nsq_8x8[48] = p_nsq_8x8[49] = p_nsq_8x8[50] = p_nsq_8x8[51] =
     909             :         p_nsq_16x16[12];
     910             :     // 16x16_13
     911             :     // sad[0] = p_sad16x16[13];
     912           0 :     sad[1] = p_sad16x8[26] + p_sad16x8[27];
     913           0 :     sad[2] = p_sad8x16[26] + p_sad8x16[27];
     914           0 :     sad[3] = MAX_SAD_VALUE;
     915           0 :     sad[4] = MAX_SAD_VALUE;
     916             :     // sad[5] = p_sad8x8[52] + p_sad8x8[53] + p_sad8x8[54] + p_sad8x8[55];
     917           0 :     best_nsq_sad = MAX_SAD_VALUE;
     918           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     919           0 :         if (sad[nsq_index] < best_nsq_sad) {
     920           0 :             best_nsq_sad = sad[nsq_index];
     921           0 :             p_nsq_16x16[13] = nsq_index;
     922             :         }
     923             :     }
     924           0 :     p_nsq_8x8[52] = p_nsq_8x8[53] = p_nsq_8x8[54] = p_nsq_8x8[55] =
     925             :         p_nsq_16x16[13];
     926             :     // 16x16_14
     927             :     // sad[0] = p_sad16x16[14];
     928           0 :     sad[1] = p_sad16x8[28] + p_sad16x8[29];
     929           0 :     sad[2] = p_sad8x16[28] + p_sad8x16[29];
     930           0 :     sad[3] = MAX_SAD_VALUE;
     931           0 :     sad[4] = MAX_SAD_VALUE;
     932             :     // sad[5] = p_sad8x8[56] + p_sad8x8[57] + p_sad8x8[58] + p_sad8x8[59];
     933           0 :     best_nsq_sad = MAX_SAD_VALUE;
     934           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     935           0 :         if (sad[nsq_index] < best_nsq_sad) {
     936           0 :             best_nsq_sad = sad[nsq_index];
     937           0 :             p_nsq_16x16[14] = nsq_index;
     938             :         }
     939             :     }
     940           0 :     p_nsq_8x8[56] = p_nsq_8x8[57] = p_nsq_8x8[58] = p_nsq_8x8[59] =
     941             :         p_nsq_16x16[14];
     942             :     // 16x16_15
     943             :     // sad[0] = p_sad16x16[15];
     944           0 :     sad[1] = p_sad16x8[30] + p_sad16x8[31];
     945           0 :     sad[2] = p_sad8x16[30] + p_sad8x16[31];
     946           0 :     sad[3] = MAX_SAD_VALUE;
     947           0 :     sad[4] = MAX_SAD_VALUE;
     948             :     // sad[5] = p_sad8x8[60] + p_sad8x8[61] + p_sad8x8[62] + p_sad8x8[63];
     949           0 :     best_nsq_sad = MAX_SAD_VALUE;
     950           0 :     for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
     951           0 :         if (sad[nsq_index] < best_nsq_sad) {
     952           0 :             best_nsq_sad = sad[nsq_index];
     953           0 :             p_nsq_16x16[15] = nsq_index;
     954             :         }
     955             :     }
     956           0 :     p_nsq_8x8[60] = p_nsq_8x8[61] = p_nsq_8x8[62] = p_nsq_8x8[63] =
     957             :         p_nsq_16x16[15];
     958           0 : }
     959             : 
     960             : /****************************************************
     961             : Calculate SAD for Rect H, V and H4, V4 partitions
     962             : 
     963             : and update its Motion info if the resulting SAD is better
     964             : ****************************************************/
     965           0 : void ExtSadCalculation(uint32_t *p_sad8x8, uint32_t *p_sad16x16,
     966             :                        uint32_t *p_sad32x32, uint32_t *p_best_sad64x32,
     967             :                        uint32_t *p_best_mv64x32, uint32_t *p_best_sad32x16,
     968             :                        uint32_t *p_best_mv32x16, uint32_t *p_best_sad16x8,
     969             :                        uint32_t *p_best_mv16x8, uint32_t *p_best_sad32x64,
     970             :                        uint32_t *p_best_mv32x64, uint32_t *p_best_sad16x32,
     971             :                        uint32_t *p_best_mv16x32, uint32_t *p_best_sad8x16,
     972             :                        uint32_t *p_best_mv8x16, uint32_t *p_best_sad32x8,
     973             :                        uint32_t *p_best_mv32x8, uint32_t *p_best_sad8x32,
     974             :                        uint32_t *p_best_mv8x32, uint32_t *p_best_sad64x16,
     975             :                        uint32_t *p_best_mv64x16, uint32_t *p_best_sad16x64,
     976             :                        uint32_t *p_best_mv16x64, uint32_t mv) {
     977             :     uint32_t sad;
     978             : 
     979             :     uint32_t sad_16x8[32];
     980             :     uint32_t sad_8x16[32];
     981             :     uint32_t sad_32x16[8];
     982             :     uint32_t sad_16x32[8];
     983             : 
     984             :     // 64x32
     985           0 :     sad = p_sad32x32[0] + p_sad32x32[1];
     986           0 :     if (sad < p_best_sad64x32[0]) {
     987           0 :         p_best_sad64x32[0] = sad;
     988           0 :         p_best_mv64x32[0] = mv;
     989             :     }
     990             : 
     991           0 :     sad = p_sad32x32[2] + p_sad32x32[3];
     992           0 :     if (sad < p_best_sad64x32[1]) {
     993           0 :         p_best_sad64x32[1] = sad;
     994           0 :         p_best_mv64x32[1] = mv;
     995             :     }
     996             : 
     997             :     // 32x16
     998           0 :     sad_32x16[0] = p_sad16x16[0] + p_sad16x16[1];
     999           0 :     if (sad_32x16[0] < p_best_sad32x16[0]) {
    1000           0 :         p_best_sad32x16[0] = sad_32x16[0];
    1001           0 :         p_best_mv32x16[0] = mv;
    1002             :     }
    1003             : 
    1004           0 :     sad_32x16[1] = p_sad16x16[2] + p_sad16x16[3];
    1005           0 :     if (sad_32x16[1] < p_best_sad32x16[1]) {
    1006           0 :         p_best_sad32x16[1] = sad_32x16[1];
    1007           0 :         p_best_mv32x16[1] = mv;
    1008             :     }
    1009             : 
    1010           0 :     sad_32x16[2] = p_sad16x16[4] + p_sad16x16[5];
    1011           0 :     if (sad_32x16[2] < p_best_sad32x16[2]) {
    1012           0 :         p_best_sad32x16[2] = sad_32x16[2];
    1013           0 :         p_best_mv32x16[2] = mv;
    1014             :     }
    1015             : 
    1016           0 :     sad_32x16[3] = p_sad16x16[6] + p_sad16x16[7];
    1017           0 :     if (sad_32x16[3] < p_best_sad32x16[3]) {
    1018           0 :         p_best_sad32x16[3] = sad_32x16[3];
    1019           0 :         p_best_mv32x16[3] = mv;
    1020             :     }
    1021             : 
    1022           0 :     sad_32x16[4] = p_sad16x16[8] + p_sad16x16[9];
    1023           0 :     if (sad_32x16[4] < p_best_sad32x16[4]) {
    1024           0 :         p_best_sad32x16[4] = sad_32x16[4];
    1025           0 :         p_best_mv32x16[4] = mv;
    1026             :     }
    1027             : 
    1028           0 :     sad_32x16[5] = p_sad16x16[10] + p_sad16x16[11];
    1029           0 :     if (sad_32x16[5] < p_best_sad32x16[5]) {
    1030           0 :         p_best_sad32x16[5] = sad_32x16[5];
    1031           0 :         p_best_mv32x16[5] = mv;
    1032             :     }
    1033             : 
    1034           0 :     sad_32x16[6] = p_sad16x16[12] + p_sad16x16[13];
    1035           0 :     if (sad_32x16[6] < p_best_sad32x16[6]) {
    1036           0 :         p_best_sad32x16[6] = sad_32x16[6];
    1037           0 :         p_best_mv32x16[6] = mv;
    1038             :     }
    1039             : 
    1040           0 :     sad_32x16[7] = p_sad16x16[14] + p_sad16x16[15];
    1041           0 :     if (sad_32x16[7] < p_best_sad32x16[7]) {
    1042           0 :         p_best_sad32x16[7] = sad_32x16[7];
    1043           0 :         p_best_mv32x16[7] = mv;
    1044             :     }
    1045             : 
    1046             :     // 64x16
    1047           0 :     sad = sad_32x16[0] + sad_32x16[2];
    1048           0 :     if (sad < p_best_sad64x16[0]) {
    1049           0 :         p_best_sad64x16[0] = sad;
    1050           0 :         p_best_mv64x16[0] = mv;
    1051             :     }
    1052           0 :     sad = sad_32x16[1] + sad_32x16[3];
    1053           0 :     if (sad < p_best_sad64x16[1]) {
    1054           0 :         p_best_sad64x16[1] = sad;
    1055           0 :         p_best_mv64x16[1] = mv;
    1056             :     }
    1057             : 
    1058           0 :     sad = sad_32x16[4] + sad_32x16[6];
    1059           0 :     if (sad < p_best_sad64x16[2]) {
    1060           0 :         p_best_sad64x16[2] = sad;
    1061           0 :         p_best_mv64x16[2] = mv;
    1062             :     }
    1063           0 :     sad = sad_32x16[5] + sad_32x16[7];
    1064           0 :     if (sad < p_best_sad64x16[3]) {
    1065           0 :         p_best_sad64x16[3] = sad;
    1066           0 :         p_best_mv64x16[3] = mv;
    1067             :     }
    1068             : 
    1069             :     // 16x8
    1070           0 :     sad_16x8[0] = p_sad8x8[0] + p_sad8x8[1];
    1071           0 :     if (sad_16x8[0] < p_best_sad16x8[0]) {
    1072           0 :         p_best_sad16x8[0] = sad_16x8[0];
    1073           0 :         p_best_mv16x8[0] = mv;
    1074             :     }
    1075             : 
    1076           0 :     sad_16x8[1] = p_sad8x8[2] + p_sad8x8[3];
    1077           0 :     if (sad_16x8[1] < p_best_sad16x8[1]) {
    1078           0 :         p_best_sad16x8[1] = sad_16x8[1];
    1079           0 :         p_best_mv16x8[1] = mv;
    1080             :     }
    1081             : 
    1082           0 :     sad_16x8[2] = p_sad8x8[4] + p_sad8x8[5];
    1083           0 :     if (sad_16x8[2] < p_best_sad16x8[2]) {
    1084           0 :         p_best_sad16x8[2] = sad_16x8[2];
    1085           0 :         p_best_mv16x8[2] = mv;
    1086             :     }
    1087             : 
    1088           0 :     sad_16x8[3] = p_sad8x8[6] + p_sad8x8[7];
    1089           0 :     if (sad_16x8[3] < p_best_sad16x8[3]) {
    1090           0 :         p_best_sad16x8[3] = sad_16x8[3];
    1091           0 :         p_best_mv16x8[3] = mv;
    1092             :     }
    1093             : 
    1094           0 :     sad_16x8[4] = p_sad8x8[8] + p_sad8x8[9];
    1095           0 :     if (sad_16x8[4] < p_best_sad16x8[4]) {
    1096           0 :         p_best_sad16x8[4] = sad_16x8[4];
    1097           0 :         p_best_mv16x8[4] = mv;
    1098             :     }
    1099             : 
    1100           0 :     sad_16x8[5] = p_sad8x8[10] + p_sad8x8[11];
    1101           0 :     if (sad_16x8[5] < p_best_sad16x8[5]) {
    1102           0 :         p_best_sad16x8[5] = sad_16x8[5];
    1103           0 :         p_best_mv16x8[5] = mv;
    1104             :     }
    1105             : 
    1106           0 :     sad_16x8[6] = p_sad8x8[12] + p_sad8x8[13];
    1107           0 :     if (sad_16x8[6] < p_best_sad16x8[6]) {
    1108           0 :         p_best_sad16x8[6] = sad_16x8[6];
    1109           0 :         p_best_mv16x8[6] = mv;
    1110             :     }
    1111             : 
    1112           0 :     sad_16x8[7] = p_sad8x8[14] + p_sad8x8[15];
    1113           0 :     if (sad_16x8[7] < p_best_sad16x8[7]) {
    1114           0 :         p_best_sad16x8[7] = sad_16x8[7];
    1115           0 :         p_best_mv16x8[7] = mv;
    1116             :     }
    1117             : 
    1118           0 :     sad_16x8[8] = p_sad8x8[16] + p_sad8x8[17];
    1119           0 :     if (sad_16x8[8] < p_best_sad16x8[8]) {
    1120           0 :         p_best_sad16x8[8] = sad_16x8[8];
    1121           0 :         p_best_mv16x8[8] = mv;
    1122             :     }
    1123             : 
    1124           0 :     sad_16x8[9] = p_sad8x8[18] + p_sad8x8[19];
    1125           0 :     if (sad_16x8[9] < p_best_sad16x8[9]) {
    1126           0 :         p_best_sad16x8[9] = sad_16x8[9];
    1127           0 :         p_best_mv16x8[9] = mv;
    1128             :     }
    1129             : 
    1130           0 :     sad_16x8[10] = p_sad8x8[20] + p_sad8x8[21];
    1131           0 :     if (sad_16x8[10] < p_best_sad16x8[10]) {
    1132           0 :         p_best_sad16x8[10] = sad_16x8[10];
    1133           0 :         p_best_mv16x8[10] = mv;
    1134             :     }
    1135             : 
    1136           0 :     sad_16x8[11] = p_sad8x8[22] + p_sad8x8[23];
    1137           0 :     if (sad_16x8[11] < p_best_sad16x8[11]) {
    1138           0 :         p_best_sad16x8[11] = sad_16x8[11];
    1139           0 :         p_best_mv16x8[11] = mv;
    1140             :     }
    1141             : 
    1142           0 :     sad_16x8[12] = p_sad8x8[24] + p_sad8x8[25];
    1143           0 :     if (sad_16x8[12] < p_best_sad16x8[12]) {
    1144           0 :         p_best_sad16x8[12] = sad_16x8[12];
    1145           0 :         p_best_mv16x8[12] = mv;
    1146             :     }
    1147             : 
    1148           0 :     sad_16x8[13] = p_sad8x8[26] + p_sad8x8[27];
    1149           0 :     if (sad_16x8[13] < p_best_sad16x8[13]) {
    1150           0 :         p_best_sad16x8[13] = sad_16x8[13];
    1151           0 :         p_best_mv16x8[13] = mv;
    1152             :     }
    1153             : 
    1154           0 :     sad_16x8[14] = p_sad8x8[28] + p_sad8x8[29];
    1155           0 :     if (sad_16x8[14] < p_best_sad16x8[14]) {
    1156           0 :         p_best_sad16x8[14] = sad_16x8[14];
    1157           0 :         p_best_mv16x8[14] = mv;
    1158             :     }
    1159             : 
    1160           0 :     sad_16x8[15] = p_sad8x8[30] + p_sad8x8[31];
    1161           0 :     if (sad_16x8[15] < p_best_sad16x8[15]) {
    1162           0 :         p_best_sad16x8[15] = sad_16x8[15];
    1163           0 :         p_best_mv16x8[15] = mv;
    1164             :     }
    1165             : 
    1166           0 :     sad_16x8[16] = p_sad8x8[32] + p_sad8x8[33];
    1167           0 :     if (sad_16x8[16] < p_best_sad16x8[16]) {
    1168           0 :         p_best_sad16x8[16] = sad_16x8[16];
    1169           0 :         p_best_mv16x8[16] = mv;
    1170             :     }
    1171             : 
    1172           0 :     sad_16x8[17] = p_sad8x8[34] + p_sad8x8[35];
    1173           0 :     if (sad_16x8[17] < p_best_sad16x8[17]) {
    1174           0 :         p_best_sad16x8[17] = sad_16x8[17];
    1175           0 :         p_best_mv16x8[17] = mv;
    1176             :     }
    1177             : 
    1178           0 :     sad_16x8[18] = p_sad8x8[36] + p_sad8x8[37];
    1179           0 :     if (sad_16x8[18] < p_best_sad16x8[18]) {
    1180           0 :         p_best_sad16x8[18] = sad_16x8[18];
    1181           0 :         p_best_mv16x8[18] = mv;
    1182             :     }
    1183             : 
    1184           0 :     sad_16x8[19] = p_sad8x8[38] + p_sad8x8[39];
    1185           0 :     if (sad_16x8[19] < p_best_sad16x8[19]) {
    1186           0 :         p_best_sad16x8[19] = sad_16x8[19];
    1187           0 :         p_best_mv16x8[19] = mv;
    1188             :     }
    1189             : 
    1190           0 :     sad_16x8[20] = p_sad8x8[40] + p_sad8x8[41];
    1191           0 :     if (sad_16x8[20] < p_best_sad16x8[20]) {
    1192           0 :         p_best_sad16x8[20] = sad_16x8[20];
    1193           0 :         p_best_mv16x8[20] = mv;
    1194             :     }
    1195             : 
    1196           0 :     sad_16x8[21] = p_sad8x8[42] + p_sad8x8[43];
    1197           0 :     if (sad_16x8[21] < p_best_sad16x8[21]) {
    1198           0 :         p_best_sad16x8[21] = sad_16x8[21];
    1199           0 :         p_best_mv16x8[21] = mv;
    1200             :     }
    1201             : 
    1202           0 :     sad_16x8[22] = p_sad8x8[44] + p_sad8x8[45];
    1203           0 :     if (sad_16x8[22] < p_best_sad16x8[22]) {
    1204           0 :         p_best_sad16x8[22] = sad_16x8[22];
    1205           0 :         p_best_mv16x8[22] = mv;
    1206             :     }
    1207             : 
    1208           0 :     sad_16x8[23] = p_sad8x8[46] + p_sad8x8[47];
    1209           0 :     if (sad_16x8[23] < p_best_sad16x8[23]) {
    1210           0 :         p_best_sad16x8[23] = sad_16x8[23];
    1211           0 :         p_best_mv16x8[23] = mv;
    1212             :     }
    1213             : 
    1214           0 :     sad_16x8[24] = p_sad8x8[48] + p_sad8x8[49];
    1215           0 :     if (sad_16x8[24] < p_best_sad16x8[24]) {
    1216           0 :         p_best_sad16x8[24] = sad_16x8[24];
    1217           0 :         p_best_mv16x8[24] = mv;
    1218             :     }
    1219             : 
    1220           0 :     sad_16x8[25] = p_sad8x8[50] + p_sad8x8[51];
    1221           0 :     if (sad_16x8[25] < p_best_sad16x8[25]) {
    1222           0 :         p_best_sad16x8[25] = sad_16x8[25];
    1223           0 :         p_best_mv16x8[25] = mv;
    1224             :     }
    1225             : 
    1226           0 :     sad_16x8[26] = p_sad8x8[52] + p_sad8x8[53];
    1227           0 :     if (sad_16x8[26] < p_best_sad16x8[26]) {
    1228           0 :         p_best_sad16x8[26] = sad_16x8[26];
    1229           0 :         p_best_mv16x8[26] = mv;
    1230             :     }
    1231             : 
    1232           0 :     sad_16x8[27] = p_sad8x8[54] + p_sad8x8[55];
    1233           0 :     if (sad_16x8[27] < p_best_sad16x8[27]) {
    1234           0 :         p_best_sad16x8[27] = sad_16x8[27];
    1235           0 :         p_best_mv16x8[27] = mv;
    1236             :     }
    1237             : 
    1238           0 :     sad_16x8[28] = p_sad8x8[56] + p_sad8x8[57];
    1239           0 :     if (sad_16x8[28] < p_best_sad16x8[28]) {
    1240           0 :         p_best_sad16x8[28] = sad_16x8[28];
    1241           0 :         p_best_mv16x8[28] = mv;
    1242             :     }
    1243             : 
    1244           0 :     sad_16x8[29] = p_sad8x8[58] + p_sad8x8[59];
    1245           0 :     if (sad_16x8[29] < p_best_sad16x8[29]) {
    1246           0 :         p_best_sad16x8[29] = sad_16x8[29];
    1247           0 :         p_best_mv16x8[29] = mv;
    1248             :     }
    1249             : 
    1250           0 :     sad_16x8[30] = p_sad8x8[60] + p_sad8x8[61];
    1251           0 :     if (sad_16x8[30] < p_best_sad16x8[30]) {
    1252           0 :         p_best_sad16x8[30] = sad_16x8[30];
    1253           0 :         p_best_mv16x8[30] = mv;
    1254             :     }
    1255             : 
    1256           0 :     sad_16x8[31] = p_sad8x8[62] + p_sad8x8[63];
    1257           0 :     if (sad_16x8[31] < p_best_sad16x8[31]) {
    1258           0 :         p_best_sad16x8[31] = sad_16x8[31];
    1259           0 :         p_best_mv16x8[31] = mv;
    1260             :     }
    1261             : 
    1262             :     // 32x64
    1263           0 :     sad = p_sad32x32[0] + p_sad32x32[2];
    1264           0 :     if (sad < p_best_sad32x64[0]) {
    1265           0 :         p_best_sad32x64[0] = sad;
    1266           0 :         p_best_mv32x64[0] = mv;
    1267             :     }
    1268             : 
    1269           0 :     sad = p_sad32x32[1] + p_sad32x32[3];
    1270           0 :     if (sad < p_best_sad32x64[1]) {
    1271           0 :         p_best_sad32x64[1] = sad;
    1272           0 :         p_best_mv32x64[1] = mv;
    1273             :     }
    1274             : 
    1275             :     // 16x32
    1276           0 :     sad_16x32[0] = p_sad16x16[0] + p_sad16x16[2];
    1277           0 :     if (sad_16x32[0] < p_best_sad16x32[0]) {
    1278           0 :         p_best_sad16x32[0] = sad_16x32[0];
    1279           0 :         p_best_mv16x32[0] = mv;
    1280             :     }
    1281             : 
    1282           0 :     sad_16x32[1] = p_sad16x16[1] + p_sad16x16[3];
    1283           0 :     if (sad_16x32[1] < p_best_sad16x32[1]) {
    1284           0 :         p_best_sad16x32[1] = sad_16x32[1];
    1285           0 :         p_best_mv16x32[1] = mv;
    1286             :     }
    1287             : 
    1288           0 :     sad_16x32[2] = p_sad16x16[4] + p_sad16x16[6];
    1289           0 :     if (sad_16x32[2] < p_best_sad16x32[2]) {
    1290           0 :         p_best_sad16x32[2] = sad_16x32[2];
    1291           0 :         p_best_mv16x32[2] = mv;
    1292             :     }
    1293             : 
    1294           0 :     sad_16x32[3] = p_sad16x16[5] + p_sad16x16[7];
    1295           0 :     if (sad_16x32[3] < p_best_sad16x32[3]) {
    1296           0 :         p_best_sad16x32[3] = sad_16x32[3];
    1297           0 :         p_best_mv16x32[3] = mv;
    1298             :     }
    1299             : 
    1300           0 :     sad_16x32[4] = p_sad16x16[8] + p_sad16x16[10];
    1301           0 :     if (sad_16x32[4] < p_best_sad16x32[4]) {
    1302           0 :         p_best_sad16x32[4] = sad_16x32[4];
    1303           0 :         p_best_mv16x32[4] = mv;
    1304             :     }
    1305             : 
    1306           0 :     sad_16x32[5] = p_sad16x16[9] + p_sad16x16[11];
    1307           0 :     if (sad_16x32[5] < p_best_sad16x32[5]) {
    1308           0 :         p_best_sad16x32[5] = sad_16x32[5];
    1309           0 :         p_best_mv16x32[5] = mv;
    1310             :     }
    1311             : 
    1312           0 :     sad_16x32[6] = p_sad16x16[12] + p_sad16x16[14];
    1313           0 :     if (sad_16x32[6] < p_best_sad16x32[6]) {
    1314           0 :         p_best_sad16x32[6] = sad_16x32[6];
    1315           0 :         p_best_mv16x32[6] = mv;
    1316             :     }
    1317             : 
    1318           0 :     sad_16x32[7] = p_sad16x16[13] + p_sad16x16[15];
    1319           0 :     if (sad_16x32[7] < p_best_sad16x32[7]) {
    1320           0 :         p_best_sad16x32[7] = sad_16x32[7];
    1321           0 :         p_best_mv16x32[7] = mv;
    1322             :     }
    1323             : 
    1324           0 :     sad = sad_16x32[0] + sad_16x32[4];
    1325           0 :     if (sad < p_best_sad16x64[0]) {
    1326           0 :         p_best_sad16x64[0] = sad;
    1327           0 :         p_best_mv16x64[0] = mv;
    1328             :     }
    1329           0 :     sad = sad_16x32[1] + sad_16x32[5];
    1330           0 :     if (sad < p_best_sad16x64[1]) {
    1331           0 :         p_best_sad16x64[1] = sad;
    1332           0 :         p_best_mv16x64[1] = mv;
    1333             :     }
    1334             : 
    1335           0 :     sad = sad_16x32[2] + sad_16x32[6];
    1336           0 :     if (sad < p_best_sad16x64[2]) {
    1337           0 :         p_best_sad16x64[2] = sad;
    1338           0 :         p_best_mv16x64[2] = mv;
    1339             :     }
    1340             : 
    1341           0 :     sad = sad_16x32[3] + sad_16x32[7];
    1342           0 :     if (sad < p_best_sad16x64[3]) {
    1343           0 :         p_best_sad16x64[3] = sad;
    1344           0 :         p_best_mv16x64[3] = mv;
    1345             :     }
    1346             : 
    1347             :     // 8x16
    1348           0 :     sad_8x16[0] = p_sad8x8[0] + p_sad8x8[2];
    1349           0 :     if (sad_8x16[0] < p_best_sad8x16[0]) {
    1350           0 :         p_best_sad8x16[0] = sad_8x16[0];
    1351           0 :         p_best_mv8x16[0] = mv;
    1352             :     }
    1353             : 
    1354           0 :     sad_8x16[1] = p_sad8x8[1] + p_sad8x8[3];
    1355           0 :     if (sad_8x16[1] < p_best_sad8x16[1]) {
    1356           0 :         p_best_sad8x16[1] = sad_8x16[1];
    1357           0 :         p_best_mv8x16[1] = mv;
    1358             :     }
    1359             : 
    1360           0 :     sad_8x16[2] = p_sad8x8[4] + p_sad8x8[6];
    1361           0 :     if (sad_8x16[2] < p_best_sad8x16[2]) {
    1362           0 :         p_best_sad8x16[2] = sad_8x16[2];
    1363           0 :         p_best_mv8x16[2] = mv;
    1364             :     }
    1365             : 
    1366           0 :     sad_8x16[3] = p_sad8x8[5] + p_sad8x8[7];
    1367           0 :     if (sad_8x16[3] < p_best_sad8x16[3]) {
    1368           0 :         p_best_sad8x16[3] = sad_8x16[3];
    1369           0 :         p_best_mv8x16[3] = mv;
    1370             :     }
    1371             : 
    1372           0 :     sad_8x16[4] = p_sad8x8[8] + p_sad8x8[10];
    1373           0 :     if (sad_8x16[4] < p_best_sad8x16[4]) {
    1374           0 :         p_best_sad8x16[4] = sad_8x16[4];
    1375           0 :         p_best_mv8x16[4] = mv;
    1376             :     }
    1377             : 
    1378           0 :     sad_8x16[5] = p_sad8x8[9] + p_sad8x8[11];
    1379           0 :     if (sad_8x16[5] < p_best_sad8x16[5]) {
    1380           0 :         p_best_sad8x16[5] = sad_8x16[5];
    1381           0 :         p_best_mv8x16[5] = mv;
    1382             :     }
    1383             : 
    1384           0 :     sad_8x16[6] = p_sad8x8[12] + p_sad8x8[14];
    1385           0 :     if (sad_8x16[6] < p_best_sad8x16[6]) {
    1386           0 :         p_best_sad8x16[6] = sad_8x16[6];
    1387           0 :         p_best_mv8x16[6] = mv;
    1388             :     }
    1389             : 
    1390           0 :     sad_8x16[7] = p_sad8x8[13] + p_sad8x8[15];
    1391           0 :     if (sad_8x16[7] < p_best_sad8x16[7]) {
    1392           0 :         p_best_sad8x16[7] = sad_8x16[7];
    1393           0 :         p_best_mv8x16[7] = mv;
    1394             :     }
    1395             : 
    1396           0 :     sad_8x16[8] = p_sad8x8[16] + p_sad8x8[18];
    1397           0 :     if (sad_8x16[8] < p_best_sad8x16[8]) {
    1398           0 :         p_best_sad8x16[8] = sad_8x16[8];
    1399           0 :         p_best_mv8x16[8] = mv;
    1400             :     }
    1401             : 
    1402           0 :     sad_8x16[9] = p_sad8x8[17] + p_sad8x8[19];
    1403           0 :     if (sad_8x16[9] < p_best_sad8x16[9]) {
    1404           0 :         p_best_sad8x16[9] = sad_8x16[9];
    1405           0 :         p_best_mv8x16[9] = mv;
    1406             :     }
    1407             : 
    1408           0 :     sad_8x16[10] = p_sad8x8[20] + p_sad8x8[22];
    1409           0 :     if (sad_8x16[10] < p_best_sad8x16[10]) {
    1410           0 :         p_best_sad8x16[10] = sad_8x16[10];
    1411           0 :         p_best_mv8x16[10] = mv;
    1412             :     }
    1413             : 
    1414           0 :     sad_8x16[11] = p_sad8x8[21] + p_sad8x8[23];
    1415           0 :     if (sad_8x16[11] < p_best_sad8x16[11]) {
    1416           0 :         p_best_sad8x16[11] = sad_8x16[11];
    1417           0 :         p_best_mv8x16[11] = mv;
    1418             :     }
    1419             : 
    1420           0 :     sad_8x16[12] = p_sad8x8[24] + p_sad8x8[26];
    1421           0 :     if (sad_8x16[12] < p_best_sad8x16[12]) {
    1422           0 :         p_best_sad8x16[12] = sad_8x16[12];
    1423           0 :         p_best_mv8x16[12] = mv;
    1424             :     }
    1425             : 
    1426           0 :     sad_8x16[13] = p_sad8x8[25] + p_sad8x8[27];
    1427           0 :     if (sad_8x16[13] < p_best_sad8x16[13]) {
    1428           0 :         p_best_sad8x16[13] = sad_8x16[13];
    1429           0 :         p_best_mv8x16[13] = mv;
    1430             :     }
    1431             : 
    1432           0 :     sad_8x16[14] = p_sad8x8[28] + p_sad8x8[30];
    1433           0 :     if (sad_8x16[14] < p_best_sad8x16[14]) {
    1434           0 :         p_best_sad8x16[14] = sad_8x16[14];
    1435           0 :         p_best_mv8x16[14] = mv;
    1436             :     }
    1437             : 
    1438           0 :     sad_8x16[15] = p_sad8x8[29] + p_sad8x8[31];
    1439           0 :     if (sad_8x16[15] < p_best_sad8x16[15]) {
    1440           0 :         p_best_sad8x16[15] = sad_8x16[15];
    1441           0 :         p_best_mv8x16[15] = mv;
    1442             :     }
    1443             : 
    1444           0 :     sad_8x16[16] = p_sad8x8[32] + p_sad8x8[34];
    1445           0 :     if (sad_8x16[16] < p_best_sad8x16[16]) {
    1446           0 :         p_best_sad8x16[16] = sad_8x16[16];
    1447           0 :         p_best_mv8x16[16] = mv;
    1448             :     }
    1449             : 
    1450           0 :     sad_8x16[17] = p_sad8x8[33] + p_sad8x8[35];
    1451           0 :     if (sad_8x16[17] < p_best_sad8x16[17]) {
    1452           0 :         p_best_sad8x16[17] = sad_8x16[17];
    1453           0 :         p_best_mv8x16[17] = mv;
    1454             :     }
    1455             : 
    1456           0 :     sad_8x16[18] = p_sad8x8[36] + p_sad8x8[38];
    1457           0 :     if (sad_8x16[18] < p_best_sad8x16[18]) {
    1458           0 :         p_best_sad8x16[18] = sad_8x16[18];
    1459           0 :         p_best_mv8x16[18] = mv;
    1460             :     }
    1461             : 
    1462           0 :     sad_8x16[19] = p_sad8x8[37] + p_sad8x8[39];
    1463           0 :     if (sad_8x16[19] < p_best_sad8x16[19]) {
    1464           0 :         p_best_sad8x16[19] = sad_8x16[19];
    1465           0 :         p_best_mv8x16[19] = mv;
    1466             :     }
    1467             : 
    1468           0 :     sad_8x16[20] = p_sad8x8[40] + p_sad8x8[42];
    1469           0 :     if (sad_8x16[20] < p_best_sad8x16[20]) {
    1470           0 :         p_best_sad8x16[20] = sad_8x16[20];
    1471           0 :         p_best_mv8x16[20] = mv;
    1472             :     }
    1473             : 
    1474           0 :     sad_8x16[21] = p_sad8x8[41] + p_sad8x8[43];
    1475           0 :     if (sad_8x16[21] < p_best_sad8x16[21]) {
    1476           0 :         p_best_sad8x16[21] = sad_8x16[21];
    1477           0 :         p_best_mv8x16[21] = mv;
    1478             :     }
    1479             : 
    1480           0 :     sad_8x16[22] = p_sad8x8[44] + p_sad8x8[46];
    1481           0 :     if (sad_8x16[22] < p_best_sad8x16[22]) {
    1482           0 :         p_best_sad8x16[22] = sad_8x16[22];
    1483           0 :         p_best_mv8x16[22] = mv;
    1484             :     }
    1485             : 
    1486           0 :     sad_8x16[23] = p_sad8x8[45] + p_sad8x8[47];
    1487           0 :     if (sad_8x16[23] < p_best_sad8x16[23]) {
    1488           0 :         p_best_sad8x16[23] = sad_8x16[23];
    1489           0 :         p_best_mv8x16[23] = mv;
    1490             :     }
    1491             : 
    1492           0 :     sad_8x16[24] = p_sad8x8[48] + p_sad8x8[50];
    1493           0 :     if (sad_8x16[24] < p_best_sad8x16[24]) {
    1494           0 :         p_best_sad8x16[24] = sad_8x16[24];
    1495           0 :         p_best_mv8x16[24] = mv;
    1496             :     }
    1497             : 
    1498           0 :     sad_8x16[25] = p_sad8x8[49] + p_sad8x8[51];
    1499           0 :     if (sad_8x16[25] < p_best_sad8x16[25]) {
    1500           0 :         p_best_sad8x16[25] = sad_8x16[25];
    1501           0 :         p_best_mv8x16[25] = mv;
    1502             :     }
    1503             : 
    1504           0 :     sad_8x16[26] = p_sad8x8[52] + p_sad8x8[54];
    1505           0 :     if (sad_8x16[26] < p_best_sad8x16[26]) {
    1506           0 :         p_best_sad8x16[26] = sad_8x16[26];
    1507           0 :         p_best_mv8x16[26] = mv;
    1508             :     }
    1509             : 
    1510           0 :     sad_8x16[27] = p_sad8x8[53] + p_sad8x8[55];
    1511           0 :     if (sad_8x16[27] < p_best_sad8x16[27]) {
    1512           0 :         p_best_sad8x16[27] = sad_8x16[27];
    1513           0 :         p_best_mv8x16[27] = mv;
    1514             :     }
    1515             : 
    1516           0 :     sad_8x16[28] = p_sad8x8[56] + p_sad8x8[58];
    1517           0 :     if (sad_8x16[28] < p_best_sad8x16[28]) {
    1518           0 :         p_best_sad8x16[28] = sad_8x16[28];
    1519           0 :         p_best_mv8x16[28] = mv;
    1520             :     }
    1521             : 
    1522           0 :     sad_8x16[29] = p_sad8x8[57] + p_sad8x8[59];
    1523           0 :     if (sad_8x16[29] < p_best_sad8x16[29]) {
    1524           0 :         p_best_sad8x16[29] = sad_8x16[29];
    1525           0 :         p_best_mv8x16[29] = mv;
    1526             :     }
    1527             : 
    1528           0 :     sad_8x16[30] = p_sad8x8[60] + p_sad8x8[62];
    1529           0 :     if (sad_8x16[30] < p_best_sad8x16[30]) {
    1530           0 :         p_best_sad8x16[30] = sad_8x16[30];
    1531           0 :         p_best_mv8x16[30] = mv;
    1532             :     }
    1533             : 
    1534           0 :     sad_8x16[31] = p_sad8x8[61] + p_sad8x8[63];
    1535           0 :     if (sad_8x16[31] < p_best_sad8x16[31]) {
    1536           0 :         p_best_sad8x16[31] = sad_8x16[31];
    1537           0 :         p_best_mv8x16[31] = mv;
    1538             :     }
    1539             : 
    1540             :     // 32x8
    1541           0 :     sad = sad_16x8[0] + sad_16x8[2];
    1542           0 :     if (sad < p_best_sad32x8[0]) {
    1543           0 :         p_best_sad32x8[0] = sad;
    1544           0 :         p_best_mv32x8[0] = mv;
    1545             :     }
    1546             : 
    1547           0 :     sad = sad_16x8[1] + sad_16x8[3];
    1548           0 :     if (sad < p_best_sad32x8[1]) {
    1549           0 :         p_best_sad32x8[1] = sad;
    1550           0 :         p_best_mv32x8[1] = mv;
    1551             :     }
    1552             : 
    1553           0 :     sad = sad_16x8[4] + sad_16x8[6];
    1554           0 :     if (sad < p_best_sad32x8[2]) {
    1555           0 :         p_best_sad32x8[2] = sad;
    1556           0 :         p_best_mv32x8[2] = mv;
    1557             :     }
    1558             : 
    1559           0 :     sad = sad_16x8[5] + sad_16x8[7];
    1560           0 :     if (sad < p_best_sad32x8[3]) {
    1561           0 :         p_best_sad32x8[3] = sad;
    1562           0 :         p_best_mv32x8[3] = mv;
    1563             :     }
    1564             : 
    1565           0 :     sad = sad_16x8[8] + sad_16x8[10];
    1566           0 :     if (sad < p_best_sad32x8[4]) {
    1567           0 :         p_best_sad32x8[4] = sad;
    1568           0 :         p_best_mv32x8[4] = mv;
    1569             :     }
    1570             : 
    1571           0 :     sad = sad_16x8[9] + sad_16x8[11];
    1572           0 :     if (sad < p_best_sad32x8[5]) {
    1573           0 :         p_best_sad32x8[5] = sad;
    1574           0 :         p_best_mv32x8[5] = mv;
    1575             :     }
    1576             : 
    1577           0 :     sad = sad_16x8[12] + sad_16x8[14];
    1578           0 :     if (sad < p_best_sad32x8[6]) {
    1579           0 :         p_best_sad32x8[6] = sad;
    1580           0 :         p_best_mv32x8[6] = mv;
    1581             :     }
    1582             : 
    1583           0 :     sad = sad_16x8[13] + sad_16x8[15];
    1584           0 :     if (sad < p_best_sad32x8[7]) {
    1585           0 :         p_best_sad32x8[7] = sad;
    1586           0 :         p_best_mv32x8[7] = mv;
    1587             :     }
    1588             : 
    1589           0 :     sad = sad_16x8[16] + sad_16x8[18];
    1590           0 :     if (sad < p_best_sad32x8[8]) {
    1591           0 :         p_best_sad32x8[8] = sad;
    1592           0 :         p_best_mv32x8[8] = mv;
    1593             :     }
    1594             : 
    1595           0 :     sad = sad_16x8[17] + sad_16x8[19];
    1596           0 :     if (sad < p_best_sad32x8[9]) {
    1597           0 :         p_best_sad32x8[9] = sad;
    1598           0 :         p_best_mv32x8[9] = mv;
    1599             :     }
    1600             : 
    1601           0 :     sad = sad_16x8[20] + sad_16x8[22];
    1602           0 :     if (sad < p_best_sad32x8[10]) {
    1603           0 :         p_best_sad32x8[10] = sad;
    1604           0 :         p_best_mv32x8[10] = mv;
    1605             :     }
    1606             : 
    1607           0 :     sad = sad_16x8[21] + sad_16x8[23];
    1608           0 :     if (sad < p_best_sad32x8[11]) {
    1609           0 :         p_best_sad32x8[11] = sad;
    1610           0 :         p_best_mv32x8[11] = mv;
    1611             :     }
    1612             : 
    1613           0 :     sad = sad_16x8[24] + sad_16x8[26];
    1614           0 :     if (sad < p_best_sad32x8[12]) {
    1615           0 :         p_best_sad32x8[12] = sad;
    1616           0 :         p_best_mv32x8[12] = mv;
    1617             :     }
    1618             : 
    1619           0 :     sad = sad_16x8[25] + sad_16x8[27];
    1620           0 :     if (sad < p_best_sad32x8[13]) {
    1621           0 :         p_best_sad32x8[13] = sad;
    1622           0 :         p_best_mv32x8[13] = mv;
    1623             :     }
    1624             : 
    1625           0 :     sad = sad_16x8[28] + sad_16x8[30];
    1626           0 :     if (sad < p_best_sad32x8[14]) {
    1627           0 :         p_best_sad32x8[14] = sad;
    1628           0 :         p_best_mv32x8[14] = mv;
    1629             :     }
    1630             : 
    1631           0 :     sad = sad_16x8[29] + sad_16x8[31];
    1632           0 :     if (sad < p_best_sad32x8[15]) {
    1633           0 :         p_best_sad32x8[15] = sad;
    1634           0 :         p_best_mv32x8[15] = mv;
    1635             :     }
    1636             : 
    1637             :     // 8x32
    1638           0 :     sad = sad_8x16[0] + sad_8x16[4];
    1639           0 :     if (sad < p_best_sad8x32[0]) {
    1640           0 :         p_best_sad8x32[0] = sad;
    1641           0 :         p_best_mv8x32[0] = mv;
    1642             :     }
    1643             : 
    1644           0 :     sad = sad_8x16[1] + sad_8x16[5];
    1645           0 :     if (sad < p_best_sad8x32[1]) {
    1646           0 :         p_best_sad8x32[1] = sad;
    1647           0 :         p_best_mv8x32[1] = mv;
    1648             :     }
    1649             : 
    1650           0 :     sad = sad_8x16[2] + sad_8x16[6];
    1651           0 :     if (sad < p_best_sad8x32[2]) {
    1652           0 :         p_best_sad8x32[2] = sad;
    1653           0 :         p_best_mv8x32[2] = mv;
    1654             :     }
    1655             : 
    1656           0 :     sad = sad_8x16[3] + sad_8x16[7];
    1657           0 :     if (sad < p_best_sad8x32[3]) {
    1658           0 :         p_best_sad8x32[3] = sad;
    1659           0 :         p_best_mv8x32[3] = mv;
    1660             :     }
    1661             : 
    1662           0 :     sad = sad_8x16[8] + sad_8x16[12];
    1663           0 :     if (sad < p_best_sad8x32[4]) {
    1664           0 :         p_best_sad8x32[4] = sad;
    1665           0 :         p_best_mv8x32[4] = mv;
    1666             :     }
    1667             : 
    1668           0 :     sad = sad_8x16[9] + sad_8x16[13];
    1669           0 :     if (sad < p_best_sad8x32[5]) {
    1670           0 :         p_best_sad8x32[5] = sad;
    1671           0 :         p_best_mv8x32[5] = mv;
    1672             :     }
    1673             : 
    1674           0 :     sad = sad_8x16[10] + sad_8x16[14];
    1675           0 :     if (sad < p_best_sad8x32[6]) {
    1676           0 :         p_best_sad8x32[6] = sad;
    1677           0 :         p_best_mv8x32[6] = mv;
    1678             :     }
    1679             : 
    1680           0 :     sad = sad_8x16[11] + sad_8x16[15];
    1681           0 :     if (sad < p_best_sad8x32[7]) {
    1682           0 :         p_best_sad8x32[7] = sad;
    1683           0 :         p_best_mv8x32[7] = mv;
    1684             :     }
    1685             : 
    1686           0 :     sad = sad_8x16[16] + sad_8x16[20];
    1687           0 :     if (sad < p_best_sad8x32[8]) {
    1688           0 :         p_best_sad8x32[8] = sad;
    1689           0 :         p_best_mv8x32[8] = mv;
    1690             :     }
    1691             : 
    1692           0 :     sad = sad_8x16[17] + sad_8x16[21];
    1693           0 :     if (sad < p_best_sad8x32[9]) {
    1694           0 :         p_best_sad8x32[9] = sad;
    1695           0 :         p_best_mv8x32[9] = mv;
    1696             :     }
    1697             : 
    1698           0 :     sad = sad_8x16[18] + sad_8x16[22];
    1699           0 :     if (sad < p_best_sad8x32[10]) {
    1700           0 :         p_best_sad8x32[10] = sad;
    1701           0 :         p_best_mv8x32[10] = mv;
    1702             :     }
    1703             : 
    1704           0 :     sad = sad_8x16[19] + sad_8x16[23];
    1705           0 :     if (sad < p_best_sad8x32[11]) {
    1706           0 :         p_best_sad8x32[11] = sad;
    1707           0 :         p_best_mv8x32[11] = mv;
    1708             :     }
    1709             : 
    1710           0 :     sad = sad_8x16[24] + sad_8x16[28];
    1711           0 :     if (sad < p_best_sad8x32[12]) {
    1712           0 :         p_best_sad8x32[12] = sad;
    1713           0 :         p_best_mv8x32[12] = mv;
    1714             :     }
    1715             : 
    1716           0 :     sad = sad_8x16[25] + sad_8x16[29];
    1717           0 :     if (sad < p_best_sad8x32[13]) {
    1718           0 :         p_best_sad8x32[13] = sad;
    1719           0 :         p_best_mv8x32[13] = mv;
    1720             :     }
    1721             : 
    1722           0 :     sad = sad_8x16[26] + sad_8x16[30];
    1723           0 :     if (sad < p_best_sad8x32[14]) {
    1724           0 :         p_best_sad8x32[14] = sad;
    1725           0 :         p_best_mv8x32[14] = mv;
    1726             :     }
    1727             : 
    1728           0 :     sad = sad_8x16[27] + sad_8x16[31];
    1729           0 :     if (sad < p_best_sad8x32[15]) {
    1730           0 :         p_best_sad8x32[15] = sad;
    1731           0 :         p_best_mv8x32[15] = mv;
    1732             :     }
    1733           0 : }
    1734             : 
    1735             : /****************************************************
    1736             : Calculate SAD for Rect H, V and H4, V4 partitions
    1737             : and update their motion info if the resulting SAD is lower
    1738             : ****************************************************/
    1739           0 : void ext_eigth_sad_calculation_nsq_c(
    1740             :     uint32_t p_sad8x8[64][8], uint32_t p_sad16x16[16][8],
    1741             :     uint32_t p_sad32x32[4][8], uint32_t *p_best_sad64x32,
    1742             :     uint32_t *p_best_mv64x32, uint32_t *p_best_sad32x16,
    1743             :     uint32_t *p_best_mv32x16, uint32_t *p_best_sad16x8, uint32_t *p_best_mv16x8,
    1744             :     uint32_t *p_best_sad32x64, uint32_t *p_best_mv32x64,
    1745             :     uint32_t *p_best_sad16x32, uint32_t *p_best_mv16x32,
    1746             :     uint32_t *p_best_sad8x16, uint32_t *p_best_mv8x16, uint32_t *p_best_sad32x8,
    1747             :     uint32_t *p_best_mv32x8, uint32_t *p_best_sad8x32, uint32_t *p_best_mv8x32,
    1748             :     uint32_t *p_best_sad64x16, uint32_t *p_best_mv64x16,
    1749             :     uint32_t *p_best_sad16x64, uint32_t *p_best_mv16x64, uint32_t mv) {
    1750             :     uint8_t search_index;
    1751             :     uint32_t sad;
    1752             :     uint32_t sad_16x8[32];
    1753             :     uint32_t sad_8x16[32];
    1754             :     uint32_t sad_32x16[8];
    1755             :     uint32_t sad_16x32[8];
    1756             : 
    1757             :     int16_t x_mv, y_mv;
    1758             : 
    1759           0 :     for (search_index = 0; search_index < 8; search_index++) {
    1760             :         // 64x32
    1761           0 :         sad = p_sad32x32[0][search_index] + p_sad32x32[1][search_index];
    1762           0 :         if (sad < p_best_sad64x32[0]) {
    1763           0 :             p_best_sad64x32[0] = sad;
    1764           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1765           0 :             y_mv = _MVYT(mv);
    1766           0 :             p_best_mv64x32[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1767             :         }
    1768             : 
    1769           0 :         sad = p_sad32x32[2][search_index] + p_sad32x32[3][search_index];
    1770           0 :         if (sad < p_best_sad64x32[1]) {
    1771           0 :             p_best_sad64x32[1] = sad;
    1772           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1773           0 :             y_mv = _MVYT(mv);
    1774           0 :             p_best_mv64x32[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1775             :         }
    1776             : 
    1777             :         // 32x16
    1778           0 :         sad_32x16[0] =
    1779           0 :             p_sad16x16[0][search_index] + p_sad16x16[1][search_index];
    1780           0 :         if (sad_32x16[0] < p_best_sad32x16[0]) {
    1781           0 :             p_best_sad32x16[0] = sad_32x16[0];
    1782           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1783           0 :             y_mv = _MVYT(mv);
    1784           0 :             p_best_mv32x16[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1785             :         }
    1786             : 
    1787           0 :         sad_32x16[1] =
    1788           0 :             p_sad16x16[2][search_index] + p_sad16x16[3][search_index];
    1789           0 :         if (sad_32x16[1] < p_best_sad32x16[1]) {
    1790           0 :             p_best_sad32x16[1] = sad_32x16[1];
    1791           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1792           0 :             y_mv = _MVYT(mv);
    1793           0 :             p_best_mv32x16[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1794             :         }
    1795             : 
    1796           0 :         sad_32x16[2] =
    1797           0 :             p_sad16x16[4][search_index] + p_sad16x16[5][search_index];
    1798           0 :         if (sad_32x16[2] < p_best_sad32x16[2]) {
    1799           0 :             p_best_sad32x16[2] = sad_32x16[2];
    1800           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1801           0 :             y_mv = _MVYT(mv);
    1802           0 :             p_best_mv32x16[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1803             :         }
    1804             : 
    1805           0 :         sad_32x16[3] =
    1806           0 :             p_sad16x16[6][search_index] + p_sad16x16[7][search_index];
    1807           0 :         if (sad_32x16[3] < p_best_sad32x16[3]) {
    1808           0 :             p_best_sad32x16[3] = sad_32x16[3];
    1809           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1810           0 :             y_mv = _MVYT(mv);
    1811           0 :             p_best_mv32x16[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1812             :         }
    1813             : 
    1814           0 :         sad_32x16[4] =
    1815           0 :             p_sad16x16[8][search_index] + p_sad16x16[9][search_index];
    1816           0 :         if (sad_32x16[4] < p_best_sad32x16[4]) {
    1817           0 :             p_best_sad32x16[4] = sad_32x16[4];
    1818           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1819           0 :             y_mv = _MVYT(mv);
    1820           0 :             p_best_mv32x16[4] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1821             :         }
    1822             : 
    1823           0 :         sad_32x16[5] =
    1824           0 :             p_sad16x16[10][search_index] + p_sad16x16[11][search_index];
    1825           0 :         if (sad_32x16[5] < p_best_sad32x16[5]) {
    1826           0 :             p_best_sad32x16[5] = sad_32x16[5];
    1827           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1828           0 :             y_mv = _MVYT(mv);
    1829           0 :             p_best_mv32x16[5] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1830             :         }
    1831             : 
    1832           0 :         sad_32x16[6] =
    1833           0 :             p_sad16x16[12][search_index] + p_sad16x16[13][search_index];
    1834           0 :         if (sad_32x16[6] < p_best_sad32x16[6]) {
    1835           0 :             p_best_sad32x16[6] = sad_32x16[6];
    1836           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1837           0 :             y_mv = _MVYT(mv);
    1838           0 :             p_best_mv32x16[6] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1839             :         }
    1840             : 
    1841           0 :         sad_32x16[7] =
    1842           0 :             p_sad16x16[14][search_index] + p_sad16x16[15][search_index];
    1843           0 :         if (sad_32x16[7] < p_best_sad32x16[7]) {
    1844           0 :             p_best_sad32x16[7] = sad_32x16[7];
    1845           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1846           0 :             y_mv = _MVYT(mv);
    1847           0 :             p_best_mv32x16[7] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1848             :         }
    1849             : 
    1850             :         // 64x16
    1851           0 :         sad = sad_32x16[0] + sad_32x16[2];
    1852           0 :         if (sad < p_best_sad64x16[0]) {
    1853           0 :             p_best_sad64x16[0] = sad;
    1854           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1855           0 :             y_mv = _MVYT(mv);
    1856           0 :             p_best_mv64x16[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1857             :         }
    1858           0 :         sad = sad_32x16[1] + sad_32x16[3];
    1859           0 :         if (sad < p_best_sad64x16[1]) {
    1860           0 :             p_best_sad64x16[1] = sad;
    1861           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1862           0 :             y_mv = _MVYT(mv);
    1863           0 :             p_best_mv64x16[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1864             :         }
    1865             : 
    1866           0 :         sad = sad_32x16[4] + sad_32x16[6];
    1867           0 :         if (sad < p_best_sad64x16[2]) {
    1868           0 :             p_best_sad64x16[2] = sad;
    1869           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1870           0 :             y_mv = _MVYT(mv);
    1871           0 :             p_best_mv64x16[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1872             :         }
    1873           0 :         sad = sad_32x16[5] + sad_32x16[7];
    1874           0 :         if (sad < p_best_sad64x16[3]) {
    1875           0 :             p_best_sad64x16[3] = sad;
    1876           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1877           0 :             y_mv = _MVYT(mv);
    1878           0 :             p_best_mv64x16[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1879             :         }
    1880             :         // 16x8
    1881           0 :         sad_16x8[0] = p_sad8x8[0][search_index] + p_sad8x8[1][search_index];
    1882           0 :         if (sad_16x8[0] < p_best_sad16x8[0]) {
    1883           0 :             p_best_sad16x8[0] = sad_16x8[0];
    1884           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1885           0 :             y_mv = _MVYT(mv);
    1886           0 :             p_best_mv16x8[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1887             :         }
    1888             : 
    1889           0 :         sad_16x8[1] = p_sad8x8[2][search_index] + p_sad8x8[3][search_index];
    1890           0 :         if (sad_16x8[1] < p_best_sad16x8[1]) {
    1891           0 :             p_best_sad16x8[1] = sad_16x8[1];
    1892           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1893           0 :             y_mv = _MVYT(mv);
    1894           0 :             p_best_mv16x8[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1895             :         }
    1896             : 
    1897           0 :         sad_16x8[2] = p_sad8x8[4][search_index] + p_sad8x8[5][search_index];
    1898           0 :         if (sad_16x8[2] < p_best_sad16x8[2]) {
    1899           0 :             p_best_sad16x8[2] = sad_16x8[2];
    1900           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1901           0 :             y_mv = _MVYT(mv);
    1902           0 :             p_best_mv16x8[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1903             :         }
    1904             : 
    1905           0 :         sad_16x8[3] = p_sad8x8[6][search_index] + p_sad8x8[7][search_index];
    1906           0 :         if (sad_16x8[3] < p_best_sad16x8[3]) {
    1907           0 :             p_best_sad16x8[3] = sad_16x8[3];
    1908           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1909           0 :             y_mv = _MVYT(mv);
    1910           0 :             p_best_mv16x8[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1911             :         }
    1912             : 
    1913           0 :         sad_16x8[4] = p_sad8x8[8][search_index] + p_sad8x8[9][search_index];
    1914           0 :         if (sad_16x8[4] < p_best_sad16x8[4]) {
    1915           0 :             p_best_sad16x8[4] = sad_16x8[4];
    1916           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1917           0 :             y_mv = _MVYT(mv);
    1918           0 :             p_best_mv16x8[4] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1919             :         }
    1920             : 
    1921           0 :         sad_16x8[5] = p_sad8x8[10][search_index] + p_sad8x8[11][search_index];
    1922           0 :         if (sad_16x8[5] < p_best_sad16x8[5]) {
    1923           0 :             p_best_sad16x8[5] = sad_16x8[5];
    1924           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1925           0 :             y_mv = _MVYT(mv);
    1926           0 :             p_best_mv16x8[5] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1927             :         }
    1928             : 
    1929           0 :         sad_16x8[6] = p_sad8x8[12][search_index] + p_sad8x8[13][search_index];
    1930           0 :         if (sad_16x8[6] < p_best_sad16x8[6]) {
    1931           0 :             p_best_sad16x8[6] = sad_16x8[6];
    1932           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1933           0 :             y_mv = _MVYT(mv);
    1934           0 :             p_best_mv16x8[6] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1935             :         }
    1936             : 
    1937           0 :         sad_16x8[7] = p_sad8x8[14][search_index] + p_sad8x8[15][search_index];
    1938           0 :         if (sad_16x8[7] < p_best_sad16x8[7]) {
    1939           0 :             p_best_sad16x8[7] = sad_16x8[7];
    1940           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1941           0 :             y_mv = _MVYT(mv);
    1942           0 :             p_best_mv16x8[7] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1943             :         }
    1944             : 
    1945           0 :         sad_16x8[8] = p_sad8x8[16][search_index] + p_sad8x8[17][search_index];
    1946           0 :         if (sad_16x8[8] < p_best_sad16x8[8]) {
    1947           0 :             p_best_sad16x8[8] = sad_16x8[8];
    1948           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1949           0 :             y_mv = _MVYT(mv);
    1950           0 :             p_best_mv16x8[8] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1951             :         }
    1952             : 
    1953           0 :         sad_16x8[9] = p_sad8x8[18][search_index] + p_sad8x8[19][search_index];
    1954           0 :         if (sad_16x8[9] < p_best_sad16x8[9]) {
    1955           0 :             p_best_sad16x8[9] = sad_16x8[9];
    1956           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1957           0 :             y_mv = _MVYT(mv);
    1958           0 :             p_best_mv16x8[9] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1959             :         }
    1960             : 
    1961           0 :         sad_16x8[10] = p_sad8x8[20][search_index] + p_sad8x8[21][search_index];
    1962           0 :         if (sad_16x8[10] < p_best_sad16x8[10]) {
    1963           0 :             p_best_sad16x8[10] = sad_16x8[10];
    1964           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1965           0 :             y_mv = _MVYT(mv);
    1966           0 :             p_best_mv16x8[10] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1967             :         }
    1968             : 
    1969           0 :         sad_16x8[11] = p_sad8x8[22][search_index] + p_sad8x8[23][search_index];
    1970           0 :         if (sad_16x8[11] < p_best_sad16x8[11]) {
    1971           0 :             p_best_sad16x8[11] = sad_16x8[11];
    1972           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1973           0 :             y_mv = _MVYT(mv);
    1974           0 :             p_best_mv16x8[11] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1975             :         }
    1976             : 
    1977           0 :         sad_16x8[12] = p_sad8x8[24][search_index] + p_sad8x8[25][search_index];
    1978           0 :         if (sad_16x8[12] < p_best_sad16x8[12]) {
    1979           0 :             p_best_sad16x8[12] = sad_16x8[12];
    1980           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1981           0 :             y_mv = _MVYT(mv);
    1982           0 :             p_best_mv16x8[12] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1983             :         }
    1984             : 
    1985           0 :         sad_16x8[13] = p_sad8x8[26][search_index] + p_sad8x8[27][search_index];
    1986           0 :         if (sad_16x8[13] < p_best_sad16x8[13]) {
    1987           0 :             p_best_sad16x8[13] = sad_16x8[13];
    1988           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1989           0 :             y_mv = _MVYT(mv);
    1990           0 :             p_best_mv16x8[13] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1991             :         }
    1992             : 
    1993           0 :         sad_16x8[14] = p_sad8x8[28][search_index] + p_sad8x8[29][search_index];
    1994           0 :         if (sad_16x8[14] < p_best_sad16x8[14]) {
    1995           0 :             p_best_sad16x8[14] = sad_16x8[14];
    1996           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    1997           0 :             y_mv = _MVYT(mv);
    1998           0 :             p_best_mv16x8[14] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    1999             :         }
    2000             : 
    2001           0 :         sad_16x8[15] = p_sad8x8[30][search_index] + p_sad8x8[31][search_index];
    2002           0 :         if (sad_16x8[15] < p_best_sad16x8[15]) {
    2003           0 :             p_best_sad16x8[15] = sad_16x8[15];
    2004           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2005           0 :             y_mv = _MVYT(mv);
    2006           0 :             p_best_mv16x8[15] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2007             :         }
    2008             : 
    2009           0 :         sad_16x8[16] = p_sad8x8[32][search_index] + p_sad8x8[33][search_index];
    2010           0 :         if (sad_16x8[16] < p_best_sad16x8[16]) {
    2011           0 :             p_best_sad16x8[16] = sad_16x8[16];
    2012           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2013           0 :             y_mv = _MVYT(mv);
    2014           0 :             p_best_mv16x8[16] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2015             :         }
    2016             : 
    2017           0 :         sad_16x8[17] = p_sad8x8[34][search_index] + p_sad8x8[35][search_index];
    2018           0 :         if (sad_16x8[17] < p_best_sad16x8[17]) {
    2019           0 :             p_best_sad16x8[17] = sad_16x8[17];
    2020           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2021           0 :             y_mv = _MVYT(mv);
    2022           0 :             p_best_mv16x8[17] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2023             :         }
    2024             : 
    2025           0 :         sad_16x8[18] = p_sad8x8[36][search_index] + p_sad8x8[37][search_index];
    2026           0 :         if (sad_16x8[18] < p_best_sad16x8[18]) {
    2027           0 :             p_best_sad16x8[18] = sad_16x8[18];
    2028           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2029           0 :             y_mv = _MVYT(mv);
    2030           0 :             p_best_mv16x8[18] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2031             :         }
    2032             : 
    2033           0 :         sad_16x8[19] = p_sad8x8[38][search_index] + p_sad8x8[39][search_index];
    2034           0 :         if (sad_16x8[19] < p_best_sad16x8[19]) {
    2035           0 :             p_best_sad16x8[19] = sad_16x8[19];
    2036           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2037           0 :             y_mv = _MVYT(mv);
    2038           0 :             p_best_mv16x8[19] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2039             :         }
    2040             : 
    2041           0 :         sad_16x8[20] = p_sad8x8[40][search_index] + p_sad8x8[41][search_index];
    2042           0 :         if (sad_16x8[20] < p_best_sad16x8[20]) {
    2043           0 :             p_best_sad16x8[20] = sad_16x8[20];
    2044           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2045           0 :             y_mv = _MVYT(mv);
    2046           0 :             p_best_mv16x8[20] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2047             :         }
    2048             : 
    2049           0 :         sad_16x8[21] = p_sad8x8[42][search_index] + p_sad8x8[43][search_index];
    2050           0 :         if (sad_16x8[21] < p_best_sad16x8[21]) {
    2051           0 :             p_best_sad16x8[21] = sad_16x8[21];
    2052           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2053           0 :             y_mv = _MVYT(mv);
    2054           0 :             p_best_mv16x8[21] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2055             :         }
    2056             : 
    2057           0 :         sad_16x8[22] = p_sad8x8[44][search_index] + p_sad8x8[45][search_index];
    2058           0 :         if (sad_16x8[22] < p_best_sad16x8[22]) {
    2059           0 :             p_best_sad16x8[22] = sad_16x8[22];
    2060           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2061           0 :             y_mv = _MVYT(mv);
    2062           0 :             p_best_mv16x8[22] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2063             :         }
    2064             : 
    2065           0 :         sad_16x8[23] = p_sad8x8[46][search_index] + p_sad8x8[47][search_index];
    2066           0 :         if (sad_16x8[23] < p_best_sad16x8[23]) {
    2067           0 :             p_best_sad16x8[23] = sad_16x8[23];
    2068           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2069           0 :             y_mv = _MVYT(mv);
    2070           0 :             p_best_mv16x8[23] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2071             :         }
    2072             : 
    2073           0 :         sad_16x8[24] = p_sad8x8[48][search_index] + p_sad8x8[49][search_index];
    2074           0 :         if (sad_16x8[24] < p_best_sad16x8[24]) {
    2075           0 :             p_best_sad16x8[24] = sad_16x8[24];
    2076           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2077           0 :             y_mv = _MVYT(mv);
    2078           0 :             p_best_mv16x8[24] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2079             :         }
    2080             : 
    2081           0 :         sad_16x8[25] = p_sad8x8[50][search_index] + p_sad8x8[51][search_index];
    2082           0 :         if (sad_16x8[25] < p_best_sad16x8[25]) {
    2083           0 :             p_best_sad16x8[25] = sad_16x8[25];
    2084           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2085           0 :             y_mv = _MVYT(mv);
    2086           0 :             p_best_mv16x8[25] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2087             :         }
    2088             : 
    2089           0 :         sad_16x8[26] = p_sad8x8[52][search_index] + p_sad8x8[53][search_index];
    2090           0 :         if (sad_16x8[26] < p_best_sad16x8[26]) {
    2091           0 :             p_best_sad16x8[26] = sad_16x8[26];
    2092           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2093           0 :             y_mv = _MVYT(mv);
    2094           0 :             p_best_mv16x8[26] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2095             :         }
    2096             : 
    2097           0 :         sad_16x8[27] = p_sad8x8[54][search_index] + p_sad8x8[55][search_index];
    2098           0 :         if (sad_16x8[27] < p_best_sad16x8[27]) {
    2099           0 :             p_best_sad16x8[27] = sad_16x8[27];
    2100           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2101           0 :             y_mv = _MVYT(mv);
    2102           0 :             p_best_mv16x8[27] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2103             :         }
    2104             : 
    2105           0 :         sad_16x8[28] = p_sad8x8[56][search_index] + p_sad8x8[57][search_index];
    2106           0 :         if (sad_16x8[28] < p_best_sad16x8[28]) {
    2107           0 :             p_best_sad16x8[28] = sad_16x8[28];
    2108           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2109           0 :             y_mv = _MVYT(mv);
    2110           0 :             p_best_mv16x8[28] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2111             :         }
    2112             : 
    2113           0 :         sad_16x8[29] = p_sad8x8[58][search_index] + p_sad8x8[59][search_index];
    2114           0 :         if (sad_16x8[29] < p_best_sad16x8[29]) {
    2115           0 :             p_best_sad16x8[29] = sad_16x8[29];
    2116           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2117           0 :             y_mv = _MVYT(mv);
    2118           0 :             p_best_mv16x8[29] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2119             :         }
    2120             : 
    2121           0 :         sad_16x8[30] = p_sad8x8[60][search_index] + p_sad8x8[61][search_index];
    2122           0 :         if (sad_16x8[30] < p_best_sad16x8[30]) {
    2123           0 :             p_best_sad16x8[30] = sad_16x8[30];
    2124           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2125           0 :             y_mv = _MVYT(mv);
    2126           0 :             p_best_mv16x8[30] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2127             :         }
    2128             : 
    2129           0 :         sad_16x8[31] = p_sad8x8[62][search_index] + p_sad8x8[63][search_index];
    2130           0 :         if (sad_16x8[31] < p_best_sad16x8[31]) {
    2131           0 :             p_best_sad16x8[31] = sad_16x8[31];
    2132           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2133           0 :             y_mv = _MVYT(mv);
    2134           0 :             p_best_mv16x8[31] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2135             :         }
    2136             : 
    2137             :         // 32x64
    2138           0 :         sad = p_sad32x32[0][search_index] + p_sad32x32[2][search_index];
    2139           0 :         if (sad < p_best_sad32x64[0]) {
    2140           0 :             p_best_sad32x64[0] = sad;
    2141           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2142           0 :             y_mv = _MVYT(mv);
    2143           0 :             p_best_mv32x64[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2144             :         }
    2145             : 
    2146           0 :         sad = p_sad32x32[1][search_index] + p_sad32x32[3][search_index];
    2147           0 :         if (sad < p_best_sad32x64[1]) {
    2148           0 :             p_best_sad32x64[1] = sad;
    2149           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2150           0 :             y_mv = _MVYT(mv);
    2151           0 :             p_best_mv32x64[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2152             :         }
    2153             : 
    2154             :         // 16x32
    2155           0 :         sad_16x32[0] =
    2156           0 :             p_sad16x16[0][search_index] + p_sad16x16[2][search_index];
    2157           0 :         if (sad_16x32[0] < p_best_sad16x32[0]) {
    2158           0 :             p_best_sad16x32[0] = sad_16x32[0];
    2159           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2160           0 :             y_mv = _MVYT(mv);
    2161           0 :             p_best_mv16x32[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2162             :         }
    2163             : 
    2164           0 :         sad_16x32[1] =
    2165           0 :             p_sad16x16[1][search_index] + p_sad16x16[3][search_index];
    2166           0 :         if (sad_16x32[1] < p_best_sad16x32[1]) {
    2167           0 :             p_best_sad16x32[1] = sad_16x32[1];
    2168           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2169           0 :             y_mv = _MVYT(mv);
    2170           0 :             p_best_mv16x32[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2171             :         }
    2172             : 
    2173           0 :         sad_16x32[2] =
    2174           0 :             p_sad16x16[4][search_index] + p_sad16x16[6][search_index];
    2175           0 :         if (sad_16x32[2] < p_best_sad16x32[2]) {
    2176           0 :             p_best_sad16x32[2] = sad_16x32[2];
    2177           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2178           0 :             y_mv = _MVYT(mv);
    2179           0 :             p_best_mv16x32[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2180             :         }
    2181             : 
    2182           0 :         sad_16x32[3] =
    2183           0 :             p_sad16x16[5][search_index] + p_sad16x16[7][search_index];
    2184           0 :         if (sad_16x32[3] < p_best_sad16x32[3]) {
    2185           0 :             p_best_sad16x32[3] = sad_16x32[3];
    2186           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2187           0 :             y_mv = _MVYT(mv);
    2188           0 :             p_best_mv16x32[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2189             :         }
    2190             : 
    2191           0 :         sad_16x32[4] =
    2192           0 :             p_sad16x16[8][search_index] + p_sad16x16[10][search_index];
    2193           0 :         if (sad_16x32[4] < p_best_sad16x32[4]) {
    2194           0 :             p_best_sad16x32[4] = sad_16x32[4];
    2195           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2196           0 :             y_mv = _MVYT(mv);
    2197           0 :             p_best_mv16x32[4] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2198             :         }
    2199             : 
    2200           0 :         sad_16x32[5] =
    2201           0 :             p_sad16x16[9][search_index] + p_sad16x16[11][search_index];
    2202           0 :         if (sad_16x32[5] < p_best_sad16x32[5]) {
    2203           0 :             p_best_sad16x32[5] = sad_16x32[5];
    2204           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2205           0 :             y_mv = _MVYT(mv);
    2206           0 :             p_best_mv16x32[5] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2207             :         }
    2208             : 
    2209           0 :         sad_16x32[6] =
    2210           0 :             p_sad16x16[12][search_index] + p_sad16x16[14][search_index];
    2211           0 :         if (sad_16x32[6] < p_best_sad16x32[6]) {
    2212           0 :             p_best_sad16x32[6] = sad_16x32[6];
    2213           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2214           0 :             y_mv = _MVYT(mv);
    2215           0 :             p_best_mv16x32[6] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2216             :         }
    2217             : 
    2218           0 :         sad_16x32[7] =
    2219           0 :             p_sad16x16[13][search_index] + p_sad16x16[15][search_index];
    2220           0 :         if (sad_16x32[7] < p_best_sad16x32[7]) {
    2221           0 :             p_best_sad16x32[7] = sad_16x32[7];
    2222           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2223           0 :             y_mv = _MVYT(mv);
    2224           0 :             p_best_mv16x32[7] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2225             :         }
    2226             : 
    2227           0 :         sad = sad_16x32[0] + sad_16x32[4];
    2228           0 :         if (sad < p_best_sad16x64[0]) {
    2229           0 :             p_best_sad16x64[0] = sad;
    2230           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2231           0 :             y_mv = _MVYT(mv);
    2232           0 :             p_best_mv16x64[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2233             :         }
    2234           0 :         sad = sad_16x32[1] + sad_16x32[5];
    2235           0 :         if (sad < p_best_sad16x64[1]) {
    2236           0 :             p_best_sad16x64[1] = sad;
    2237           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2238           0 :             y_mv = _MVYT(mv);
    2239           0 :             p_best_mv16x64[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2240             :         }
    2241             : 
    2242           0 :         sad = sad_16x32[2] + sad_16x32[6];
    2243           0 :         if (sad < p_best_sad16x64[2]) {
    2244           0 :             p_best_sad16x64[2] = sad;
    2245           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2246           0 :             y_mv = _MVYT(mv);
    2247           0 :             p_best_mv16x64[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2248             :         }
    2249             : 
    2250           0 :         sad = sad_16x32[3] + sad_16x32[7];
    2251           0 :         if (sad < p_best_sad16x64[3]) {
    2252           0 :             p_best_sad16x64[3] = sad;
    2253           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2254           0 :             y_mv = _MVYT(mv);
    2255           0 :             p_best_mv16x64[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2256             :         }
    2257             :         // 8x16
    2258           0 :         sad_8x16[0] = p_sad8x8[0][search_index] + p_sad8x8[2][search_index];
    2259           0 :         if (sad_8x16[0] < p_best_sad8x16[0]) {
    2260           0 :             p_best_sad8x16[0] = sad_8x16[0];
    2261           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2262           0 :             y_mv = _MVYT(mv);
    2263           0 :             p_best_mv8x16[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2264             :         }
    2265             : 
    2266           0 :         sad_8x16[1] = p_sad8x8[1][search_index] + p_sad8x8[3][search_index];
    2267           0 :         if (sad_8x16[1] < p_best_sad8x16[1]) {
    2268           0 :             p_best_sad8x16[1] = sad_8x16[1];
    2269           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2270           0 :             y_mv = _MVYT(mv);
    2271           0 :             p_best_mv8x16[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2272             :         }
    2273             : 
    2274           0 :         sad_8x16[2] = p_sad8x8[4][search_index] + p_sad8x8[6][search_index];
    2275           0 :         if (sad_8x16[2] < p_best_sad8x16[2]) {
    2276           0 :             p_best_sad8x16[2] = sad_8x16[2];
    2277           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2278           0 :             y_mv = _MVYT(mv);
    2279           0 :             p_best_mv8x16[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2280             :         }
    2281             : 
    2282           0 :         sad_8x16[3] = p_sad8x8[5][search_index] + p_sad8x8[7][search_index];
    2283           0 :         if (sad_8x16[3] < p_best_sad8x16[3]) {
    2284           0 :             p_best_sad8x16[3] = sad_8x16[3];
    2285           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2286           0 :             y_mv = _MVYT(mv);
    2287           0 :             p_best_mv8x16[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2288             :         }
    2289             : 
    2290           0 :         sad_8x16[4] = p_sad8x8[8][search_index] + p_sad8x8[10][search_index];
    2291           0 :         if (sad_8x16[4] < p_best_sad8x16[4]) {
    2292           0 :             p_best_sad8x16[4] = sad_8x16[4];
    2293           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2294           0 :             y_mv = _MVYT(mv);
    2295           0 :             p_best_mv8x16[4] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2296             :         }
    2297             : 
    2298           0 :         sad_8x16[5] = p_sad8x8[9][search_index] + p_sad8x8[11][search_index];
    2299           0 :         if (sad_8x16[5] < p_best_sad8x16[5]) {
    2300           0 :             p_best_sad8x16[5] = sad_8x16[5];
    2301           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2302           0 :             y_mv = _MVYT(mv);
    2303           0 :             p_best_mv8x16[5] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2304             :         }
    2305             : 
    2306           0 :         sad_8x16[6] = p_sad8x8[12][search_index] + p_sad8x8[14][search_index];
    2307           0 :         if (sad_8x16[6] < p_best_sad8x16[6]) {
    2308           0 :             p_best_sad8x16[6] = sad_8x16[6];
    2309           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2310           0 :             y_mv = _MVYT(mv);
    2311           0 :             p_best_mv8x16[6] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2312             :         }
    2313             : 
    2314           0 :         sad_8x16[7] = p_sad8x8[13][search_index] + p_sad8x8[15][search_index];
    2315           0 :         if (sad_8x16[7] < p_best_sad8x16[7]) {
    2316           0 :             p_best_sad8x16[7] = sad_8x16[7];
    2317           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2318           0 :             y_mv = _MVYT(mv);
    2319           0 :             p_best_mv8x16[7] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2320             :         }
    2321             : 
    2322           0 :         sad_8x16[8] = p_sad8x8[16][search_index] + p_sad8x8[18][search_index];
    2323           0 :         if (sad_8x16[8] < p_best_sad8x16[8]) {
    2324           0 :             p_best_sad8x16[8] = sad_8x16[8];
    2325           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2326           0 :             y_mv = _MVYT(mv);
    2327           0 :             p_best_mv8x16[8] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2328             :         }
    2329             : 
    2330           0 :         sad_8x16[9] = p_sad8x8[17][search_index] + p_sad8x8[19][search_index];
    2331           0 :         if (sad_8x16[9] < p_best_sad8x16[9]) {
    2332           0 :             p_best_sad8x16[9] = sad_8x16[9];
    2333           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2334           0 :             y_mv = _MVYT(mv);
    2335           0 :             p_best_mv8x16[9] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2336             :         }
    2337             : 
    2338           0 :         sad_8x16[10] = p_sad8x8[20][search_index] + p_sad8x8[22][search_index];
    2339           0 :         if (sad_8x16[10] < p_best_sad8x16[10]) {
    2340           0 :             p_best_sad8x16[10] = sad_8x16[10];
    2341           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2342           0 :             y_mv = _MVYT(mv);
    2343           0 :             p_best_mv8x16[10] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2344             :         }
    2345             : 
    2346           0 :         sad_8x16[11] = p_sad8x8[21][search_index] + p_sad8x8[23][search_index];
    2347           0 :         if (sad_8x16[11] < p_best_sad8x16[11]) {
    2348           0 :             p_best_sad8x16[11] = sad_8x16[11];
    2349           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2350           0 :             y_mv = _MVYT(mv);
    2351           0 :             p_best_mv8x16[11] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2352             :         }
    2353             : 
    2354           0 :         sad_8x16[12] = p_sad8x8[24][search_index] + p_sad8x8[26][search_index];
    2355           0 :         if (sad_8x16[12] < p_best_sad8x16[12]) {
    2356           0 :             p_best_sad8x16[12] = sad_8x16[12];
    2357           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2358           0 :             y_mv = _MVYT(mv);
    2359           0 :             p_best_mv8x16[12] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2360             :         }
    2361             : 
    2362           0 :         sad_8x16[13] = p_sad8x8[25][search_index] + p_sad8x8[27][search_index];
    2363           0 :         if (sad_8x16[13] < p_best_sad8x16[13]) {
    2364           0 :             p_best_sad8x16[13] = sad_8x16[13];
    2365           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2366           0 :             y_mv = _MVYT(mv);
    2367           0 :             p_best_mv8x16[13] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2368             :         }
    2369             : 
    2370           0 :         sad_8x16[14] = p_sad8x8[28][search_index] + p_sad8x8[30][search_index];
    2371           0 :         if (sad_8x16[14] < p_best_sad8x16[14]) {
    2372           0 :             p_best_sad8x16[14] = sad_8x16[14];
    2373           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2374           0 :             y_mv = _MVYT(mv);
    2375           0 :             p_best_mv8x16[14] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2376             :         }
    2377             : 
    2378           0 :         sad_8x16[15] = p_sad8x8[29][search_index] + p_sad8x8[31][search_index];
    2379           0 :         if (sad_8x16[15] < p_best_sad8x16[15]) {
    2380           0 :             p_best_sad8x16[15] = sad_8x16[15];
    2381           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2382           0 :             y_mv = _MVYT(mv);
    2383           0 :             p_best_mv8x16[15] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2384             :         }
    2385             : 
    2386           0 :         sad_8x16[16] = p_sad8x8[32][search_index] + p_sad8x8[34][search_index];
    2387           0 :         if (sad_8x16[16] < p_best_sad8x16[16]) {
    2388           0 :             p_best_sad8x16[16] = sad_8x16[16];
    2389           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2390           0 :             y_mv = _MVYT(mv);
    2391           0 :             p_best_mv8x16[16] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2392             :         }
    2393             : 
    2394           0 :         sad_8x16[17] = p_sad8x8[33][search_index] + p_sad8x8[35][search_index];
    2395           0 :         if (sad_8x16[17] < p_best_sad8x16[17]) {
    2396           0 :             p_best_sad8x16[17] = sad_8x16[17];
    2397           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2398           0 :             y_mv = _MVYT(mv);
    2399           0 :             p_best_mv8x16[17] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2400             :         }
    2401             : 
    2402           0 :         sad_8x16[18] = p_sad8x8[36][search_index] + p_sad8x8[38][search_index];
    2403           0 :         if (sad_8x16[18] < p_best_sad8x16[18]) {
    2404           0 :             p_best_sad8x16[18] = sad_8x16[18];
    2405           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2406           0 :             y_mv = _MVYT(mv);
    2407           0 :             p_best_mv8x16[18] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2408             :         }
    2409             : 
    2410           0 :         sad_8x16[19] = p_sad8x8[37][search_index] + p_sad8x8[39][search_index];
    2411           0 :         if (sad_8x16[19] < p_best_sad8x16[19]) {
    2412           0 :             p_best_sad8x16[19] = sad_8x16[19];
    2413           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2414           0 :             y_mv = _MVYT(mv);
    2415           0 :             p_best_mv8x16[19] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2416             :         }
    2417             : 
    2418           0 :         sad_8x16[20] = p_sad8x8[40][search_index] + p_sad8x8[42][search_index];
    2419           0 :         if (sad_8x16[20] < p_best_sad8x16[20]) {
    2420           0 :             p_best_sad8x16[20] = sad_8x16[20];
    2421           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2422           0 :             y_mv = _MVYT(mv);
    2423           0 :             p_best_mv8x16[20] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2424             :         }
    2425             : 
    2426           0 :         sad_8x16[21] = p_sad8x8[41][search_index] + p_sad8x8[43][search_index];
    2427           0 :         if (sad_8x16[21] < p_best_sad8x16[21]) {
    2428           0 :             p_best_sad8x16[21] = sad_8x16[21];
    2429           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2430           0 :             y_mv = _MVYT(mv);
    2431           0 :             p_best_mv8x16[21] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2432             :         }
    2433             : 
    2434           0 :         sad_8x16[22] = p_sad8x8[44][search_index] + p_sad8x8[46][search_index];
    2435           0 :         if (sad_8x16[22] < p_best_sad8x16[22]) {
    2436           0 :             p_best_sad8x16[22] = sad_8x16[22];
    2437           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2438           0 :             y_mv = _MVYT(mv);
    2439           0 :             p_best_mv8x16[22] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2440             :         }
    2441             : 
    2442           0 :         sad_8x16[23] = p_sad8x8[45][search_index] + p_sad8x8[47][search_index];
    2443           0 :         if (sad_8x16[23] < p_best_sad8x16[23]) {
    2444           0 :             p_best_sad8x16[23] = sad_8x16[23];
    2445           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2446           0 :             y_mv = _MVYT(mv);
    2447           0 :             p_best_mv8x16[23] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2448             :         }
    2449             : 
    2450           0 :         sad_8x16[24] = p_sad8x8[48][search_index] + p_sad8x8[50][search_index];
    2451           0 :         if (sad_8x16[24] < p_best_sad8x16[24]) {
    2452           0 :             p_best_sad8x16[24] = sad_8x16[24];
    2453           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2454           0 :             y_mv = _MVYT(mv);
    2455           0 :             p_best_mv8x16[24] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2456             :         }
    2457             : 
    2458           0 :         sad_8x16[25] = p_sad8x8[49][search_index] + p_sad8x8[51][search_index];
    2459           0 :         if (sad_8x16[25] < p_best_sad8x16[25]) {
    2460           0 :             p_best_sad8x16[25] = sad_8x16[25];
    2461           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2462           0 :             y_mv = _MVYT(mv);
    2463           0 :             p_best_mv8x16[25] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2464             :         }
    2465             : 
    2466           0 :         sad_8x16[26] = p_sad8x8[52][search_index] + p_sad8x8[54][search_index];
    2467           0 :         if (sad_8x16[26] < p_best_sad8x16[26]) {
    2468           0 :             p_best_sad8x16[26] = sad_8x16[26];
    2469           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2470           0 :             y_mv = _MVYT(mv);
    2471           0 :             p_best_mv8x16[26] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2472             :         }
    2473             : 
    2474           0 :         sad_8x16[27] = p_sad8x8[53][search_index] + p_sad8x8[55][search_index];
    2475           0 :         if (sad_8x16[27] < p_best_sad8x16[27]) {
    2476           0 :             p_best_sad8x16[27] = sad_8x16[27];
    2477           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2478           0 :             y_mv = _MVYT(mv);
    2479           0 :             p_best_mv8x16[27] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2480             :         }
    2481             : 
    2482           0 :         sad_8x16[28] = p_sad8x8[56][search_index] + p_sad8x8[58][search_index];
    2483           0 :         if (sad_8x16[28] < p_best_sad8x16[28]) {
    2484           0 :             p_best_sad8x16[28] = sad_8x16[28];
    2485           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2486           0 :             y_mv = _MVYT(mv);
    2487           0 :             p_best_mv8x16[28] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2488             :         }
    2489             : 
    2490           0 :         sad_8x16[29] = p_sad8x8[57][search_index] + p_sad8x8[59][search_index];
    2491           0 :         if (sad_8x16[29] < p_best_sad8x16[29]) {
    2492           0 :             p_best_sad8x16[29] = sad_8x16[29];
    2493           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2494           0 :             y_mv = _MVYT(mv);
    2495           0 :             p_best_mv8x16[29] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2496             :         }
    2497             : 
    2498           0 :         sad_8x16[30] = p_sad8x8[60][search_index] + p_sad8x8[62][search_index];
    2499           0 :         if (sad_8x16[30] < p_best_sad8x16[30]) {
    2500           0 :             p_best_sad8x16[30] = sad_8x16[30];
    2501           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2502           0 :             y_mv = _MVYT(mv);
    2503           0 :             p_best_mv8x16[30] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2504             :         }
    2505             : 
    2506           0 :         sad_8x16[31] = p_sad8x8[61][search_index] + p_sad8x8[63][search_index];
    2507           0 :         if (sad_8x16[31] < p_best_sad8x16[31]) {
    2508           0 :             p_best_sad8x16[31] = sad_8x16[31];
    2509           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2510           0 :             y_mv = _MVYT(mv);
    2511           0 :             p_best_mv8x16[31] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2512             :         }
    2513             : 
    2514             :         // 32x8
    2515           0 :         sad = sad_16x8[0] + sad_16x8[2];
    2516           0 :         if (sad < p_best_sad32x8[0]) {
    2517           0 :             p_best_sad32x8[0] = sad;
    2518           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2519           0 :             y_mv = _MVYT(mv);
    2520           0 :             p_best_mv32x8[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2521             :         }
    2522             : 
    2523           0 :         sad = sad_16x8[1] + sad_16x8[3];
    2524           0 :         if (sad < p_best_sad32x8[1]) {
    2525           0 :             p_best_sad32x8[1] = sad;
    2526           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2527           0 :             y_mv = _MVYT(mv);
    2528           0 :             p_best_mv32x8[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2529             :         }
    2530             : 
    2531           0 :         sad = sad_16x8[4] + sad_16x8[6];
    2532           0 :         if (sad < p_best_sad32x8[2]) {
    2533           0 :             p_best_sad32x8[2] = sad;
    2534           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2535           0 :             y_mv = _MVYT(mv);
    2536           0 :             p_best_mv32x8[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2537             :         }
    2538             : 
    2539           0 :         sad = sad_16x8[5] + sad_16x8[7];
    2540           0 :         if (sad < p_best_sad32x8[3]) {
    2541           0 :             p_best_sad32x8[3] = sad;
    2542           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2543           0 :             y_mv = _MVYT(mv);
    2544           0 :             p_best_mv32x8[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2545             :         }
    2546             : 
    2547           0 :         sad = sad_16x8[8] + sad_16x8[10];
    2548           0 :         if (sad < p_best_sad32x8[4]) {
    2549           0 :             p_best_sad32x8[4] = sad;
    2550           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2551           0 :             y_mv = _MVYT(mv);
    2552           0 :             p_best_mv32x8[4] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2553             :         }
    2554             : 
    2555           0 :         sad = sad_16x8[9] + sad_16x8[11];
    2556           0 :         if (sad < p_best_sad32x8[5]) {
    2557           0 :             p_best_sad32x8[5] = sad;
    2558           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2559           0 :             y_mv = _MVYT(mv);
    2560           0 :             p_best_mv32x8[5] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2561             :         }
    2562             : 
    2563           0 :         sad = sad_16x8[12] + sad_16x8[14];
    2564           0 :         if (sad < p_best_sad32x8[6]) {
    2565           0 :             p_best_sad32x8[6] = sad;
    2566           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2567           0 :             y_mv = _MVYT(mv);
    2568           0 :             p_best_mv32x8[6] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2569             :         }
    2570             : 
    2571           0 :         sad = sad_16x8[13] + sad_16x8[15];
    2572           0 :         if (sad < p_best_sad32x8[7]) {
    2573           0 :             p_best_sad32x8[7] = sad;
    2574           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2575           0 :             y_mv = _MVYT(mv);
    2576           0 :             p_best_mv32x8[7] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2577             :         }
    2578             : 
    2579           0 :         sad = sad_16x8[16] + sad_16x8[18];
    2580           0 :         if (sad < p_best_sad32x8[8]) {
    2581           0 :             p_best_sad32x8[8] = sad;
    2582           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2583           0 :             y_mv = _MVYT(mv);
    2584           0 :             p_best_mv32x8[8] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2585             :         }
    2586             : 
    2587           0 :         sad = sad_16x8[17] + sad_16x8[19];
    2588           0 :         if (sad < p_best_sad32x8[9]) {
    2589           0 :             p_best_sad32x8[9] = sad;
    2590           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2591           0 :             y_mv = _MVYT(mv);
    2592           0 :             p_best_mv32x8[9] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2593             :         }
    2594             : 
    2595           0 :         sad = sad_16x8[20] + sad_16x8[22];
    2596           0 :         if (sad < p_best_sad32x8[10]) {
    2597           0 :             p_best_sad32x8[10] = sad;
    2598           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2599           0 :             y_mv = _MVYT(mv);
    2600           0 :             p_best_mv32x8[10] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2601             :         }
    2602             : 
    2603           0 :         sad = sad_16x8[21] + sad_16x8[23];
    2604           0 :         if (sad < p_best_sad32x8[11]) {
    2605           0 :             p_best_sad32x8[11] = sad;
    2606           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2607           0 :             y_mv = _MVYT(mv);
    2608           0 :             p_best_mv32x8[11] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2609             :         }
    2610             : 
    2611           0 :         sad = sad_16x8[24] + sad_16x8[26];
    2612           0 :         if (sad < p_best_sad32x8[12]) {
    2613           0 :             p_best_sad32x8[12] = sad;
    2614           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2615           0 :             y_mv = _MVYT(mv);
    2616           0 :             p_best_mv32x8[12] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2617             :         }
    2618             : 
    2619           0 :         sad = sad_16x8[25] + sad_16x8[27];
    2620           0 :         if (sad < p_best_sad32x8[13]) {
    2621           0 :             p_best_sad32x8[13] = sad;
    2622           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2623           0 :             y_mv = _MVYT(mv);
    2624           0 :             p_best_mv32x8[13] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2625             :         }
    2626             : 
    2627           0 :         sad = sad_16x8[28] + sad_16x8[30];
    2628           0 :         if (sad < p_best_sad32x8[14]) {
    2629           0 :             p_best_sad32x8[14] = sad;
    2630           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2631           0 :             y_mv = _MVYT(mv);
    2632           0 :             p_best_mv32x8[14] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2633             :         }
    2634             : 
    2635           0 :         sad = sad_16x8[29] + sad_16x8[31];
    2636           0 :         if (sad < p_best_sad32x8[15]) {
    2637           0 :             p_best_sad32x8[15] = sad;
    2638           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2639           0 :             y_mv = _MVYT(mv);
    2640           0 :             p_best_mv32x8[15] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2641             :         }
    2642             :         // 8x32
    2643           0 :         sad = sad_8x16[0] + sad_8x16[4];
    2644           0 :         if (sad < p_best_sad8x32[0]) {
    2645           0 :             p_best_sad8x32[0] = sad;
    2646           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2647           0 :             y_mv = _MVYT(mv);
    2648           0 :             p_best_mv8x32[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2649             :         }
    2650             : 
    2651           0 :         sad = sad_8x16[1] + sad_8x16[5];
    2652           0 :         if (sad < p_best_sad8x32[1]) {
    2653           0 :             p_best_sad8x32[1] = sad;
    2654           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2655           0 :             y_mv = _MVYT(mv);
    2656           0 :             p_best_mv8x32[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2657             :         }
    2658             : 
    2659           0 :         sad = sad_8x16[2] + sad_8x16[6];
    2660           0 :         if (sad < p_best_sad8x32[2]) {
    2661           0 :             p_best_sad8x32[2] = sad;
    2662           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2663           0 :             y_mv = _MVYT(mv);
    2664           0 :             p_best_mv8x32[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2665             :         }
    2666             : 
    2667           0 :         sad = sad_8x16[3] + sad_8x16[7];
    2668           0 :         if (sad < p_best_sad8x32[3]) {
    2669           0 :             p_best_sad8x32[3] = sad;
    2670           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2671           0 :             y_mv = _MVYT(mv);
    2672           0 :             p_best_mv8x32[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2673             :         }
    2674             : 
    2675           0 :         sad = sad_8x16[8] + sad_8x16[12];
    2676           0 :         if (sad < p_best_sad8x32[4]) {
    2677           0 :             p_best_sad8x32[4] = sad;
    2678           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2679           0 :             y_mv = _MVYT(mv);
    2680           0 :             p_best_mv8x32[4] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2681             :         }
    2682             : 
    2683           0 :         sad = sad_8x16[9] + sad_8x16[13];
    2684           0 :         if (sad < p_best_sad8x32[5]) {
    2685           0 :             p_best_sad8x32[5] = sad;
    2686           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2687           0 :             y_mv = _MVYT(mv);
    2688           0 :             p_best_mv8x32[5] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2689             :         }
    2690             : 
    2691           0 :         sad = sad_8x16[10] + sad_8x16[14];
    2692           0 :         if (sad < p_best_sad8x32[6]) {
    2693           0 :             p_best_sad8x32[6] = sad;
    2694           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2695           0 :             y_mv = _MVYT(mv);
    2696           0 :             p_best_mv8x32[6] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2697             :         }
    2698             : 
    2699           0 :         sad = sad_8x16[11] + sad_8x16[15];
    2700           0 :         if (sad < p_best_sad8x32[7]) {
    2701           0 :             p_best_sad8x32[7] = sad;
    2702           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2703           0 :             y_mv = _MVYT(mv);
    2704           0 :             p_best_mv8x32[7] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2705             :         }
    2706             : 
    2707           0 :         sad = sad_8x16[16] + sad_8x16[20];
    2708           0 :         if (sad < p_best_sad8x32[8]) {
    2709           0 :             p_best_sad8x32[8] = sad;
    2710           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2711           0 :             y_mv = _MVYT(mv);
    2712           0 :             p_best_mv8x32[8] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2713             :         }
    2714             : 
    2715           0 :         sad = sad_8x16[17] + sad_8x16[21];
    2716           0 :         if (sad < p_best_sad8x32[9]) {
    2717           0 :             p_best_sad8x32[9] = sad;
    2718           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2719           0 :             y_mv = _MVYT(mv);
    2720           0 :             p_best_mv8x32[9] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2721             :         }
    2722             : 
    2723           0 :         sad = sad_8x16[18] + sad_8x16[22];
    2724           0 :         if (sad < p_best_sad8x32[10]) {
    2725           0 :             p_best_sad8x32[10] = sad;
    2726           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2727           0 :             y_mv = _MVYT(mv);
    2728           0 :             p_best_mv8x32[10] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2729             :         }
    2730             : 
    2731           0 :         sad = sad_8x16[19] + sad_8x16[23];
    2732           0 :         if (sad < p_best_sad8x32[11]) {
    2733           0 :             p_best_sad8x32[11] = sad;
    2734           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2735           0 :             y_mv = _MVYT(mv);
    2736           0 :             p_best_mv8x32[11] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2737             :         }
    2738             : 
    2739           0 :         sad = sad_8x16[24] + sad_8x16[28];
    2740           0 :         if (sad < p_best_sad8x32[12]) {
    2741           0 :             p_best_sad8x32[12] = sad;
    2742           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2743           0 :             y_mv = _MVYT(mv);
    2744           0 :             p_best_mv8x32[12] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2745             :         }
    2746             : 
    2747           0 :         sad = sad_8x16[25] + sad_8x16[29];
    2748           0 :         if (sad < p_best_sad8x32[13]) {
    2749           0 :             p_best_sad8x32[13] = sad;
    2750           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2751           0 :             y_mv = _MVYT(mv);
    2752           0 :             p_best_mv8x32[13] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2753             :         }
    2754             : 
    2755           0 :         sad = sad_8x16[26] + sad_8x16[30];
    2756           0 :         if (sad < p_best_sad8x32[14]) {
    2757           0 :             p_best_sad8x32[14] = sad;
    2758           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2759           0 :             y_mv = _MVYT(mv);
    2760           0 :             p_best_mv8x32[14] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2761             :         }
    2762             : 
    2763           0 :         sad = sad_8x16[27] + sad_8x16[31];
    2764           0 :         if (sad < p_best_sad8x32[15]) {
    2765           0 :             p_best_sad8x32[15] = sad;
    2766           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2767           0 :             y_mv = _MVYT(mv);
    2768           0 :             p_best_mv8x32[15] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2769             :         }
    2770             :     }
    2771           0 : }
    2772             : 
    2773             : /*******************************************
    2774             :  * ext_eight_sad_calculation_8x8_16x16: for one 16x16 block, computes the four 8x8 SADs and their 16x16 sum at 8 consecutive horizontal reference offsets, stores every result, and updates the best SAD/MV entries
    2775             :  *******************************************/
    2776           0 : static void ext_eight_sad_calculation_8x8_16x16(
    2777             :     uint8_t *src, uint32_t src_stride, uint8_t *ref, uint32_t ref_stride,  // src/ref: top-left of the 16x16 block and of its first search position
    2778             :     uint32_t mv, uint32_t start_16x16_pos, uint32_t *p_best_sad8x8,  // mv: base MV (search position i uses _MVXT(mv) + 4 * i and _MVYT(mv)); start_16x16_pos: index of this 16x16 block
    2779             :     uint32_t *p_best_sad16x16, uint32_t *p_best_mv8x8, uint32_t *p_best_mv16x16,  // best SAD / best MV arrays, passed un-offset (advanced to this block below)
    2780             :     uint32_t p_eight_sad16x16[16][8], uint32_t p_eight_sad8x8[64][8]) {  // [block][search position] SAD tables filled in here
    2781           0 :     const uint32_t start_8x8_pos = 4 * start_16x16_pos;  // four 8x8 entries per 16x16 block
    2782             :     uint32_t sad8x8_0, sad8x8_1, sad8x8_2, sad8x8_3;
    2783             :     uint32_t sad16x16;
    2784             :     uint32_t search_index;
    2785             :     int16_t x_mv, y_mv;
    2786           0 :     uint32_t srcStrideSub = (src_stride << 1);  // doubled stride: consecutive kernel rows are two picture rows apart
    2787           0 :     uint32_t refStrideSub = (ref_stride << 1);
    2788             : 
    2789           0 :     p_best_sad8x8 += start_8x8_pos;  // from here on, index 0..3 addresses this block's four 8x8 entries
    2790           0 :     p_best_mv8x8 += start_8x8_pos;
    2791           0 :     p_best_sad16x16 += start_16x16_pos;  // index 0 now addresses this 16x16 block
    2792           0 :     p_best_mv16x16 += start_16x16_pos;
    2793             : 
    2794           0 :     for (search_index = 0; search_index < 8; search_index++) {  // one iteration per horizontal reference offset
    2795           0 :         p_eight_sad8x8[0 + start_8x8_pos][search_index] = sad8x8_0 =  // top-left 8x8
    2796           0 :             (compute8x4_sad_kernel_c(
    2797             :                 src, srcStrideSub, ref + search_index, refStrideSub))
    2798             :             << 1;  // SAD doubled - presumably scales the row-subsampled 8x4 result up to an 8x8 estimate (confirm against the kernel)
    2799           0 :         if (sad8x8_0 < p_best_sad8x8[0]) {  // strict '<': an equal SAD keeps the earlier best
    2800           0 :             p_best_sad8x8[0] = (uint32_t)sad8x8_0;
    2801           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;  // each search step adds 4 to the x component (presumably quarter-pel units - confirm)
    2802           0 :             y_mv = _MVYT(mv);
    2803           0 :             p_best_mv8x8[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);  // packed MV: y in the upper 16 bits, x in the lower 16 bits
    2804             :         }
    2805             : 
    2806           0 :         p_eight_sad8x8[1 + start_8x8_pos][search_index] = sad8x8_1 =  // top-right 8x8 (8 columns to the right)
    2807           0 :             (compute8x4_sad_kernel_c(
    2808           0 :                 src + 8, srcStrideSub, ref + 8 + search_index, refStrideSub))
    2809             :             << 1;
    2810           0 :         if (sad8x8_1 < p_best_sad8x8[1]) {
    2811           0 :             p_best_sad8x8[1] = (uint32_t)sad8x8_1;
    2812           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2813           0 :             y_mv = _MVYT(mv);
    2814           0 :             p_best_mv8x8[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2815             :         }
    2816             : 
    2817           0 :         p_eight_sad8x8[2 + start_8x8_pos][search_index] = sad8x8_2 =  // bottom-left 8x8 (8 rows down)
    2818           0 :             (compute8x4_sad_kernel_c(
    2819           0 :                 src + (src_stride << 3),
    2820             :                 srcStrideSub,
    2821           0 :                 ref + (ref_stride << 3) + search_index,
    2822             :                 refStrideSub))
    2823             :             << 1;
    2824           0 :         if (sad8x8_2 < p_best_sad8x8[2]) {
    2825           0 :             p_best_sad8x8[2] = (uint32_t)sad8x8_2;
    2826           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2827           0 :             y_mv = _MVYT(mv);
    2828           0 :             p_best_mv8x8[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2829             :         }
    2830             : 
    2831           0 :         p_eight_sad8x8[3 + start_8x8_pos][search_index] = sad8x8_3 =  // bottom-right 8x8 (8 rows down, 8 columns right)
    2832           0 :             (compute8x4_sad_kernel_c(
    2833           0 :                 src + (src_stride << 3) + 8,
    2834             :                 srcStrideSub,
    2835           0 :                 ref + (ref_stride << 3) + 8 + search_index,
    2836             :                 refStrideSub))
    2837             :             << 1;
    2838           0 :         if (sad8x8_3 < p_best_sad8x8[3]) {
    2839           0 :             p_best_sad8x8[3] = (uint32_t)sad8x8_3;
    2840           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2841           0 :             y_mv = _MVYT(mv);
    2842           0 :             p_best_mv8x8[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2843             :         }
    2844             : 
    2845           0 :         p_eight_sad16x16[start_16x16_pos][search_index] = sad16x16 =  // 16x16 SAD is the sum of its four 8x8 SADs
    2846           0 :             sad8x8_0 + sad8x8_1 + sad8x8_2 + sad8x8_3;
    2847           0 :         if (sad16x16 < p_best_sad16x16[0]) {
    2848           0 :             p_best_sad16x16[0] = (uint32_t)sad16x16;
    2849           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2850           0 :             y_mv = _MVYT(mv);
    2851           0 :             p_best_mv16x16[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2852             :         }
    2853             :     }
    2854           0 : }
    2855             : 
    2856           0 : void ext_all_sad_calculation_8x8_16x16_c(  // runs ext_eight_sad_calculation_8x8_16x16 on each of the sixteen 16x16 blocks of a 64x64 area
    2857             :     uint8_t *src, uint32_t src_stride, uint8_t *ref, uint32_t ref_stride,
    2858             :     uint32_t mv, uint32_t *p_best_sad8x8, uint32_t *p_best_sad16x16,
    2859             :     uint32_t *p_best_mv8x8, uint32_t *p_best_mv16x16,
    2860             :     uint32_t p_eight_sad16x16[16][8], uint32_t p_eight_sad8x8[64][8]) {
    2861             :     static const char offsets[16] = {  // indexed by raster position 4 * y + x
    2862             :         0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
    2863             : 
    2864             :     //---- 16x16 blocks are visited in raster order; offsets[] gives the result-array index of each: 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15
    2865           0 :     for (int y = 0; y < 4; y++) {
    2866           0 :         for (int x = 0; x < 4; x++) {
    2867           0 :             const uint32_t blockIndex = 16 * y * src_stride + 16 * x;  // top-left of 16x16 block (x, y) in src
    2868           0 :             const uint32_t searchPositionIndex = 16 * y * ref_stride + 16 * x;  // same block position in ref
    2869           0 :             ext_eight_sad_calculation_8x8_16x16(src + blockIndex,
    2870             :                                                 src_stride,
    2871             :                                                 ref + searchPositionIndex,
    2872             :                                                 ref_stride,
    2873             :                                                 mv,
    2874           0 :                                                 offsets[4 * y + x],  // start_16x16_pos for this block
    2875             :                                                 p_best_sad8x8,
    2876             :                                                 p_best_sad16x16,
    2877             :                                                 p_best_mv8x8,
    2878             :                                                 p_best_mv16x16,
    2879             :                                                 p_eight_sad16x16,
    2880             :                                                 p_eight_sad8x8);
    2881             :         }
    2882             :     }
    2883           0 : }
    2884             : 
    2885             : /*******************************************
    2886             : Calculate the 32x32 and 64x64 SADs from the 16x16
    2887             : SADs at each of the 8 search positions; where a SAD
    2888             : improves on the stored best, keep that SAD and its MV
    2889             : *******************************************/
    2890           0 : void ext_eight_sad_calculation_32x32_64x64_c(
    2891             :     uint32_t p_sad16x16[16][8], uint32_t *p_best_sad32x32,  // p_sad16x16: [16x16 block][search position] input SADs
    2892             :     uint32_t *p_best_sad64x64, uint32_t *p_best_mv32x32,
    2893             :     uint32_t *p_best_mv64x64, uint32_t mv, uint32_t p_sad32x32[4][8]) {  // p_sad32x32: [32x32 block][search position] SADs written here
    2894             :     uint32_t search_index;
    2895             :     int16_t x_mv, y_mv;
    2896           0 :     for (search_index = 0; search_index < 8; search_index++) {
    2897             :         uint32_t sad32x32_0, sad32x32_1, sad32x32_2, sad32x32_3, sad64x64;
    2898             : 
    2899           0 :         p_sad32x32[0][search_index] = sad32x32_0 =  // 32x32 block 0 = 16x16 blocks 0..3
    2900           0 :             p_sad16x16[0][search_index] + p_sad16x16[1][search_index] +
    2901           0 :             p_sad16x16[2][search_index] + p_sad16x16[3][search_index];
    2902           0 :         if (sad32x32_0 < p_best_sad32x32[0]) {  // strict '<': an equal SAD keeps the earlier best
    2903           0 :             p_best_sad32x32[0] = sad32x32_0;
    2904           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;  // each search step adds 4 to the x component
    2905           0 :             y_mv = _MVYT(mv);
    2906           0 :             p_best_mv32x32[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);  // packed MV: y in the upper 16 bits, x in the lower 16 bits
    2907             :         }
    2908             : 
    2909           0 :         p_sad32x32[1][search_index] = sad32x32_1 =  // 32x32 block 1 = 16x16 blocks 4..7
    2910           0 :             p_sad16x16[4][search_index] + p_sad16x16[5][search_index] +
    2911           0 :             p_sad16x16[6][search_index] + p_sad16x16[7][search_index];
    2912           0 :         if (sad32x32_1 < p_best_sad32x32[1]) {
    2913           0 :             p_best_sad32x32[1] = sad32x32_1;
    2914           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2915           0 :             y_mv = _MVYT(mv);
    2916           0 :             p_best_mv32x32[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2917             :         }
    2918             : 
    2919           0 :         p_sad32x32[2][search_index] = sad32x32_2 =  // 32x32 block 2 = 16x16 blocks 8..11
    2920           0 :             p_sad16x16[8][search_index] + p_sad16x16[9][search_index] +
    2921           0 :             p_sad16x16[10][search_index] + p_sad16x16[11][search_index];
    2922           0 :         if (sad32x32_2 < p_best_sad32x32[2]) {
    2923           0 :             p_best_sad32x32[2] = sad32x32_2;
    2924           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2925           0 :             y_mv = _MVYT(mv);
    2926           0 :             p_best_mv32x32[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2927             :         }
    2928             : 
    2929           0 :         p_sad32x32[3][search_index] = sad32x32_3 =  // 32x32 block 3 = 16x16 blocks 12..15
    2930           0 :             p_sad16x16[12][search_index] + p_sad16x16[13][search_index] +
    2931           0 :             p_sad16x16[14][search_index] + p_sad16x16[15][search_index];
    2932           0 :         if (sad32x32_3 < p_best_sad32x32[3]) {
    2933           0 :             p_best_sad32x32[3] = sad32x32_3;
    2934           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2935           0 :             y_mv = _MVYT(mv);
    2936           0 :             p_best_mv32x32[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2937             :         }
    2938             : 
    2939           0 :         sad64x64 = sad32x32_0 + sad32x32_1 + sad32x32_2 + sad32x32_3;  // only compared against the best; not stored per search position
    2940           0 :         if (sad64x64 < p_best_sad64x64[0]) {
    2941           0 :             p_best_sad64x64[0] = sad64x64;
    2942           0 :             x_mv = _MVXT(mv) + (int16_t)search_index * 4;
    2943           0 :             y_mv = _MVYT(mv);
    2944           0 :             p_best_mv64x64[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    2945             :         }
    2946             :     }
    2947           0 : }
    2948             : 
    2949             : /*******************************************
    2950             :  * open_loop_me_get_eight_search_point_results_block: runs the three eight-search-point SAD routines (8x8/16x16, then 32x32/64x64, then the non-square sizes) for one search position, working on the ME context's best SAD/MV arrays
    2951             :  *******************************************/
    2952           0 : static void open_loop_me_get_eight_search_point_results_block(
    2953             :     MeContext
    2954             :         *context_ptr,    // input parameter, ME context Ptr, used to get SB Ptr
    2955             :     uint32_t listIndex,  // input parameter, reference list index
    2956             :     uint32_t ref_pic_index,  // input parameter, second index (after listIndex) into the per-reference buffers
    2957             :     uint32_t searchRegionIndex,  // input parameter, search area origin, used to
    2958             :                                  // point to reference samples
    2959             :     int32_t xSearchIndex,  // input parameter, search region position in the
    2960             :                            // horizontal direction, used to derive xMV
    2961             :     int32_t ySearchIndex  // input parameter, search region position in the
    2962             :                            // vertical direction, used to derive yMV
    2963             :     ) {
    2964             :     // uint32_t reflumaStride = refPicPtr->stride_y; // NADER
    2965             :     // uint8_t  *refPtr = refPicPtr->buffer_y; // NADER
    2966           0 :     uint32_t reflumaStride =
    2967             :         context_ptr->interpolated_full_stride[listIndex][ref_pic_index];
    2968           0 :     uint8_t *refPtr =  // skip ME_FILTER_TAP / 2 rows and columns (presumably interpolation padding - confirm), then apply searchRegionIndex
    2969           0 :         context_ptr->integer_buffer_ptr[listIndex][ref_pic_index] +
    2970           0 :         ((ME_FILTER_TAP >> 1) *
    2971           0 :          context_ptr->interpolated_full_stride[listIndex][ref_pic_index]) +
    2972           0 :         (ME_FILTER_TAP >> 1) + searchRegionIndex;
    2973             : 
    2974           0 :     uint32_t currMV1 = (((uint16_t)ySearchIndex) << 18);  // y search index * 4, placed in the upper 16 bits
    2975           0 :     uint16_t currMV2 = (((uint16_t)xSearchIndex << 2));  // x search index * 4, truncated to 16 bits
    2976           0 :     uint32_t currMV = currMV1 | currMV2;  // packed MV handed to all three routines below
    2977             : 
    2978           0 :     ext_all_sad_calculation_8x8_16x16(  // 8x8 and 16x16: fills p_eight_sad8x8 / p_eight_sad16x16
    2979             :         context_ptr->sb_src_ptr,
    2980             :         context_ptr->sb_src_stride,
    2981             :         refPtr,
    2982             :         reflumaStride,
    2983             :         currMV,
    2984             :         context_ptr->p_best_sad8x8,
    2985             :         context_ptr->p_best_sad16x16,
    2986             :         context_ptr->p_best_mv8x8,
    2987             :         context_ptr->p_best_mv16x16,
    2988           0 :         context_ptr->p_eight_sad16x16,
    2989           0 :         context_ptr->p_eight_sad8x8);
    2990             : 
    2991           0 :     ext_eight_sad_calculation_32x32_64x64(  // 32x32 and 64x64, given the 16x16 SAD table from the call above
    2992           0 :         context_ptr->p_eight_sad16x16,
    2993             :         context_ptr->p_best_sad32x32,
    2994             :         context_ptr->p_best_sad64x64,
    2995             :         context_ptr->p_best_mv32x32,
    2996             :         context_ptr->p_best_mv64x64,
    2997             :         currMV,
    2998           0 :         context_ptr->p_eight_sad32x32);
    2999             : 
    3000           0 :     ext_eigth_sad_calculation_nsq(  // non-square sizes; receives the 8x8 / 16x16 / 32x32 SAD tables
    3001           0 :         context_ptr->p_eight_sad8x8,
    3002           0 :         context_ptr->p_eight_sad16x16,
    3003           0 :         context_ptr->p_eight_sad32x32,
    3004             :         context_ptr->p_best_sad64x32,
    3005             :         context_ptr->p_best_mv64x32,
    3006             :         context_ptr->p_best_sad32x16,
    3007             :         context_ptr->p_best_mv32x16,
    3008             :         context_ptr->p_best_sad16x8,
    3009             :         context_ptr->p_best_mv16x8,
    3010             :         context_ptr->p_best_sad32x64,
    3011             :         context_ptr->p_best_mv32x64,
    3012             :         context_ptr->p_best_sad16x32,
    3013             :         context_ptr->p_best_mv16x32,
    3014             :         context_ptr->p_best_sad8x16,
    3015             :         context_ptr->p_best_mv8x16,
    3016             :         context_ptr->p_best_sad32x8,
    3017             :         context_ptr->p_best_mv32x8,
    3018             :         context_ptr->p_best_sad8x32,
    3019             :         context_ptr->p_best_mv8x32,
    3020             :         context_ptr->p_best_sad64x16,
    3021             :         context_ptr->p_best_mv64x16,
    3022             :         context_ptr->p_best_sad16x64,
    3023             :         context_ptr->p_best_mv16x64,
    3024             :         currMV);
    3025           0 : }
    3026             : 
    3027             : /*******************************************
    3028             :  * nsq_get_analysis_results_block selects
    3029             :  * the best partition for each sq_block based
    3030             :  * on the ME SAD (via nsq_me_analysis)
    3031             :  *******************************************/
    3032           0 : static void nsq_get_analysis_results_block(MeContext *context_ptr) {
    3033           0 :     uint32_t *p_best_sad64x32 = context_ptr->p_best_sad64x32;  // best-SAD arrays of the non-square block sizes, read from the ME context
    3034           0 :     uint32_t *p_best_sad32x16 = context_ptr->p_best_sad32x16;
    3035           0 :     uint32_t *p_best_sad16x8 = context_ptr->p_best_sad16x8;
    3036           0 :     uint32_t *p_best_sad32x64 = context_ptr->p_best_sad32x64;
    3037           0 :     uint32_t *p_best_sad16x32 = context_ptr->p_best_sad16x32;
    3038           0 :     uint32_t *p_best_sad8x16 = context_ptr->p_best_sad8x16;
    3039           0 :     uint32_t *p_best_sad32x8 = context_ptr->p_best_sad32x8;
    3040           0 :     uint32_t *p_best_sad8x32 = context_ptr->p_best_sad8x32;
    3041           0 :     uint32_t *p_best_sad64x16 = context_ptr->p_best_sad64x16;
    3042           0 :     uint32_t *p_best_sad16x64 = context_ptr->p_best_sad16x64;
    3043           0 :     uint8_t *p_best_nsq_64x64 = context_ptr->p_best_nsq64x64;  // per-square-size uint8_t arrays; presumably where nsq_me_analysis writes its decision (confirm)
    3044           0 :     uint8_t *p_best_nsq_32x32 = context_ptr->p_best_nsq32x32;
    3045           0 :     uint8_t *p_best_nsq_16x16 = context_ptr->p_best_nsq16x16;
    3046           0 :     uint8_t *p_best_nsq_8x8 = context_ptr->p_best_nsq8x8;
    3047             : 
    3048           0 :     nsq_me_analysis(p_best_sad64x32,  // all work is delegated; this function only unpacks the context
    3049             :                     p_best_sad32x16,
    3050             :                     p_best_sad16x8,
    3051             :                     p_best_sad32x64,
    3052             :                     p_best_sad16x32,
    3053             :                     p_best_sad8x16,
    3054             :                     p_best_sad32x8,
    3055             :                     p_best_sad8x32,
    3056             :                     p_best_sad64x16,
    3057             :                     p_best_sad16x64,
    3058             :                     p_best_nsq_64x64,
    3059             :                     p_best_nsq_32x32,
    3060             :                     p_best_nsq_16x16,
    3061             :                     p_best_nsq_8x8);
    3062           0 : }
    3063             : 
    3064             : /*******************************************
    3065             :  * open_loop_me_get_search_point_results_block
    3066             :  *******************************************/
    3067           0 : static void open_loop_me_get_search_point_results_block(
    3068             :     MeContext
    3069             :         *context_ptr,    // input parameter, ME context Ptr, used to get SB Ptr
    3070             :     uint32_t listIndex,  // input parameter, reference list index
    3071             :     uint32_t ref_pic_index,
    3072             :     uint32_t searchRegionIndex,  // input parameter, search area origin, used to
    3073             :                                  // point to reference samples
    3074             :     int32_t xSearchIndex,  // input parameter, search region position in the
    3075             :                            // horizontal direction, used to derive xMV
    3076             :     int32_t ySearchIndex)  // input parameter, search region position in the
    3077             :                            // vertical direction, used to derive yMV
    3078             : {
    3079           0 :     const EbBool sub_sad = (context_ptr->me_search_method == SUB_SAD_SEARCH);
    3080           0 :     uint8_t *src_ptr = context_ptr->sb_src_ptr;
    3081             : 
    3082             :     // uint8_t  *refPtr = refPicPtr->buffer_y; // NADER
    3083           0 :     uint8_t *refPtr =
    3084           0 :         context_ptr->integer_buffer_ptr[listIndex][ref_pic_index] +
    3085           0 :         (ME_FILTER_TAP >> 1) +
    3086           0 :         ((ME_FILTER_TAP >> 1) *
    3087           0 :          context_ptr->interpolated_full_stride[listIndex][ref_pic_index]);
    3088             :     // uint32_t reflumaStride = refPicPtr->stride_y; // NADER
    3089           0 :     uint32_t reflumaStride =
    3090             :         context_ptr->interpolated_full_stride[listIndex][ref_pic_index];
    3091           0 :     uint32_t searchPositionTLIndex = searchRegionIndex;
    3092             :     uint32_t searchPositionIndex;
    3093             :     uint32_t blockIndex;
    3094           0 :     uint32_t srcNext16x16Offset = (BLOCK_SIZE_64 << 4);
    3095             :     // uint32_t refNext16x16Offset = (refPicPtr->stride_y << 4); // NADER
    3096           0 :     uint32_t refNext16x16Offset = (reflumaStride << 4);
    3097           0 :     uint32_t currMV1 = (((uint16_t)ySearchIndex) << 18);
    3098           0 :     uint16_t currMV2 = (((uint16_t)xSearchIndex << 2));
    3099           0 :     uint32_t currMV = currMV1 | currMV2;
    3100           0 :     uint32_t *p_best_sad8x8 = context_ptr->p_best_sad8x8;
    3101           0 :     uint32_t *p_best_sad16x16 = context_ptr->p_best_sad16x16;
    3102           0 :     uint32_t *p_best_sad32x32 = context_ptr->p_best_sad32x32;
    3103           0 :     uint32_t *p_best_sad64x64 = context_ptr->p_best_sad64x64;
    3104           0 :     uint32_t *p_best_sad64x32 = context_ptr->p_best_sad64x32;
    3105           0 :     uint32_t *p_best_sad32x16 = context_ptr->p_best_sad32x16;
    3106           0 :     uint32_t *p_best_sad16x8 = context_ptr->p_best_sad16x8;
    3107           0 :     uint32_t *p_best_sad32x64 = context_ptr->p_best_sad32x64;
    3108           0 :     uint32_t *p_best_sad16x32 = context_ptr->p_best_sad16x32;
    3109           0 :     uint32_t *p_best_sad8x16 = context_ptr->p_best_sad8x16;
    3110           0 :     uint32_t *p_best_sad32x8 = context_ptr->p_best_sad32x8;
    3111           0 :     uint32_t *p_best_sad8x32 = context_ptr->p_best_sad8x32;
    3112           0 :     uint32_t *p_best_sad64x16 = context_ptr->p_best_sad64x16;
    3113           0 :     uint32_t *p_best_sad16x64 = context_ptr->p_best_sad16x64;
    3114           0 :     uint32_t *p_best_mv8x8 = context_ptr->p_best_mv8x8;
    3115           0 :     uint32_t *p_best_mv16x16 = context_ptr->p_best_mv16x16;
    3116           0 :     uint32_t *p_best_mv32x32 = context_ptr->p_best_mv32x32;
    3117           0 :     uint32_t *p_best_mv64x64 = context_ptr->p_best_mv64x64;
    3118           0 :     uint32_t *p_best_mv64x32 = context_ptr->p_best_mv64x32;
    3119           0 :     uint32_t *p_best_mv32x16 = context_ptr->p_best_mv32x16;
    3120           0 :     uint32_t *p_best_mv16x8 = context_ptr->p_best_mv16x8;
    3121           0 :     uint32_t *p_best_mv32x64 = context_ptr->p_best_mv32x64;
    3122           0 :     uint32_t *p_best_mv16x32 = context_ptr->p_best_mv16x32;
    3123           0 :     uint32_t *p_best_mv8x16 = context_ptr->p_best_mv8x16;
    3124           0 :     uint32_t *p_best_mv32x8 = context_ptr->p_best_mv32x8;
    3125           0 :     uint32_t *p_best_mv8x32 = context_ptr->p_best_mv8x32;
    3126           0 :     uint32_t *p_sad32x32 = context_ptr->p_sad32x32;
    3127           0 :     uint32_t *p_sad16x16 = context_ptr->p_sad16x16;
    3128           0 :     uint32_t *p_sad8x8 = context_ptr->p_sad8x8;
    3129           0 :     uint32_t *p_best_mv64x16 = context_ptr->p_best_mv64x16;
    3130           0 :     uint32_t *p_best_mv16x64 = context_ptr->p_best_mv16x64;
    3131             : 
    3132             :     // TODO: blockIndex searchPositionIndex could be removed
    3133             : 
    3134           0 :     const uint32_t src_stride = context_ptr->sb_src_stride;
    3135           0 :     srcNext16x16Offset = src_stride << 4;
    3136             : 
    3137             :     //---- 16x16 : 0
    3138           0 :     blockIndex = 0;
    3139           0 :     searchPositionIndex = searchPositionTLIndex;
    3140             : 
    3141           0 :     ext_sad_calculation_8x8_16x16(
    3142             :         src_ptr + blockIndex,
    3143             :         src_stride,
    3144             :         refPtr + searchPositionIndex,
    3145             :         reflumaStride,
    3146             :         &p_best_sad8x8[0],
    3147             :         &p_best_sad16x16[0],
    3148             :         &p_best_mv8x8[0],
    3149             :         &p_best_mv16x16[0],
    3150             :         currMV,
    3151             :         &p_sad16x16[0],
    3152             :         &p_sad8x8[0],
    3153             :         sub_sad);
    3154             : 
    3155             :     //---- 16x16 : 1
    3156           0 :     blockIndex = blockIndex + 16;
    3157           0 :     searchPositionIndex = searchPositionTLIndex + 16;
    3158           0 :     ext_sad_calculation_8x8_16x16(
    3159             :         src_ptr + blockIndex,
    3160             :         src_stride,
    3161             :         refPtr + searchPositionIndex,
    3162             :         reflumaStride,
    3163             :         &p_best_sad8x8[4],
    3164             :         &p_best_sad16x16[1],
    3165             :         &p_best_mv8x8[4],
    3166             :         &p_best_mv16x16[1],
    3167             :         currMV,
    3168             :         &p_sad16x16[1],
    3169             :         &p_sad8x8[4],
    3170             :         sub_sad);
    3171             :     //---- 16x16 : 4
    3172           0 :     blockIndex = blockIndex + 16;
    3173           0 :     searchPositionIndex = searchPositionIndex + 16;
    3174             : 
    3175           0 :     ext_sad_calculation_8x8_16x16(
    3176             :         src_ptr + blockIndex,
    3177             :         src_stride,
    3178             :         refPtr + searchPositionIndex,
    3179             :         reflumaStride,
    3180             :         &p_best_sad8x8[16],
    3181             :         &p_best_sad16x16[4],
    3182             :         &p_best_mv8x8[16],
    3183             :         &p_best_mv16x16[4],
    3184             :         currMV,
    3185             :         &p_sad16x16[4],
    3186             :         &p_sad8x8[16],
    3187             :         sub_sad);
    3188             : 
    3189             :     //---- 16x16 : 5
    3190           0 :     blockIndex = blockIndex + 16;
    3191           0 :     searchPositionIndex = searchPositionIndex + 16;
    3192           0 :     ext_sad_calculation_8x8_16x16(
    3193             :         src_ptr + blockIndex,
    3194             :         src_stride,
    3195             :         refPtr + searchPositionIndex,
    3196             :         reflumaStride,
    3197             :         &p_best_sad8x8[20],
    3198             :         &p_best_sad16x16[5],
    3199             :         &p_best_mv8x8[20],
    3200             :         &p_best_mv16x16[5],
    3201             :         currMV,
    3202             :         &p_sad16x16[5],
    3203             :         &p_sad8x8[20],
    3204             :         sub_sad);
    3205             : 
    3206             :     //---- 16x16 : 2
    3207           0 :     blockIndex = srcNext16x16Offset;
    3208           0 :     searchPositionIndex = searchPositionTLIndex + refNext16x16Offset;
    3209           0 :     ext_sad_calculation_8x8_16x16(
    3210             :         src_ptr + blockIndex,
    3211             :         src_stride,
    3212             :         refPtr + searchPositionIndex,
    3213             :         reflumaStride,
    3214             :         &p_best_sad8x8[8],
    3215             :         &p_best_sad16x16[2],
    3216             :         &p_best_mv8x8[8],
    3217             :         &p_best_mv16x16[2],
    3218             :         currMV,
    3219             :         &p_sad16x16[2],
    3220             :         &p_sad8x8[8],
    3221             :         sub_sad);
    3222             :     //---- 16x16 : 3
    3223           0 :     blockIndex = blockIndex + 16;
    3224           0 :     searchPositionIndex = searchPositionIndex + 16;
    3225           0 :     ext_sad_calculation_8x8_16x16(
    3226             :         src_ptr + blockIndex,
    3227             :         src_stride,
    3228             :         refPtr + searchPositionIndex,
    3229             :         reflumaStride,
    3230             :         &p_best_sad8x8[12],
    3231             :         &p_best_sad16x16[3],
    3232             :         &p_best_mv8x8[12],
    3233             :         &p_best_mv16x16[3],
    3234             :         currMV,
    3235             :         &p_sad16x16[3],
    3236             :         &p_sad8x8[12],
    3237             :         sub_sad);
    3238             :     //---- 16x16 : 6
    3239           0 :     blockIndex = blockIndex + 16;
    3240           0 :     searchPositionIndex = searchPositionIndex + 16;
    3241           0 :     ext_sad_calculation_8x8_16x16(
    3242             :         src_ptr + blockIndex,
    3243             :         src_stride,
    3244             :         refPtr + searchPositionIndex,
    3245             :         reflumaStride,
    3246             :         &p_best_sad8x8[24],
    3247             :         &p_best_sad16x16[6],
    3248             :         &p_best_mv8x8[24],
    3249             :         &p_best_mv16x16[6],
    3250             :         currMV,
    3251             :         &p_sad16x16[6],
    3252             :         &p_sad8x8[24],
    3253             :         sub_sad);
    3254             :     //---- 16x16 : 7
    3255           0 :     blockIndex = blockIndex + 16;
    3256           0 :     searchPositionIndex = searchPositionIndex + 16;
    3257           0 :     ext_sad_calculation_8x8_16x16(
    3258             :         src_ptr + blockIndex,
    3259             :         src_stride,
    3260             :         refPtr + searchPositionIndex,
    3261             :         reflumaStride,
    3262             :         &p_best_sad8x8[28],
    3263             :         &p_best_sad16x16[7],
    3264             :         &p_best_mv8x8[28],
    3265             :         &p_best_mv16x16[7],
    3266             :         currMV,
    3267             :         &p_sad16x16[7],
    3268             :         &p_sad8x8[28],
    3269             :         sub_sad);
    3270             : 
    3271             :     //---- 16x16 : 8
    3272           0 :     blockIndex = (srcNext16x16Offset << 1);
    3273           0 :     searchPositionIndex = searchPositionTLIndex + (refNext16x16Offset << 1);
    3274           0 :     ext_sad_calculation_8x8_16x16(
    3275             :         src_ptr + blockIndex,
    3276             :         src_stride,
    3277             :         refPtr + searchPositionIndex,
    3278             :         reflumaStride,
    3279             :         &p_best_sad8x8[32],
    3280             :         &p_best_sad16x16[8],
    3281             :         &p_best_mv8x8[32],
    3282             :         &p_best_mv16x16[8],
    3283             :         currMV,
    3284             :         &p_sad16x16[8],
    3285             :         &p_sad8x8[32],
    3286             :         sub_sad);
    3287             :     //---- 16x16 : 9
    3288           0 :     blockIndex = blockIndex + 16;
    3289           0 :     searchPositionIndex = searchPositionIndex + 16;
    3290           0 :     ext_sad_calculation_8x8_16x16(
    3291             :         src_ptr + blockIndex,
    3292             :         src_stride,
    3293             :         refPtr + searchPositionIndex,
    3294             :         reflumaStride,
    3295             :         &p_best_sad8x8[36],
    3296             :         &p_best_sad16x16[9],
    3297             :         &p_best_mv8x8[36],
    3298             :         &p_best_mv16x16[9],
    3299             :         currMV,
    3300             :         &p_sad16x16[9],
    3301             :         &p_sad8x8[36],
    3302             :         sub_sad);
    3303             :     //---- 16x16 : 12
    3304           0 :     blockIndex = blockIndex + 16;
    3305           0 :     searchPositionIndex = searchPositionIndex + 16;
    3306           0 :     ext_sad_calculation_8x8_16x16(
    3307             :         src_ptr + blockIndex,
    3308             :         src_stride,
    3309             :         refPtr + searchPositionIndex,
    3310             :         reflumaStride,
    3311             :         &p_best_sad8x8[48],
    3312             :         &p_best_sad16x16[12],
    3313             :         &p_best_mv8x8[48],
    3314             :         &p_best_mv16x16[12],
    3315             :         currMV,
    3316             :         &p_sad16x16[12],
    3317             :         &p_sad8x8[48],
    3318             :         sub_sad);
    3319             :     //---- 16x16 : 13
    3320           0 :     blockIndex = blockIndex + 16;
    3321           0 :     searchPositionIndex = searchPositionIndex + 16;
    3322           0 :     ext_sad_calculation_8x8_16x16(
    3323             :         src_ptr + blockIndex,
    3324             :         src_stride,
    3325             :         refPtr + searchPositionIndex,
    3326             :         reflumaStride,
    3327             :         &p_best_sad8x8[52],
    3328             :         &p_best_sad16x16[13],
    3329             :         &p_best_mv8x8[52],
    3330             :         &p_best_mv16x16[13],
    3331             :         currMV,
    3332             :         &p_sad16x16[13],
    3333             :         &p_sad8x8[52],
    3334             :         sub_sad);
    3335             : 
    3336             :     //---- 16x16 : 10
    3337           0 :     blockIndex = (srcNext16x16Offset * 3);
    3338           0 :     searchPositionIndex = searchPositionTLIndex + (refNext16x16Offset * 3);
    3339           0 :     ext_sad_calculation_8x8_16x16(
    3340             :         src_ptr + blockIndex,
    3341             :         src_stride,
    3342             :         refPtr + searchPositionIndex,
    3343             :         reflumaStride,
    3344             :         &p_best_sad8x8[40],
    3345             :         &p_best_sad16x16[10],
    3346             :         &p_best_mv8x8[40],
    3347             :         &p_best_mv16x16[10],
    3348             :         currMV,
    3349             :         &p_sad16x16[10],
    3350             :         &p_sad8x8[40],
    3351             :         sub_sad);
    3352             :     //---- 16x16 : 11
    3353           0 :     blockIndex = blockIndex + 16;
    3354           0 :     searchPositionIndex = searchPositionIndex + 16;
    3355           0 :     ext_sad_calculation_8x8_16x16(
    3356             :         src_ptr + blockIndex,
    3357             :         src_stride,
    3358             :         refPtr + searchPositionIndex,
    3359             :         reflumaStride,
    3360             :         &p_best_sad8x8[44],
    3361             :         &p_best_sad16x16[11],
    3362             :         &p_best_mv8x8[44],
    3363             :         &p_best_mv16x16[11],
    3364             :         currMV,
    3365             :         &p_sad16x16[11],
    3366             :         &p_sad8x8[44],
    3367             :         sub_sad);
    3368             :     //---- 16x16 : 14
    3369           0 :     blockIndex = blockIndex + 16;
    3370           0 :     searchPositionIndex = searchPositionIndex + 16;
    3371           0 :     ext_sad_calculation_8x8_16x16(
    3372             :         src_ptr + blockIndex,
    3373             :         src_stride,
    3374             :         refPtr + searchPositionIndex,
    3375             :         reflumaStride,
    3376             :         &p_best_sad8x8[56],
    3377             :         &p_best_sad16x16[14],
    3378             :         &p_best_mv8x8[56],
    3379             :         &p_best_mv16x16[14],
    3380             :         currMV,
    3381             :         &p_sad16x16[14],
    3382             :         &p_sad8x8[56],
    3383             :         sub_sad);
    3384             :     //---- 16x16 : 15
    3385           0 :     blockIndex = blockIndex + 16;
    3386           0 :     searchPositionIndex = searchPositionIndex + 16;
    3387           0 :     ext_sad_calculation_8x8_16x16(
    3388             :         src_ptr + blockIndex,
    3389             :         src_stride,
    3390             :         refPtr + searchPositionIndex,
    3391             :         reflumaStride,
    3392             :         &p_best_sad8x8[60],
    3393             :         &p_best_sad16x16[15],
    3394             :         &p_best_mv8x8[60],
    3395             :         &p_best_mv16x16[15],
    3396             :         currMV,
    3397             :         &p_sad16x16[15],
    3398             :         &p_sad8x8[60],
    3399             :         sub_sad);
    3400             : 
    3401           0 :     ext_sad_calculation_32x32_64x64(p_sad16x16,
    3402             :                                     p_best_sad32x32,
    3403             :                                     p_best_sad64x64,
    3404             :                                     p_best_mv32x32,
    3405             :                                     p_best_mv64x64,
    3406             :                                     currMV,
    3407             :                                     &p_sad32x32[0]);
    3408             : 
    3409           0 :     ExtSadCalculation(p_sad8x8,
    3410             :                     p_sad16x16,
    3411             :                     p_sad32x32,
    3412             :                     p_best_sad64x32,
    3413             :                     p_best_mv64x32,
    3414             :                     p_best_sad32x16,
    3415             :                     p_best_mv32x16,
    3416             :                     p_best_sad16x8,
    3417             :                     p_best_mv16x8,
    3418             :                     p_best_sad32x64,
    3419             :                     p_best_mv32x64,
    3420             :                     p_best_sad16x32,
    3421             :                     p_best_mv16x32,
    3422             :                     p_best_sad8x16,
    3423             :                     p_best_mv8x16,
    3424             :                     p_best_sad32x8,
    3425             :                     p_best_mv32x8,
    3426             :                     p_best_sad8x32,
    3427             :                     p_best_mv8x32,
    3428             :                     p_best_sad64x16,
    3429             :                     p_best_mv64x16,
    3430             :                     p_best_sad16x64,
    3431             :                     p_best_mv16x64,
    3432             :                     currMV);
    3433           0 : }
    3434             : 
    3435             : /*******************************************
    3436             :  * GetSearchPointResults
    3437             :  *******************************************/
    3438           0 : static void GetSearchPointResults(
    3439             :     MeContext
    3440             :         *context_ptr,    // input parameter, ME context Ptr, used to get SB Ptr
    3441             :     uint32_t listIndex,  // input parameter, reference list index
    3442             :     uint32_t ref_pic_index,
    3443             :     uint32_t searchRegionIndex,  // input parameter, search area origin, used to
    3444             :                                  // point to reference samples
    3445             :     int32_t xSearchIndex,  // input parameter, search region position in the
    3446             :                            // horizontal direction, used to derive xMV
    3447             :     int32_t ySearchIndex)  // input parameter, search region position in the
    3448             :                            // vertical direction, used to derive yMV
    3449             : {
    3450           0 :     const EbBool sub_sad = (context_ptr->me_search_method == SUB_SAD_SEARCH);
    3451           0 :     uint8_t *src_ptr = context_ptr->sb_src_ptr;
    3452             : 
    3453             :     // uint8_t  *refPtr = refPicPtr->buffer_y; // NADER
    3454           0 :     uint8_t *refPtr =
    3455           0 :         context_ptr->integer_buffer_ptr[listIndex][ref_pic_index] +
    3456           0 :         (ME_FILTER_TAP >> 1) +
    3457           0 :         ((ME_FILTER_TAP >> 1) *
    3458           0 :          context_ptr->interpolated_full_stride[listIndex][ref_pic_index]);
    3459             :     // uint32_t reflumaStride = refPicPtr->stride_y; // NADER
    3460           0 :     uint32_t reflumaStride =
    3461             :         context_ptr->interpolated_full_stride[listIndex][ref_pic_index];
    3462             : 
    3463           0 :     uint32_t searchPositionTLIndex = searchRegionIndex;
    3464             :     uint32_t searchPositionIndex;
    3465             :     uint32_t blockIndex;
    3466             : 
    3467           0 :     uint32_t srcNext16x16Offset = (BLOCK_SIZE_64 << 4);
    3468             :     // uint32_t refNext16x16Offset = (refPicPtr->stride_y << 4); // NADER
    3469           0 :     uint32_t refNext16x16Offset = (reflumaStride << 4);
    3470             : 
    3471           0 :     uint32_t currMV1 = (((uint16_t)ySearchIndex) << 18);
    3472           0 :     uint16_t currMV2 = (((uint16_t)xSearchIndex << 2));
    3473           0 :     uint32_t currMV = currMV1 | currMV2;
    3474             : 
    3475           0 :     uint32_t *p_best_sad8x8 = context_ptr->p_best_sad8x8;
    3476           0 :     uint32_t *p_best_sad16x16 = context_ptr->p_best_sad16x16;
    3477           0 :     uint32_t *p_best_sad32x32 = context_ptr->p_best_sad32x32;
    3478           0 :     uint32_t *p_best_sad64x64 = context_ptr->p_best_sad64x64;
    3479             : 
    3480           0 :     uint32_t *p_best_mv8x8 = context_ptr->p_best_mv8x8;
    3481           0 :     uint32_t *p_best_mv16x16 = context_ptr->p_best_mv16x16;
    3482           0 :     uint32_t *p_best_mv32x32 = context_ptr->p_best_mv32x32;
    3483           0 :     uint32_t *p_best_mv64x64 = context_ptr->p_best_mv64x64;
    3484           0 :     uint32_t *p_sad16x16 = context_ptr->p_sad16x16;
    3485             : 
    3486             :     // TODO: blockIndex searchPositionIndex could be removed
    3487             : 
    3488           0 :     const uint32_t src_stride = context_ptr->sb_src_stride;
    3489           0 :     srcNext16x16Offset = src_stride << 4;
    3490             : 
    3491             :     //---- 16x16 : 0
    3492           0 :     blockIndex = 0;
    3493           0 :     searchPositionIndex = searchPositionTLIndex;
    3494             : 
    3495           0 :     sad_calculation_8x8_16x16(
    3496             :         src_ptr + blockIndex,
    3497             :         src_stride,
    3498             :         refPtr + searchPositionIndex,
    3499             :         reflumaStride,
    3500             :         &p_best_sad8x8[0],
    3501             :         &p_best_sad16x16[0],
    3502             :         &p_best_mv8x8[0],
    3503             :         &p_best_mv16x16[0],
    3504             :         currMV,
    3505             :         &p_sad16x16[0],
    3506             :         sub_sad);
    3507             : 
    3508             :     //---- 16x16 : 1
    3509           0 :     blockIndex = blockIndex + 16;
    3510           0 :     searchPositionIndex = searchPositionTLIndex + 16;
    3511           0 :     sad_calculation_8x8_16x16(
    3512             :         src_ptr + blockIndex,
    3513             :         src_stride,
    3514             :         refPtr + searchPositionIndex,
    3515             :         reflumaStride,
    3516             :         &p_best_sad8x8[4],
    3517             :         &p_best_sad16x16[1],
    3518             :         &p_best_mv8x8[4],
    3519             :         &p_best_mv16x16[1],
    3520             :         currMV,
    3521             :         &p_sad16x16[1],
    3522             :         sub_sad);
    3523             :     //---- 16x16 : 4
    3524           0 :     blockIndex = blockIndex + 16;
    3525           0 :     searchPositionIndex = searchPositionIndex + 16;
    3526             : 
    3527           0 :     sad_calculation_8x8_16x16(
    3528             :         src_ptr + blockIndex,
    3529             :         src_stride,
    3530             :         refPtr + searchPositionIndex,
    3531             :         reflumaStride,
    3532             :         &p_best_sad8x8[16],
    3533             :         &p_best_sad16x16[4],
    3534             :         &p_best_mv8x8[16],
    3535             :         &p_best_mv16x16[4],
    3536             :         currMV,
    3537             :         &p_sad16x16[4],
    3538             :         sub_sad);
    3539             : 
    3540             :     //---- 16x16 : 5
    3541           0 :     blockIndex = blockIndex + 16;
    3542           0 :     searchPositionIndex = searchPositionIndex + 16;
    3543           0 :     sad_calculation_8x8_16x16(
    3544             :         src_ptr + blockIndex,
    3545             :         src_stride,
    3546             :         refPtr + searchPositionIndex,
    3547             :         reflumaStride,
    3548             :         &p_best_sad8x8[20],
    3549             :         &p_best_sad16x16[5],
    3550             :         &p_best_mv8x8[20],
    3551             :         &p_best_mv16x16[5],
    3552             :         currMV,
    3553             :         &p_sad16x16[5],
    3554             :         sub_sad);
    3555             : 
    3556             :     //---- 16x16 : 2
    3557           0 :     blockIndex = srcNext16x16Offset;
    3558           0 :     searchPositionIndex = searchPositionTLIndex + refNext16x16Offset;
    3559           0 :     sad_calculation_8x8_16x16(
    3560             :         src_ptr + blockIndex,
    3561             :         src_stride,
    3562             :         refPtr + searchPositionIndex,
    3563             :         reflumaStride,
    3564             :         &p_best_sad8x8[8],
    3565             :         &p_best_sad16x16[2],
    3566             :         &p_best_mv8x8[8],
    3567             :         &p_best_mv16x16[2],
    3568             :         currMV,
    3569             :         &p_sad16x16[2],
    3570             :         sub_sad);
    3571             :     //---- 16x16 : 3
    3572           0 :     blockIndex = blockIndex + 16;
    3573           0 :     searchPositionIndex = searchPositionIndex + 16;
    3574           0 :     sad_calculation_8x8_16x16(
    3575             :         src_ptr + blockIndex,
    3576             :         src_stride,
    3577             :         refPtr + searchPositionIndex,
    3578             :         reflumaStride,
    3579             :         &p_best_sad8x8[12],
    3580             :         &p_best_sad16x16[3],
    3581             :         &p_best_mv8x8[12],
    3582             :         &p_best_mv16x16[3],
    3583             :         currMV,
    3584             :         &p_sad16x16[3],
    3585             :         sub_sad);
    3586             :     //---- 16x16 : 6
    3587           0 :     blockIndex = blockIndex + 16;
    3588           0 :     searchPositionIndex = searchPositionIndex + 16;
    3589           0 :     sad_calculation_8x8_16x16(
    3590             :         src_ptr + blockIndex,
    3591             :         src_stride,
    3592             :         refPtr + searchPositionIndex,
    3593             :         reflumaStride,
    3594             :         &p_best_sad8x8[24],
    3595             :         &p_best_sad16x16[6],
    3596             :         &p_best_mv8x8[24],
    3597             :         &p_best_mv16x16[6],
    3598             :         currMV,
    3599             :         &p_sad16x16[6],
    3600             :         sub_sad);
    3601             :     //---- 16x16 : 7
    3602           0 :     blockIndex = blockIndex + 16;
    3603           0 :     searchPositionIndex = searchPositionIndex + 16;
    3604           0 :     sad_calculation_8x8_16x16(
    3605             :         src_ptr + blockIndex,
    3606             :         src_stride,
    3607             :         refPtr + searchPositionIndex,
    3608             :         reflumaStride,
    3609             :         &p_best_sad8x8[28],
    3610             :         &p_best_sad16x16[7],
    3611             :         &p_best_mv8x8[28],
    3612             :         &p_best_mv16x16[7],
    3613             :         currMV,
    3614             :         &p_sad16x16[7],
    3615             :         sub_sad);
    3616             : 
    3617             :     //---- 16x16 : 8
    3618           0 :     blockIndex = (srcNext16x16Offset << 1);
    3619           0 :     searchPositionIndex = searchPositionTLIndex + (refNext16x16Offset << 1);
    3620           0 :     sad_calculation_8x8_16x16(
    3621             :         src_ptr + blockIndex,
    3622             :         src_stride,
    3623             :         refPtr + searchPositionIndex,
    3624             :         reflumaStride,
    3625             :         &p_best_sad8x8[32],
    3626             :         &p_best_sad16x16[8],
    3627             :         &p_best_mv8x8[32],
    3628             :         &p_best_mv16x16[8],
    3629             :         currMV,
    3630             :         &p_sad16x16[8],
    3631             :         sub_sad);
    3632             :     //---- 16x16 : 9
    3633           0 :     blockIndex = blockIndex + 16;
    3634           0 :     searchPositionIndex = searchPositionIndex + 16;
    3635           0 :     sad_calculation_8x8_16x16(
    3636             :         src_ptr + blockIndex,
    3637             :         src_stride,
    3638             :         refPtr + searchPositionIndex,
    3639             :         reflumaStride,
    3640             :         &p_best_sad8x8[36],
    3641             :         &p_best_sad16x16[9],
    3642             :         &p_best_mv8x8[36],
    3643             :         &p_best_mv16x16[9],
    3644             :         currMV,
    3645             :         &p_sad16x16[9],
    3646             :         sub_sad);
    3647             :     //---- 16x16 : 12
    3648           0 :     blockIndex = blockIndex + 16;
    3649           0 :     searchPositionIndex = searchPositionIndex + 16;
    3650           0 :     sad_calculation_8x8_16x16(
    3651             :         src_ptr + blockIndex,
    3652             :         src_stride,
    3653             :         refPtr + searchPositionIndex,
    3654             :         reflumaStride,
    3655             :         &p_best_sad8x8[48],
    3656             :         &p_best_sad16x16[12],
    3657             :         &p_best_mv8x8[48],
    3658             :         &p_best_mv16x16[12],
    3659             :         currMV,
    3660             :         &p_sad16x16[12],
    3661             :         sub_sad);
    3662             :     //---- 16x16 : 13
    3663           0 :     blockIndex = blockIndex + 16;
    3664           0 :     searchPositionIndex = searchPositionIndex + 16;
    3665           0 :     sad_calculation_8x8_16x16(
    3666             :         src_ptr + blockIndex,
    3667             :         src_stride,
    3668             :         refPtr + searchPositionIndex,
    3669             :         reflumaStride,
    3670             :         &p_best_sad8x8[52],
    3671             :         &p_best_sad16x16[13],
    3672             :         &p_best_mv8x8[52],
    3673             :         &p_best_mv16x16[13],
    3674             :         currMV,
    3675             :         &p_sad16x16[13],
    3676             :         sub_sad);
    3677             : 
    3678             :     //---- 16x16 : 10
    3679           0 :     blockIndex = (srcNext16x16Offset * 3);
    3680           0 :     searchPositionIndex = searchPositionTLIndex + (refNext16x16Offset * 3);
    3681           0 :     sad_calculation_8x8_16x16(
    3682             :         src_ptr + blockIndex,
    3683             :         src_stride,
    3684             :         refPtr + searchPositionIndex,
    3685             :         reflumaStride,
    3686             :         &p_best_sad8x8[40],
    3687             :         &p_best_sad16x16[10],
    3688             :         &p_best_mv8x8[40],
    3689             :         &p_best_mv16x16[10],
    3690             :         currMV,
    3691             :         &p_sad16x16[10],
    3692             :         sub_sad);
    3693             :     //---- 16x16 : 11
    3694           0 :     blockIndex = blockIndex + 16;
    3695           0 :     searchPositionIndex = searchPositionIndex + 16;
    3696           0 :     sad_calculation_8x8_16x16(
    3697             :         src_ptr + blockIndex,
    3698             :         src_stride,
    3699             :         refPtr + searchPositionIndex,
    3700             :         reflumaStride,
    3701             :         &p_best_sad8x8[44],
    3702             :         &p_best_sad16x16[11],
    3703             :         &p_best_mv8x8[44],
    3704             :         &p_best_mv16x16[11],
    3705             :         currMV,
    3706             :         &p_sad16x16[11],
    3707             :         sub_sad);
    3708             :     //---- 16x16 : 14
    3709           0 :     blockIndex = blockIndex + 16;
    3710           0 :     searchPositionIndex = searchPositionIndex + 16;
    3711           0 :     sad_calculation_8x8_16x16(
    3712             :         src_ptr + blockIndex,
    3713             :         src_stride,
    3714             :         refPtr + searchPositionIndex,
    3715             :         reflumaStride,
    3716             :         &p_best_sad8x8[56],
    3717             :         &p_best_sad16x16[14],
    3718             :         &p_best_mv8x8[56],
    3719             :         &p_best_mv16x16[14],
    3720             :         currMV,
    3721             :         &p_sad16x16[14],
    3722             :         sub_sad);
    3723             :     //---- 16x16 : 15
    3724           0 :     blockIndex = blockIndex + 16;
    3725           0 :     searchPositionIndex = searchPositionIndex + 16;
    3726           0 :     sad_calculation_8x8_16x16(
    3727             :         src_ptr + blockIndex,
    3728             :         src_stride,
    3729             :         refPtr + searchPositionIndex,
    3730             :         reflumaStride,
    3731             :         &p_best_sad8x8[60],
    3732             :         &p_best_sad16x16[15],
    3733             :         &p_best_mv8x8[60],
    3734             :         &p_best_mv16x16[15],
    3735             :         currMV,
    3736             :         &p_sad16x16[15],
    3737             :         sub_sad);
    3738             : 
    3739           0 :     sad_calculation_32x32_64x64(p_sad16x16,
    3740             :                                 p_best_sad32x32,
    3741             :                                 p_best_sad64x64,
    3742             :                                 p_best_mv32x32,
    3743             :                                 p_best_mv64x64,
    3744             :                                 currMV);
    3745           0 : }
    3746             : 
    3747             : /*******************************************
    3748             :  * GetEightHorizontalSearchPointResultsAll85PUs
    3749             :  *******************************************/
    3750           0 : static void GetEightHorizontalSearchPointResultsAll85PUs(
    3751             :     MeContext *context_ptr, uint32_t listIndex,
    3752             :     uint32_t ref_pic_index,
    3753             :     uint32_t searchRegionIndex,
    3754             :     int32_t xSearchIndex,  // input parameter, search region position in the
    3755             :                            // horizontal direction, used to derive xMV
    3756             :     int32_t ySearchIndex) {  // input parameter, search region position in the
    3757             :                              // vertical direction, used to derive yMV
    3758           0 :     const EbBool sub_sad = (context_ptr->me_search_method == SUB_SAD_SEARCH);
    3759           0 :     uint8_t *src_ptr = context_ptr->sb_src_ptr;
    3760           0 :     uint8_t *refPtr =
    3761           0 :         context_ptr->integer_buffer_ptr[listIndex][ref_pic_index] +
    3762           0 :         (ME_FILTER_TAP >> 1) +
    3763           0 :         ((ME_FILTER_TAP >> 1) *
    3764           0 :          context_ptr->interpolated_full_stride[listIndex][ref_pic_index]);
    3765           0 :     uint32_t reflumaStride =
    3766             :         context_ptr->interpolated_full_stride[listIndex][ref_pic_index];
    3767             : 
    3768           0 :     uint32_t searchPositionTLIndex = searchRegionIndex;
    3769             :     uint32_t searchPositionIndex;
    3770             :     uint32_t blockIndex;
    3771             : 
    3772           0 :     uint32_t srcNext16x16Offset = (BLOCK_SIZE_64 << 4);
    3773           0 :     uint32_t refNext16x16Offset = (reflumaStride << 4);
    3774             : 
    3775           0 :     uint32_t currMVy = (((uint16_t)ySearchIndex) << 18);
    3776           0 :     uint16_t currMVx = (((uint16_t)xSearchIndex << 2));
    3777           0 :     uint32_t currMV = currMVy | currMVx;
    3778             : 
    3779           0 :     uint32_t *p_best_sad8x8 = context_ptr->p_best_sad8x8;
    3780           0 :     uint32_t *p_best_sad16x16 = context_ptr->p_best_sad16x16;
    3781           0 :     uint32_t *p_best_sad32x32 = context_ptr->p_best_sad32x32;
    3782           0 :     uint32_t *p_best_sad64x64 = context_ptr->p_best_sad64x64;
    3783             : 
    3784           0 :     uint32_t *p_best_mv8x8 = context_ptr->p_best_mv8x8;
    3785           0 :     uint32_t *p_best_mv16x16 = context_ptr->p_best_mv16x16;
    3786           0 :     uint32_t *p_best_mv32x32 = context_ptr->p_best_mv32x32;
    3787           0 :     uint32_t *p_best_mv64x64 = context_ptr->p_best_mv64x64;
    3788             : 
    3789           0 :     uint16_t *p_sad16x16 = context_ptr->p_eight_pos_sad16x16;
    3790             : 
    3791             :     /*
    3792             :     ----------------------    ----------------------
    3793             :     |  16x16_0  |  16x16_1  |  16x16_4  |  16x16_5  |
    3794             :     ----------------------    ----------------------
    3795             :     |  16x16_2  |  16x16_3  |  16x16_6  |  16x16_7  |
    3796             :     -----------------------   -----------------------
    3797             :     |  16x16_8  |  16x16_9  |  16x16_12 |  16x16_13 |
    3798             :     ----------------------    ----------------------
    3799             :     |  16x16_10 |  16x16_11 |  16x16_14 |  16x16_15 |
    3800             :     -----------------------   -----------------------
    3801             :     */
    3802             : 
    3803           0 :     const uint32_t src_stride = context_ptr->sb_src_stride;
    3804           0 :     srcNext16x16Offset = src_stride << 4;
    3805             : 
    3806             :     //---- 16x16_0
    3807           0 :     blockIndex = 0;
    3808           0 :     searchPositionIndex = searchPositionTLIndex;
    3809           0 :     get_eight_horizontal_search_point_results_8x8_16x16_pu(
    3810             :                    src_ptr + blockIndex,
    3811             :                    context_ptr->sb_src_stride,
    3812             :                    refPtr + searchPositionIndex,
    3813             :                    reflumaStride,
    3814             :                    &p_best_sad8x8[0],
    3815             :                    &p_best_mv8x8[0],
    3816             :                    &p_best_sad16x16[0],
    3817             :                    &p_best_mv16x16[0],
    3818             :                    currMV,
    3819             :                    &p_sad16x16[0 * 8],
    3820             :                    sub_sad);
    3821             :     //---- 16x16_1
    3822           0 :     blockIndex = blockIndex + 16;
    3823           0 :     searchPositionIndex = searchPositionTLIndex + 16;
    3824           0 :     get_eight_horizontal_search_point_results_8x8_16x16_pu(
    3825             :                    src_ptr + blockIndex,
    3826             :                    context_ptr->sb_src_stride,
    3827             :                    refPtr + searchPositionIndex,
    3828             :                    reflumaStride,
    3829             :                    &p_best_sad8x8[4],
    3830             :                    &p_best_mv8x8[4],
    3831             :                    &p_best_sad16x16[1],
    3832             :                    &p_best_mv16x16[1],
    3833             :                    currMV,
    3834             :                    &p_sad16x16[1 * 8],
    3835             :                    sub_sad);
    3836             :     //---- 16x16_4
    3837           0 :     blockIndex = blockIndex + 16;
    3838           0 :     searchPositionIndex = searchPositionIndex + 16;
    3839           0 :     get_eight_horizontal_search_point_results_8x8_16x16_pu(
    3840             :                    src_ptr + blockIndex,
    3841             :                    context_ptr->sb_src_stride,
    3842             :                    refPtr + searchPositionIndex,
    3843             :                    reflumaStride,
    3844             :                    &p_best_sad8x8[16],
    3845             :                    &p_best_mv8x8[16],
    3846             :                    &p_best_sad16x16[4],
    3847             :                    &p_best_mv16x16[4],
    3848             :                    currMV,
    3849             :                    &p_sad16x16[4 * 8],
    3850             :                    sub_sad);
    3851             :     //---- 16x16_5
    3852           0 :     blockIndex = blockIndex + 16;
    3853           0 :     searchPositionIndex = searchPositionIndex + 16;
    3854           0 :     get_eight_horizontal_search_point_results_8x8_16x16_pu(
    3855             :                    src_ptr + blockIndex,
    3856             :                    context_ptr->sb_src_stride,
    3857             :                    refPtr + searchPositionIndex,
    3858             :                    reflumaStride,
    3859             :                    &p_best_sad8x8[20],
    3860             :                    &p_best_mv8x8[20],
    3861             :                    &p_best_sad16x16[5],
    3862             :                    &p_best_mv16x16[5],
    3863             :                    currMV,
    3864             :                    &p_sad16x16[5 * 8],
    3865             :                    sub_sad);
    3866             : 
    3867             :     //---- 16x16_2
    3868           0 :     blockIndex = srcNext16x16Offset;
    3869           0 :     searchPositionIndex = searchPositionTLIndex + refNext16x16Offset;
    3870           0 :     get_eight_horizontal_search_point_results_8x8_16x16_pu(
    3871             :                    src_ptr + blockIndex,
    3872             :                    context_ptr->sb_src_stride,
    3873             :                    refPtr + searchPositionIndex,
    3874             :                    reflumaStride,
    3875             :                    &p_best_sad8x8[8],
    3876             :                    &p_best_mv8x8[8],
    3877             :                    &p_best_sad16x16[2],
    3878             :                    &p_best_mv16x16[2],
    3879             :                    currMV,
    3880             :                    &p_sad16x16[2 * 8],
    3881             :                    sub_sad);
    3882             :     //---- 16x16_3
    3883           0 :     blockIndex = blockIndex + 16;
    3884           0 :     searchPositionIndex = searchPositionIndex + 16;
    3885           0 :     get_eight_horizontal_search_point_results_8x8_16x16_pu(
    3886             :                    src_ptr + blockIndex,
    3887             :                    context_ptr->sb_src_stride,
    3888             :                    refPtr + searchPositionIndex,
    3889             :                    reflumaStride,
    3890             :                    &p_best_sad8x8[12],
    3891             :                    &p_best_mv8x8[12],
    3892             :                    &p_best_sad16x16[3],
    3893             :                    &p_best_mv16x16[3],
    3894             :                    currMV,
    3895             :                    &p_sad16x16[3 * 8],
    3896             :                    sub_sad);
    3897             :     //---- 16x16_6
    3898           0 :     blockIndex = blockIndex + 16;
    3899           0 :     searchPositionIndex = searchPositionIndex + 16;
    3900           0 :     get_eight_horizontal_search_point_results_8x8_16x16_pu(
    3901             :                    src_ptr + blockIndex,
    3902             :                    context_ptr->sb_src_stride,
    3903             :                    refPtr + searchPositionIndex,
    3904             :                    reflumaStride,
    3905             :                    &p_best_sad8x8[24],
    3906             :                    &p_best_mv8x8[24],
    3907             :                    &p_best_sad16x16[6],
    3908             :                    &p_best_mv16x16[6],
    3909             :                    currMV,
    3910             :                    &p_sad16x16[6 * 8],
    3911             :                    sub_sad);
    3912             :     //---- 16x16_7
    3913           0 :     blockIndex = blockIndex + 16;
    3914           0 :     searchPositionIndex = searchPositionIndex + 16;
    3915           0 :     get_eight_horizontal_search_point_results_8x8_16x16_pu(
    3916             :                    src_ptr + blockIndex,
    3917             :                    context_ptr->sb_src_stride,
    3918             :                    refPtr + searchPositionIndex,
    3919             :                    reflumaStride,
    3920             :                    &p_best_sad8x8[28],
    3921             :                    &p_best_mv8x8[28],
    3922             :                    &p_best_sad16x16[7],
    3923             :                    &p_best_mv16x16[7],
    3924             :                    currMV,
    3925             :                    &p_sad16x16[7 * 8],
    3926             :                    sub_sad);
    3927             : 
    3928             :     //---- 16x16_8
    3929           0 :     blockIndex = (srcNext16x16Offset << 1);
    3930           0 :     searchPositionIndex = searchPositionTLIndex + (refNext16x16Offset << 1);
    3931           0 :     get_eight_horizontal_search_point_results_8x8_16x16_pu(
    3932             :                    src_ptr + blockIndex,
    3933             :                    context_ptr->sb_src_stride,
    3934             :                    refPtr + searchPositionIndex,
    3935             :                    reflumaStride,
    3936             :                    &p_best_sad8x8[32],
    3937             :                    &p_best_mv8x8[32],
    3938             :                    &p_best_sad16x16[8],
    3939             :                    &p_best_mv16x16[8],
    3940             :                    currMV,
    3941             :                    &p_sad16x16[8 * 8],
    3942             :                    sub_sad);
    3943             :     //---- 16x16_9
    3944           0 :     blockIndex = blockIndex + 16;
    3945           0 :     searchPositionIndex = searchPositionIndex + 16;
    3946           0 :     get_eight_horizontal_search_point_results_8x8_16x16_pu(
    3947             :                    src_ptr + blockIndex,
    3948             :                    context_ptr->sb_src_stride,
    3949             :                    refPtr + searchPositionIndex,
    3950             :                    reflumaStride,
    3951             :                    &p_best_sad8x8[36],
    3952             :                    &p_best_mv8x8[36],
    3953             :                    &p_best_sad16x16[9],
    3954             :                    &p_best_mv16x16[9],
    3955             :                    currMV,
    3956             :                    &p_sad16x16[9 * 8],
    3957             :                    sub_sad);
    3958             :     //---- 16x16_12
    3959           0 :     blockIndex = blockIndex + 16;
    3960           0 :     searchPositionIndex = searchPositionIndex + 16;
    3961           0 :     get_eight_horizontal_search_point_results_8x8_16x16_pu(
    3962             :                    src_ptr + blockIndex,
    3963             :                    context_ptr->sb_src_stride,
    3964             :                    refPtr + searchPositionIndex,
    3965             :                    reflumaStride,
    3966             :                    &p_best_sad8x8[48],
    3967             :                    &p_best_mv8x8[48],
    3968             :                    &p_best_sad16x16[12],
    3969             :                    &p_best_mv16x16[12],
    3970             :                    currMV,
    3971             :                    &p_sad16x16[12 * 8],
    3972             :                    sub_sad);
    3973             :     //---- 16x1_13
    3974           0 :     blockIndex = blockIndex + 16;
    3975           0 :     searchPositionIndex = searchPositionIndex + 16;
    3976           0 :     get_eight_horizontal_search_point_results_8x8_16x16_pu(
    3977             :                    src_ptr + blockIndex,
    3978             :                    context_ptr->sb_src_stride,
    3979             :                    refPtr + searchPositionIndex,
    3980             :                    reflumaStride,
    3981             :                    &p_best_sad8x8[52],
    3982             :                    &p_best_mv8x8[52],
    3983             :                    &p_best_sad16x16[13],
    3984             :                    &p_best_mv16x16[13],
    3985             :                    currMV,
    3986             :                    &p_sad16x16[13 * 8],
    3987             :                    sub_sad);
    3988             : 
    3989             :     //---- 16x16_10
    3990           0 :     blockIndex = (srcNext16x16Offset * 3);
    3991           0 :     searchPositionIndex = searchPositionTLIndex + (refNext16x16Offset * 3);
    3992           0 :     get_eight_horizontal_search_point_results_8x8_16x16_pu(
    3993             :                    src_ptr + blockIndex,
    3994             :                    context_ptr->sb_src_stride,
    3995             :                    refPtr + searchPositionIndex,
    3996             :                    reflumaStride,
    3997             :                    &p_best_sad8x8[40],
    3998             :                    &p_best_mv8x8[40],
    3999             :                    &p_best_sad16x16[10],
    4000             :                    &p_best_mv16x16[10],
    4001             :                    currMV,
    4002             :                    &p_sad16x16[10 * 8],
    4003             :                    sub_sad);
    4004             :     //---- 16x16_11
    4005           0 :     blockIndex = blockIndex + 16;
    4006           0 :     searchPositionIndex = searchPositionIndex + 16;
    4007           0 :     get_eight_horizontal_search_point_results_8x8_16x16_pu(
    4008             :                    src_ptr + blockIndex,
    4009             :                    context_ptr->sb_src_stride,
    4010             :                    refPtr + searchPositionIndex,
    4011             :                    reflumaStride,
    4012             :                    &p_best_sad8x8[44],
    4013             :                    &p_best_mv8x8[44],
    4014             :                    &p_best_sad16x16[11],
    4015             :                    &p_best_mv16x16[11],
    4016             :                    currMV,
    4017             :                    &p_sad16x16[11 * 8],
    4018             :                    sub_sad);
    4019             :     //---- 16x16_14
    4020           0 :     blockIndex = blockIndex + 16;
    4021           0 :     searchPositionIndex = searchPositionIndex + 16;
    4022           0 :     get_eight_horizontal_search_point_results_8x8_16x16_pu(
    4023             :                    src_ptr + blockIndex,
    4024             :                    context_ptr->sb_src_stride,
    4025             :                    refPtr + searchPositionIndex,
    4026             :                    reflumaStride,
    4027             :                    &p_best_sad8x8[56],
    4028             :                    &p_best_mv8x8[56],
    4029             :                    &p_best_sad16x16[14],
    4030             :                    &p_best_mv16x16[14],
    4031             :                    currMV,
    4032             :                    &p_sad16x16[14 * 8],
    4033             :                    sub_sad);
    4034             :     //---- 16x16_15
    4035           0 :     blockIndex = blockIndex + 16;
    4036           0 :     searchPositionIndex = searchPositionIndex + 16;
    4037           0 :     get_eight_horizontal_search_point_results_8x8_16x16_pu(
    4038             :                    src_ptr + blockIndex,
    4039             :                    context_ptr->sb_src_stride,
    4040             :                    refPtr + searchPositionIndex,
    4041             :                    reflumaStride,
    4042             :                    &p_best_sad8x8[60],
    4043             :                    &p_best_mv8x8[60],
    4044             :                    &p_best_sad16x16[15],
    4045             :                    &p_best_mv16x16[15],
    4046             :                    currMV,
    4047             :                    &p_sad16x16[15 * 8],
    4048             :                    sub_sad);
    4049             :     // 32x32 and 64x64
    4050           0 :     get_eight_horizontal_search_point_results_32x32_64x64_pu(
    4051             :                    p_sad16x16,
    4052             :                    p_best_sad32x32,
    4053             :                    p_best_sad64x64,
    4054             :                    p_best_mv32x32,
    4055             :                    p_best_mv64x64,
    4056             :                    currMV);
    4057           0 : }
    4058             : 
    4059             : /*******************************************
    4060             :  * FullPelSearch_LCU
    4061             :  *******************************************/
    4062           0 : static void FullPelSearch_LCU(MeContext *context_ptr, uint32_t listIndex,
    4063             :                               uint32_t ref_pic_index,
    4064             :                               int16_t x_search_area_origin,
    4065             :                               int16_t y_search_area_origin,
    4066             :                               uint32_t search_area_width,
    4067             :                               uint32_t search_area_height)
    4068             : {
    4069             :     uint32_t xSearchIndex, ySearchIndex;
    4070             : 
    4071           0 :     uint32_t searchAreaWidthRest8 = search_area_width & 7;
    4072           0 :     uint32_t searchAreaWidthMult8 = search_area_width - searchAreaWidthRest8;
    4073             : 
    4074           0 :     for (ySearchIndex = 0; ySearchIndex < search_area_height; ySearchIndex++) {
    4075           0 :         for (xSearchIndex = 0; xSearchIndex < searchAreaWidthMult8;
    4076           0 :              xSearchIndex += 8) {
    4077             :             // this function will do:  xSearchIndex, +1, +2, ..., +7
    4078           0 :             GetEightHorizontalSearchPointResultsAll85PUs(
    4079             :                 context_ptr,
    4080             :                 listIndex,
    4081             :                 ref_pic_index,
    4082             :                 xSearchIndex +
    4083           0 :                     ySearchIndex *
    4084             :                         context_ptr->interpolated_full_stride[listIndex]
    4085           0 :                                                              [ref_pic_index],
    4086           0 :                 (int32_t)xSearchIndex + x_search_area_origin,
    4087           0 :                 (int32_t)ySearchIndex + y_search_area_origin);
    4088             :         }
    4089             : 
    4090           0 :         for (xSearchIndex = searchAreaWidthMult8;
    4091             :              xSearchIndex < search_area_width;
    4092           0 :              xSearchIndex++) {
    4093           0 :             GetSearchPointResults(
    4094             :                 context_ptr,
    4095             :                 listIndex,
    4096             :                 ref_pic_index,
    4097             :                 xSearchIndex +
    4098           0 :                     ySearchIndex *
    4099             :                         context_ptr->interpolated_full_stride[listIndex]
    4100           0 :                                                              [ref_pic_index],
    4101           0 :                 (int32_t)xSearchIndex + x_search_area_origin,
    4102           0 :                 (int32_t)ySearchIndex + y_search_area_origin);
    4103             :         }
    4104             :     }
    4105           0 : }
    4106             : #if OPTIMISED_EX_SUBPEL
    4107             : /*******************************************
    4108             :  * PU_HalfPelRefinement
    4109             :  *   performs Half Pel refinement for one PU
    4110             :  *******************************************/
    4111           0 : static void half_pel_refinement_block(
    4112             :     MeContext
    4113             :     *context_ptr,  // input parameter, ME context Ptr, used to get SB Ptr
    4114             :     uint8_t *ref_buffer, uint32_t ref_stride, uint32_t *p_best_ssd,
    4115             :     uint32_t src_block_index,  // input parameter, PU origin, used to point to
    4116             :                                // source samples
    4117             :     uint8_t *pos_b_buffer,  // input parameter, position "b" interpolated search
    4118             :                             // area Ptr
    4119             :     uint8_t *pos_h_buffer,  // input parameter, position "h" interpolated search
    4120             :                             // area Ptr
    4121             :     uint8_t *pos_j_buffer,  // input parameter, position "j" interpolated search
    4122             :                             // area Ptr
    4123             :     uint32_t pu_width,      // input parameter, PU width
    4124             :     uint32_t pu_height,     // input parameter, PU height
    4125             :     int16_t x_search_area_origin,  // input parameter, search area origin in the
    4126             :                                    // horizontal direction, used to point to
    4127             :                                    // reference samples
    4128             :     int16_t y_search_area_origin,  // input parameter, search area origin in the
    4129             :                                    // vertical direction, used to point to
    4130             :                                    // reference samples
    4131             : #if OPTIMISED_EX_SUBPEL
    4132             :     uint32_t search_area_height,  // input parameter, search area height
    4133             :     uint32_t search_area_width,  // input parameter, search area width
    4134             : #endif
    4135             :     uint32_t *p_best_sad, uint32_t *p_best_mv,
    4136             :     uint8_t *p_sub_pel_direction, uint32_t *best_pervious_stage_mv,
    4137             :     uint32_t ineteger_mv) {
    4138             :     int32_t search_region_index;
    4139           0 :     uint64_t distortion_left_position = 0;
    4140           0 :     uint64_t distortion_top_position = 0;
    4141           0 :     uint64_t distortion_topleft_position = 0;
    4142           0 :     uint64_t distortion_topright_position = 0;
    4143             :     int16_t half_mv_x[8];
    4144             :     int16_t half_mv_y[8];
    4145             :     int16_t x_best_mv;
    4146             :     int16_t y_best_mv;
    4147             :     int16_t x_mv;
    4148             :     int16_t y_mv;
    4149             :     int16_t search_index_x;
    4150             :     int16_t search_index_y;
    4151             :     (void)p_sub_pel_direction;
    4152             :     (void)ineteger_mv;
    4153             :     // copute distance between best mv and the integer mv candidate
    4154             :     int16_t offset_x, offset_y;
    4155           0 :     for (offset_x = -H_PEL_SEARCH_WIND; offset_x <= H_PEL_SEARCH_WIND; offset_x++) {
    4156           0 :         for (offset_y = -H_PEL_SEARCH_WIND; offset_y <= H_PEL_SEARCH_WIND; offset_y++) {
    4157           0 :             x_best_mv = _MVXT(*best_pervious_stage_mv);
    4158           0 :             y_best_mv = _MVYT(*best_pervious_stage_mv);
    4159           0 :             x_mv = x_best_mv + (offset_x * 4);
    4160           0 :             y_mv = y_best_mv + (offset_y * 4);
    4161           0 :             search_index_x = (x_mv >> 2) - x_search_area_origin;
    4162           0 :             search_index_y = (y_mv >> 2) - y_search_area_origin;
    4163           0 :             uint32_t integer_mv1 = (((uint16_t)(y_mv >> 2)) << 18);
    4164           0 :             uint16_t integer_mv2 = (((uint16_t)(x_mv >> 2) << 2));
    4165           0 :             uint32_t integer_mv = integer_mv1 | integer_mv2;
    4166           0 :             if (search_index_x < 0 || search_index_x >(int16_t)(search_area_width - 1)) {
    4167           0 :                 continue;
    4168             :             }
    4169           0 :             if (search_index_y < 0 || search_index_y >(int16_t)(search_area_height - 1)) {
    4170           0 :                 continue;
    4171             :             }
    4172           0 :             half_mv_x[0] = x_mv - 2;  // L  position
    4173           0 :             half_mv_x[1] = x_mv + 2;  // R  position
    4174           0 :             half_mv_x[2] = x_mv;      // T  position
    4175           0 :             half_mv_x[3] = x_mv;      // B  position
    4176           0 :             half_mv_x[4] = x_mv - 2;  // TL position
    4177           0 :             half_mv_x[5] = x_mv + 2;  // TR position
    4178           0 :             half_mv_x[6] = x_mv + 2;  // BR position
    4179           0 :             half_mv_x[7] = x_mv - 2;  // BL position
    4180           0 :             half_mv_y[0] = y_mv;      // L  position
    4181           0 :             half_mv_y[1] = y_mv;      // R  position
    4182           0 :             half_mv_y[2] = y_mv - 2;  // T  position
    4183           0 :             half_mv_y[3] = y_mv + 2;  // B  position
    4184           0 :             half_mv_y[4] = y_mv - 2;  // TL position
    4185           0 :             half_mv_y[5] = y_mv - 2;  // TR position
    4186           0 :             half_mv_y[6] = y_mv + 2;  // BR position
    4187           0 :             half_mv_y[7] = y_mv + 2;  // BL position
    4188             :             // Compute SSD for the best full search candidate
    4189           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH) {
    4190           0 :                 uint32_t integer_sse =
    4191           0 :                     (uint32_t)spatial_full_distortion_kernel(
    4192             :                         context_ptr->sb_src_ptr,
    4193             :                         src_block_index,
    4194             :                         context_ptr->sb_src_stride,
    4195             :                         ref_buffer,
    4196           0 :                         search_index_y * ref_stride + search_index_x,
    4197             :                         ref_stride,
    4198             :                         pu_width,
    4199             :                         pu_height);
    4200           0 :                 if (integer_sse < *p_best_ssd) {
    4201           0 :                     *p_best_ssd = integer_sse;
    4202           0 :                     *p_best_mv = integer_mv;
    4203             :                 }
    4204             :             }
    4205             :             // L position
    4206           0 :             search_region_index =
    4207           0 :                 search_index_x +
    4208           0 :                 (int16_t)context_ptr->interpolated_stride * search_index_y;
    4209           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH)
    4210           0 :                 distortion_left_position = spatial_full_distortion_kernel(
    4211             :                     context_ptr->sb_src_ptr,
    4212             :                     src_block_index,
    4213             :                     context_ptr->sb_src_stride,
    4214             :                     pos_b_buffer,
    4215             :                     search_region_index,
    4216             :                     context_ptr->interpolated_stride,
    4217             :                     pu_width,
    4218             :                     pu_height);
    4219           0 :             else if (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    4220           0 :                 distortion_left_position = (nxm_sad_kernel(
    4221           0 :                     &(context_ptr->sb_src_ptr[src_block_index]),
    4222           0 :                     context_ptr->sb_src_stride << 1,
    4223           0 :                     &(pos_b_buffer[search_region_index]),
    4224           0 :                     context_ptr->interpolated_stride << 1,
    4225             :                     pu_height >> 1,
    4226           0 :                     pu_width)) << 1;
    4227             :             else
    4228           0 :                 distortion_left_position = nxm_sad_kernel(
    4229           0 :                     &(context_ptr->sb_src_ptr[src_block_index]),
    4230             :                     context_ptr->sb_src_stride,
    4231           0 :                     &(pos_b_buffer[search_region_index]),
    4232             :                     context_ptr->interpolated_stride,
    4233             :                     pu_height,
    4234             :                     pu_width);
    4235           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH) {
    4236           0 :                 if (distortion_left_position < *p_best_ssd) {
    4237           0 :                     *p_best_sad = (uint32_t)
    4238           0 :                         nxm_sad_kernel(
    4239           0 :                             &(context_ptr->sb_src_ptr[src_block_index]),
    4240             :                             context_ptr->sb_src_stride,
    4241           0 :                             &(pos_b_buffer[search_region_index]),
    4242             :                             context_ptr->interpolated_stride,
    4243             :                             pu_height,
    4244             :                             pu_width);
    4245           0 :                     *p_best_mv =
    4246           0 :                         ((uint16_t)half_mv_y[0] << 16) | ((uint16_t)half_mv_x[0]);
    4247           0 :                     *p_best_ssd = (uint32_t)distortion_left_position;
    4248             :                 }
    4249             :             }
    4250             :             else {
    4251           0 :                 if (distortion_left_position < *p_best_sad) {
    4252           0 :                     *p_best_sad = (uint32_t)distortion_left_position;
    4253           0 :                     *p_best_mv =
    4254           0 :                         ((uint16_t)half_mv_y[0] << 16) | ((uint16_t)half_mv_x[0]);
    4255             :                 }
    4256             :             }
    4257             :             // T position
    4258           0 :             search_region_index =
    4259           0 :                 search_index_x +
    4260           0 :                 (int16_t)context_ptr->interpolated_stride * search_index_y;
    4261           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH)
    4262           0 :                 distortion_top_position = spatial_full_distortion_kernel(
    4263             :                     context_ptr->sb_src_ptr,
    4264             :                     src_block_index,
    4265             :                     context_ptr->sb_src_stride,
    4266             :                     pos_h_buffer,
    4267             :                     search_region_index,
    4268             :                     context_ptr->interpolated_stride,
    4269             :                     pu_width,
    4270             :                     pu_height);
    4271           0 :             else if (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    4272           0 :                 distortion_top_position = (nxm_sad_kernel(
    4273           0 :                     &(context_ptr->sb_src_ptr[src_block_index]),
    4274           0 :                     context_ptr->sb_src_stride << 1,
    4275           0 :                     &(pos_h_buffer[search_region_index]),
    4276           0 :                     context_ptr->interpolated_stride << 1,
    4277             :                     pu_height >> 1,
    4278           0 :                     pu_width)) << 1;
    4279             :             else
    4280           0 :                 distortion_top_position = nxm_sad_kernel(
    4281           0 :                     &(context_ptr->sb_src_ptr[src_block_index]),
    4282             :                     context_ptr->sb_src_stride,
    4283           0 :                     &(pos_h_buffer[search_region_index]),
    4284             :                     context_ptr->interpolated_stride,
    4285             :                     pu_height,
    4286             :                     pu_width);
    4287           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH) {
    4288           0 :                 if (distortion_top_position < *p_best_ssd) {
    4289           0 :                     *p_best_sad = (uint32_t)
    4290           0 :                         nxm_sad_kernel(
    4291           0 :                             &(context_ptr->sb_src_ptr[src_block_index]),
    4292             :                             context_ptr->sb_src_stride,
    4293           0 :                             &(pos_h_buffer[search_region_index]),
    4294             :                             context_ptr->interpolated_stride,
    4295             :                             pu_height,
    4296             :                             pu_width);
    4297           0 :                     *p_best_mv =
    4298           0 :                         ((uint16_t)half_mv_y[2] << 16) | ((uint16_t)half_mv_x[2]);
    4299           0 :                     *p_best_ssd = (uint32_t)distortion_top_position;
    4300             :                 }
    4301             :             }
    4302             :             else {
    4303           0 :                 if (distortion_top_position < *p_best_sad) {
    4304           0 :                     *p_best_sad = (uint32_t)distortion_top_position;
    4305           0 :                     *p_best_mv =
    4306           0 :                         ((uint16_t)half_mv_y[2] << 16) | ((uint16_t)half_mv_x[2]);
    4307             :                 }
    4308             :             }
    4309             :             // TL position
    4310           0 :             search_region_index =
    4311           0 :                 search_index_x +
    4312           0 :                 (int16_t)context_ptr->interpolated_stride * search_index_y;
    4313           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH)
    4314           0 :                 distortion_topleft_position = spatial_full_distortion_kernel(
    4315             :                     context_ptr->sb_src_ptr,
    4316             :                     src_block_index,
    4317             :                     context_ptr->sb_src_stride,
    4318             :                     pos_j_buffer,
    4319             :                     search_region_index,
    4320             :                     context_ptr->interpolated_stride,
    4321             :                     pu_width,
    4322             :                     pu_height);
    4323           0 :             else if (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    4324           0 :                 distortion_topleft_position = (nxm_sad_kernel(
    4325           0 :                     &(context_ptr->sb_src_ptr[src_block_index]),
    4326           0 :                     context_ptr->sb_src_stride << 1,
    4327           0 :                     &(pos_j_buffer[search_region_index]),
    4328           0 :                     context_ptr->interpolated_stride << 1,
    4329             :                     pu_height >> 1,
    4330           0 :                     pu_width)) << 1;
    4331             :             else
    4332           0 :                 distortion_topleft_position = nxm_sad_kernel(
    4333           0 :                     &(context_ptr->sb_src_ptr[src_block_index]),
    4334             :                     context_ptr->sb_src_stride,
    4335           0 :                     &(pos_j_buffer[search_region_index]),
    4336             :                     context_ptr->interpolated_stride,
    4337             :                     pu_height,
    4338             :                     pu_width);
    4339           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH) {
    4340           0 :                 if (distortion_topleft_position < *p_best_ssd) {
    4341           0 :                     *p_best_sad = (uint32_t)
    4342           0 :                         nxm_sad_kernel(
    4343           0 :                             &(context_ptr->sb_src_ptr[src_block_index]),
    4344             :                             context_ptr->sb_src_stride,
    4345           0 :                             &(pos_j_buffer[search_region_index]),
    4346             :                             context_ptr->interpolated_stride,
    4347             :                             pu_height,
    4348             :                             pu_width);
    4349           0 :                     *p_best_mv =
    4350           0 :                         ((uint16_t)half_mv_y[4] << 16) | ((uint16_t)half_mv_x[4]);
    4351           0 :                     *p_best_ssd = (uint32_t)distortion_topleft_position;
    4352             :                 }
    4353             :             }
    4354             :             else {
    4355           0 :                 if (distortion_topleft_position < *p_best_sad) {
    4356           0 :                     *p_best_sad = (uint32_t)distortion_topleft_position;
    4357           0 :                     *p_best_mv =
    4358           0 :                         ((uint16_t)half_mv_y[4] << 16) | ((uint16_t)half_mv_x[4]);
    4359             :                 }
    4360             :             }
    4361             :             // TR position
    4362           0 :             search_region_index++;
    4363           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH)
    4364           0 :                 distortion_topright_position = spatial_full_distortion_kernel(
    4365             :                     context_ptr->sb_src_ptr,
    4366             :                     src_block_index,
    4367             :                     context_ptr->sb_src_stride,
    4368             :                     pos_j_buffer,
    4369             :                     search_region_index,
    4370             :                     context_ptr->interpolated_stride,
    4371             :                     pu_width,
    4372             :                     pu_height);
    4373           0 :             else if (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    4374           0 :                 distortion_topright_position = (nxm_sad_kernel(
    4375           0 :                     &(context_ptr->sb_src_ptr[src_block_index]),
    4376           0 :                     context_ptr->sb_src_stride << 1,
    4377           0 :                     &(pos_j_buffer[search_region_index]),
    4378           0 :                     context_ptr->interpolated_stride << 1,
    4379             :                     pu_height >> 1,
    4380           0 :                     pu_width)) << 1;
    4381             :             else
    4382           0 :                 distortion_topright_position = nxm_sad_kernel(
    4383           0 :                     &(context_ptr->sb_src_ptr[src_block_index]),
    4384             :                     context_ptr->sb_src_stride,
    4385           0 :                     &(pos_j_buffer[search_region_index]),
    4386             :                     context_ptr->interpolated_stride,
    4387             :                     pu_height,
    4388             :                     pu_width);
    4389           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH) {
    4390           0 :                 if (distortion_topright_position < *p_best_ssd) {
    4391           0 :                     *p_best_sad = (uint32_t)
    4392           0 :                         nxm_sad_kernel(
    4393           0 :                             &(context_ptr->sb_src_ptr[src_block_index]),
    4394             :                             context_ptr->sb_src_stride,
    4395           0 :                             &(pos_j_buffer[search_region_index]),
    4396             :                             context_ptr->interpolated_stride,
    4397             :                             pu_height,
    4398             :                             pu_width);
    4399           0 :                     *p_best_mv =
    4400           0 :                         ((uint16_t)half_mv_y[5] << 16) | ((uint16_t)half_mv_x[5]);
    4401           0 :                     *p_best_ssd = (uint32_t)distortion_topright_position;
    4402             :                 }
    4403             :             }
    4404             :             else {
    4405           0 :                 if (distortion_topright_position < *p_best_sad) {
    4406           0 :                     *p_best_sad = (uint32_t)distortion_topright_position;
    4407           0 :                     *p_best_mv =
    4408           0 :                         ((uint16_t)half_mv_y[5] << 16) | ((uint16_t)half_mv_x[5]);
    4409             :                 }
    4410             :             }
    4411             :         }
    4412             :     }
    4413           0 :     return;
    4414             : }
    4415             : #else
    4416             : /*******************************************
    4417             :  * half_pel_refinement_block
    4418             :  *   performs Half Pel refinement for one PU
    4419             :  *******************************************/
    4420             : static void half_pel_refinement_block(
    4421             :     MeContext
    4422             :         *context_ptr,  // input parameter, ME context Ptr, used to get SB Ptr
    4423             :     uint8_t *ref_buffer, uint32_t ref_stride, uint32_t *p_best_ssd,
    4424             :     uint32_t src_block_index,  // input parameter, PU origin, used to point to
    4425             :                                // source samples
    4426             :     uint8_t *pos_b_buffer,  // input parameter, position "b" interpolated search
    4427             :                             // area Ptr
    4428             :     uint8_t *pos_h_buffer,  // input parameter, position "h" interpolated search
    4429             :                             // area Ptr
    4430             :     uint8_t *pos_j_buffer,  // input parameter, position "j" interpolated search
    4431             :                             // area Ptr
    4432             :     uint32_t pu_width,      // input parameter, PU width
    4433             :     uint32_t pu_height,     // input parameter, PU height
    4434             :     int16_t x_search_area_origin,  // input parameter, search area origin in the
    4435             :                                    // horizontal direction, used to point to
    4436             :                                    // reference samples
    4437             :     int16_t y_search_area_origin,  // input parameter, search area origin in the
    4438             :                                    // vertical direction, used to point to
    4439             :                                    // reference samples
    4440             :     uint32_t *p_best_sad, uint32_t *p_best_mv,
    4441             :     uint8_t *p_sub_pel_direction, uint32_t *best_pervious_stage_mv,
    4442             :     uint32_t ineteger_mv) {
    4443             :     int32_t search_region_index;
    4444             :     uint64_t distortion_left_position = 0;
    4445             :     uint64_t distortion_top_position = 0;
    4446             :     uint64_t distortion_topleft_position = 0;
    4447             :     uint64_t distortion_topright_position = 0;
    4448             :     int16_t half_mv_x[8];
    4449             :     int16_t half_mv_y[8];
    4450             :     // compute distance between best mv and the integer mv candidate
    4451             :     int16_t int_x_mv = _MVXT(ineteger_mv);
    4452             :     int16_t int_y_mv = _MVYT(ineteger_mv);
    4453             :     int16_t int_search_index_x = (int_x_mv >> 2) - x_search_area_origin;
    4454             :     int16_t int_search_index_y = (int_y_mv >> 2) - y_search_area_origin;
    4455             :     int16_t x_best_mv = _MVXT(*best_pervious_stage_mv);
    4456             :     int16_t y_best_mv = _MVYT(*best_pervious_stage_mv);
    4457             :     int16_t best_search_index_x = (x_best_mv >> 2) - x_search_area_origin;
    4458             :     int16_t best_search_index_y = (y_best_mv >> 2) - y_search_area_origin;
    4459             :     int16_t dis_x = ABS(int_search_index_x - best_search_index_x);
    4460             :     int16_t dis_y = ABS(int_search_index_y - best_search_index_y);
    4461             :     // Skip half pel if the integer candidate is not inside the desired window.
    4462             :     if ((dis_x) > H_PEL_SEARCH_WIND)
    4463             :         return;
    4464             :     if ((dis_y) > H_PEL_SEARCH_WIND)
    4465             :         return;
    4466             :     int16_t x_mv = _MVXT(ineteger_mv);
    4467             :     int16_t y_mv = _MVYT(ineteger_mv);
    4468             :     int16_t search_index_x = (x_mv >> 2) - x_search_area_origin;
    4469             :     int16_t search_index_y = (y_mv >> 2) - y_search_area_origin;
    4470             :     (void)p_sub_pel_direction;
    4471             :     half_mv_x[0] = x_mv - 2;  // L  position
    4472             :     half_mv_x[1] = x_mv + 2;  // R  position
    4473             :     half_mv_x[2] = x_mv;      // T  position
    4474             :     half_mv_x[3] = x_mv;      // B  position
    4475             :     half_mv_x[4] = x_mv - 2;  // TL position
    4476             :     half_mv_x[5] = x_mv + 2;  // TR position
    4477             :     half_mv_x[6] = x_mv + 2;  // BR position
    4478             :     half_mv_x[7] = x_mv - 2;  // BL position
    4479             :     half_mv_y[0] = y_mv;      // L  position
    4480             :     half_mv_y[1] = y_mv;      // R  position
    4481             :     half_mv_y[2] = y_mv - 2;  // T  position
    4482             :     half_mv_y[3] = y_mv + 2;  // B  position
    4483             :     half_mv_y[4] = y_mv - 2;  // TL position
    4484             :     half_mv_y[5] = y_mv - 2;  // TR position
    4485             :     half_mv_y[6] = y_mv + 2;  // BR position
    4486             :     half_mv_y[7] = y_mv + 2;  // BL position
    4487             :     // Compute SSD for the best full search candidate
    4488             :     if (context_ptr->fractional_search_method == SSD_SEARCH) {
    4489             :         uint32_t integer_sse =
    4490             :             (uint32_t)spatial_full_distortion_kernel(
    4491             :                     context_ptr->sb_src_ptr,
    4492             :                     src_block_index,
    4493             :                     context_ptr->sb_src_stride,
    4494             :                     ref_buffer,
    4495             :                     search_index_y * ref_stride + search_index_x,
    4496             :                     ref_stride,
    4497             :                     pu_width,
    4498             :                     pu_height);
    4499             :         if (integer_sse < *p_best_ssd) {
    4500             :             *p_best_ssd = integer_sse;
    4501             :             *p_best_mv = ineteger_mv;
    4502             :         }
    4503             :     }
    4504             :     // L position
    4505             :     search_region_index =
    4506             :         search_index_x +
    4507             :         (int16_t)context_ptr->interpolated_stride * search_index_y;
    4508             :     distortion_left_position =
    4509             :         (context_ptr->fractional_search_method == SSD_SEARCH)
    4510             :             ? spatial_full_distortion_kernel(
    4511             :                       context_ptr->sb_src_ptr,
    4512             :                       src_block_index,
    4513             :                       context_ptr->sb_src_stride,
    4514             :                       pos_b_buffer,
    4515             :                       search_region_index,
    4516             :                       context_ptr->interpolated_stride,
    4517             :                       pu_width,
    4518             :                       pu_height)
    4519             :             : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    4520             :                   ? (nxm_sad_kernel(
    4521             :                         &(context_ptr->sb_src_ptr[src_block_index]),
    4522             :                         context_ptr->sb_src_stride << 1,
    4523             :                         &(pos_b_buffer[search_region_index]),
    4524             :                         context_ptr->interpolated_stride << 1,
    4525             :                         pu_height >> 1,
    4526             :                         pu_width))
    4527             :                         << 1
    4528             :                   : nxm_sad_kernel(
    4529             :                         &(context_ptr->sb_src_ptr[src_block_index]),
    4530             :                         context_ptr->sb_src_stride,
    4531             :                         &(pos_b_buffer[search_region_index]),
    4532             :                         context_ptr->interpolated_stride,
    4533             :                         pu_height,
    4534             :                         pu_width);
    4535             :     if (context_ptr->fractional_search_method == SSD_SEARCH) {
    4536             :         if (distortion_left_position < *p_best_ssd) {
    4537             :             *p_best_sad = (uint32_t)
    4538             :                 nxm_sad_kernel(
    4539             :                     &(context_ptr->sb_src_ptr[src_block_index]),
    4540             :                     context_ptr->sb_src_stride,
    4541             :                     &(pos_b_buffer[search_region_index]),
    4542             :                     context_ptr->interpolated_stride,
    4543             :                     pu_height,
    4544             :                     pu_width);
    4545             :             *p_best_mv =
    4546             :                 ((uint16_t)half_mv_y[0] << 16) | ((uint16_t)half_mv_x[0]);
    4547             :             *p_best_ssd = (uint32_t)distortion_left_position;
    4548             :         }
    4549             :     } else {
    4550             :         if (distortion_left_position < *p_best_sad) {
    4551             :             *p_best_sad = (uint32_t)distortion_left_position;
    4552             :             *p_best_mv =
    4553             :                 ((uint16_t)half_mv_y[0] << 16) | ((uint16_t)half_mv_x[0]);
    4554             :         }
    4555             :     }
    4556             : #if !HP_REF_OPT
    4557             :     // R position
    4558             :     search_region_index++;
    4559             :     distortion_right_position =
    4560             :         (context_ptr->fractional_search_method == SSD_SEARCH)
    4561             :             ? spatial_full_distortion_kernel(
    4562             :                       &(context_ptr->sb_src_ptr[src_block_index]),
    4563             :                       context_ptr->sb_src_stride,
    4564             :                       &(pos_b_buffer[search_region_index]),
    4565             :                       context_ptr->interpolated_stride,
    4566             :                       pu_width,
    4567             :                       pu_height)
    4568             :             : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    4569             :                   ? (nxm_sad_kernel(
    4570             :                         &(context_ptr->sb_src_ptr[src_block_index]),
    4571             :                         context_ptr->sb_src_stride << 1,
    4572             :                         &(pos_b_buffer[search_region_index]),
    4573             :                         context_ptr->interpolated_stride << 1,
    4574             :                         pu_height >> 1,
    4575             :                         pu_width))
    4576             :                         << 1
    4577             :                   : nxm_sad_kernel(
    4578             :                         &(context_ptr->sb_src_ptr[src_block_index]),
    4579             :                         context_ptr->sb_src_stride,
    4580             :                         &(pos_b_buffer[search_region_index]),
    4581             :                         context_ptr->interpolated_stride,
    4582             :                         pu_height,
    4583             :                         pu_width);
    4584             :     if (context_ptr->fractional_search_method == SSD_SEARCH) {
    4585             :         if (distortion_right_position < *p_best_ssd) {
    4586             :             *p_best_sad = (uint32_t)
    4587             :                 nxm_sad_kernel(
    4588             :                     &(context_ptr->sb_src_ptr[src_block_index]),
    4589             :                     context_ptr->sb_src_stride,
    4590             :                     &(pos_b_buffer[search_region_index]),
    4591             :                     context_ptr->interpolated_stride,
    4592             :                     pu_height,
    4593             :                     pu_width);
    4594             :             *p_best_mv =
    4595             :                 ((uint16_t)half_mv_y[1] << 16) | ((uint16_t)half_mv_x[1]);
    4596             :             *p_best_ssd = (uint32_t)distortion_right_position;
    4597             :         }
    4598             :     } else {
    4599             :         if (distortion_right_position < *p_best_sad) {
    4600             :             *p_best_sad = (uint32_t)distortion_right_position;
    4601             :             *p_best_mv =
    4602             :                 ((uint16_t)half_mv_y[1] << 16) | ((uint16_t)half_mv_x[1]);
    4603             :         }
    4604             :     }
    4605             : #endif
    4606             :     // T position
    4607             :     search_region_index =
    4608             :         search_index_x +
    4609             :         (int16_t)context_ptr->interpolated_stride * search_index_y;
    4610             :     distortion_top_position =
    4611             :         (context_ptr->fractional_search_method == SSD_SEARCH)
    4612             :             ? spatial_full_distortion_kernel(
    4613             :                       context_ptr->sb_src_ptr,
    4614             :                       src_block_index,
    4615             :                       context_ptr->sb_src_stride,
    4616             :                       pos_h_buffer,
    4617             :                       search_region_index,
    4618             :                       context_ptr->interpolated_stride,
    4619             :                       pu_width,
    4620             :                       pu_height)
    4621             :             : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    4622             :                   ? (nxm_sad_kernel(
    4623             :                         &(context_ptr->sb_src_ptr[src_block_index]),
    4624             :                         context_ptr->sb_src_stride << 1,
    4625             :                         &(pos_h_buffer[search_region_index]),
    4626             :                         context_ptr->interpolated_stride << 1,
    4627             :                         pu_height >> 1,
    4628             :                         pu_width))
    4629             :                         << 1
    4630             :                   : nxm_sad_kernel(
    4631             :                         &(context_ptr->sb_src_ptr[src_block_index]),
    4632             :                         context_ptr->sb_src_stride,
    4633             :                         &(pos_h_buffer[search_region_index]),
    4634             :                         context_ptr->interpolated_stride,
    4635             :                         pu_height,
    4636             :                         pu_width);
    4637             :     if (context_ptr->fractional_search_method == SSD_SEARCH) {
    4638             :         if (distortion_top_position < *p_best_ssd) {
    4639             :             *p_best_sad = (uint32_t)
    4640             :                 nxm_sad_kernel(
    4641             :                     &(context_ptr->sb_src_ptr[src_block_index]),
    4642             :                     context_ptr->sb_src_stride,
    4643             :                     &(pos_h_buffer[search_region_index]),
    4644             :                     context_ptr->interpolated_stride,
    4645             :                     pu_height,
    4646             :                     pu_width);
    4647             :             *p_best_mv =
    4648             :                 ((uint16_t)half_mv_y[2] << 16) | ((uint16_t)half_mv_x[2]);
    4649             :             *p_best_ssd = (uint32_t)distortion_top_position;
    4650             :         }
    4651             :     } else {
    4652             :         if (distortion_top_position < *p_best_sad) {
    4653             :             *p_best_sad = (uint32_t)distortion_top_position;
    4654             :             *p_best_mv =
    4655             :                 ((uint16_t)half_mv_y[2] << 16) | ((uint16_t)half_mv_x[2]);
    4656             :         }
    4657             :     }
    4658             : #if !HP_REF_OPT
    4659             :     // B position
    4660             :     search_region_index += (int16_t)context_ptr->interpolated_stride;
    4661             :     distortion_bottom_position =
    4662             :         (context_ptr->fractional_search_method == SSD_SEARCH)
    4663             :             ? spatial_full_distortion_kernel(
    4664             :                       &(context_ptr->sb_src_ptr[src_block_index]),
    4665             :                       context_ptr->sb_src_stride,
    4666             :                       &(pos_h_buffer[search_region_index]),
    4667             :                       context_ptr->interpolated_stride,
    4668             :                       pu_width,
    4669             :                       pu_height)
    4670             :             : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    4671             :                   ? (nxm_sad_kernel(
    4672             :                         &(context_ptr->sb_src_ptr[src_block_index]),
    4673             :                         context_ptr->sb_src_stride << 1,
    4674             :                         &(pos_h_buffer[search_region_index]),
    4675             :                         context_ptr->interpolated_stride << 1,
    4676             :                         pu_height >> 1,
    4677             :                         pu_width))
    4678             :                         << 1
    4679             :                   : nxm_sad_kernel(
    4680             :                         &(context_ptr->sb_src_ptr[src_block_index]),
    4681             :                         context_ptr->sb_src_stride,
    4682             :                         &(pos_h_buffer[search_region_index]),
    4683             :                         context_ptr->interpolated_stride,
    4684             :                         pu_height,
    4685             :                         pu_width);
    4686             :     if (context_ptr->fractional_search_method == SSD_SEARCH) {
    4687             :         if (distortion_bottom_position < *p_best_ssd) {
    4688             :             *p_best_sad = (uint32_t)
    4689             :                 nxm_sad_kernel(
    4690             :                     &(context_ptr->sb_src_ptr[src_block_index]),
    4691             :                     context_ptr->sb_src_stride,
    4692             :                     &(pos_h_buffer[search_region_index]),
    4693             :                     context_ptr->interpolated_stride,
    4694             :                     pu_height,
    4695             :                     pu_width);
    4696             :             *p_best_mv =
    4697             :                 ((uint16_t)half_mv_y[3] << 16) | ((uint16_t)half_mv_x[3]);
    4698             :             *p_best_ssd = (uint32_t)distortion_bottom_position;
    4699             :         }
    4700             :     } else {
    4701             :         if (distortion_bottom_position < *p_best_sad) {
    4702             :             *p_best_sad = (uint32_t)distortionBottomPosition;
    4703             :             *p_best_mv =
    4704             :                 ((uint16_t)half_mv_y[3] << 16) | ((uint16_t)half_mv_x[3]);
    4705             :         }
    4706             :     }
    4707             : #endif
    4708             :     // TL position
    4709             :     search_region_index =
    4710             :         search_index_x +
    4711             :         (int16_t)context_ptr->interpolated_stride * search_index_y;
    4712             :     distortion_topleft_position =
    4713             :         (context_ptr->fractional_search_method == SSD_SEARCH)
    4714             :             ? spatial_full_distortion_kernel(
    4715             :                       context_ptr->sb_src_ptr,
    4716             :                       src_block_index,
    4717             :                       context_ptr->sb_src_stride,
    4718             :                       pos_j_buffer,
    4719             :                       search_region_index,
    4720             :                       context_ptr->interpolated_stride,
    4721             :                       pu_width,
    4722             :                       pu_height)
    4723             :             : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    4724             :                   ? (nxm_sad_kernel(
    4725             :                         &(context_ptr->sb_src_ptr[src_block_index]),
    4726             :                         context_ptr->sb_src_stride << 1,
    4727             :                         &(pos_j_buffer[search_region_index]),
    4728             :                         context_ptr->interpolated_stride << 1,
    4729             :                         pu_height >> 1,
    4730             :                         pu_width))
    4731             :                         << 1
    4732             :                   : nxm_sad_kernel(
    4733             :                         &(context_ptr->sb_src_ptr[src_block_index]),
    4734             :                         context_ptr->sb_src_stride,
    4735             :                         &(pos_j_buffer[search_region_index]),
    4736             :                         context_ptr->interpolated_stride,
    4737             :                         pu_height,
    4738             :                         pu_width);
    4739             :     if (context_ptr->fractional_search_method == SSD_SEARCH) {
    4740             :         if (distortion_topleft_position < *p_best_ssd) {
    4741             :             *p_best_sad = (uint32_t)
    4742             :                 nxm_sad_kernel(
    4743             :                     &(context_ptr->sb_src_ptr[src_block_index]),
    4744             :                     context_ptr->sb_src_stride,
    4745             :                     &(pos_j_buffer[search_region_index]),
    4746             :                     context_ptr->interpolated_stride,
    4747             :                     pu_height,
    4748             :                     pu_width);
    4749             :             *p_best_mv =
    4750             :                 ((uint16_t)half_mv_y[4] << 16) | ((uint16_t)half_mv_x[4]);
    4751             :             *p_best_ssd = (uint32_t)distortion_topleft_position;
    4752             :         }
    4753             :     } else {
    4754             :         if (distortion_topleft_position < *p_best_sad) {
    4755             :             *p_best_sad = (uint32_t)distortion_topleft_position;
    4756             :             *p_best_mv =
    4757             :                 ((uint16_t)half_mv_y[4] << 16) | ((uint16_t)half_mv_x[4]);
    4758             :         }
    4759             :     }
    4760             :     // TR position
    4761             :     search_region_index++;
    4762             :     distortion_topright_position =
    4763             :         (context_ptr->fractional_search_method == SSD_SEARCH)
    4764             :             ? spatial_full_distortion_kernel(
    4765             :                       context_ptr->sb_src_ptr,
    4766             :                       src_block_index,
    4767             :                       context_ptr->sb_src_stride,
    4768             :                       pos_j_buffer,
    4769             :                       search_region_index,
    4770             :                       context_ptr->interpolated_stride,
    4771             :                       pu_width,
    4772             :                       pu_height)
    4773             :             : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    4774             :                   ? (nxm_sad_kernel(
    4775             :                         &(context_ptr->sb_src_ptr[src_block_index]),
    4776             :                         context_ptr->sb_src_stride << 1,
    4777             :                         &(pos_j_buffer[search_region_index]),
    4778             :                         context_ptr->interpolated_stride << 1,
    4779             :                         pu_height >> 1,
    4780             :                         pu_width))
    4781             :                         << 1
    4782             :                   : nxm_sad_kernel(
    4783             :                         &(context_ptr->sb_src_ptr[src_block_index]),
    4784             :                         context_ptr->sb_src_stride,
    4785             :                         &(pos_j_buffer[search_region_index]),
    4786             :                         context_ptr->interpolated_stride,
    4787             :                         pu_height,
    4788             :                         pu_width);
    4789             : 
    4790             :     if (context_ptr->fractional_search_method == SSD_SEARCH) {
    4791             :         if (distortion_topright_position < *p_best_ssd) {
    4792             :             *p_best_sad = (uint32_t)
    4793             :                 nxm_sad_kernel(
    4794             :                     &(context_ptr->sb_src_ptr[src_block_index]),
    4795             :                     context_ptr->sb_src_stride,
    4796             :                     &(pos_j_buffer[search_region_index]),
    4797             :                     context_ptr->interpolated_stride,
    4798             :                     pu_height,
    4799             :                     pu_width);
    4800             :             *p_best_mv =
    4801             :                 ((uint16_t)half_mv_y[5] << 16) | ((uint16_t)half_mv_x[5]);
    4802             :             *p_best_ssd = (uint32_t)distortion_topright_position;
    4803             :         }
    4804             :     } else {
    4805             :         if (distortion_topright_position < *p_best_sad) {
    4806             :             *p_best_sad = (uint32_t)distortion_topright_position;
    4807             :             *p_best_mv =
    4808             :                 ((uint16_t)half_mv_y[5] << 16) | ((uint16_t)half_mv_x[5]);
    4809             :         }
    4810             :     }
    4811             : #if !HP_REF_OPT
    4812             :     // BR position
    4813             :     search_region_index += (int16_t)context_ptr->interpolated_stride;
    4814             :     distortion_bottomright_position =
    4815             :         (context_ptr->fractional_search_method == SSD_SEARCH)
    4816             :             ? spatial_full_distortion_kernel(
    4817             :                       context_ptr->sb_src_ptr,
    4818             :                       src_block_index,
    4819             :                       context_ptr->sb_src_stride,
    4820             :                       pos_j_buffer,
    4821             :                       search_region_index,
    4822             :                       context_ptr->interpolated_stride,
    4823             :                       pu_width,
    4824             :                       pu_height)
    4825             :             : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    4826             :                   ? (nxm_sad_kernel(
    4827             :                         &(context_ptr->sb_src_ptr[src_block_index]),
    4828             :                         context_ptr->sb_src_stride << 1,
    4829             :                         &(pos_j_buffer[search_region_index]),
    4830             :                         context_ptr->interpolated_stride << 1,
    4831             :                         pu_height >> 1,
    4832             :                         pu_width))
    4833             :                         << 1
    4834             :                   : nxm_sad_kernel(
    4835             :                         &(context_ptr->sb_src_ptr[src_block_index]),
    4836             :                         context_ptr->sb_src_stride,
    4837             :                         &(pos_j_buffer[search_region_index]),
    4838             :                         context_ptr->interpolated_stride,
    4839             :                         pu_height,
    4840             :                         pu_width);
    4841             :     if (context_ptr->fractional_search_method == SSD_SEARCH) {
    4842             :         if (distortion_bottomright_position < *p_best_ssd) {
    4843             :             *p_best_sad = (uint32_t)
    4844             :                 nxm_sad_kernel(
    4845             :                     &(context_ptr->sb_src_ptr[src_block_index]),
    4846             :                     context_ptr->sb_src_stride,
    4847             :                     &(pos_j_buffer[search_region_index]),
    4848             :                     context_ptr->interpolated_stride,
    4849             :                     pu_height,
    4850             :                     pu_width);
    4851             :             *p_best_mv =
    4852             :                 ((uint16_t)half_mv_y[6] << 16) | ((uint16_t)half_mv_x[6]);
    4853             :             *p_best_ssd = (uint32_t)distortion_bottomright_position;
    4854             :         }
    4855             :     } else {
    4856             :         if (distortion_bottomright_position < *p_best_sad) {
    4857             :             *p_best_sad = (uint32_t)distortion_bottomright_position;
    4858             :             *p_best_mv =
    4859             :                 ((uint16_t)half_mv_y[6] << 16) | ((uint16_t)half_mv_x[6]);
    4860             :         }
    4861             :     }
    4862             :     // BL position
    4863             :     search_region_index--;
    4864             :     distortion_bottomleft_position =
    4865             :         (context_ptr->fractional_search_method == SSD_SEARCH)
    4866             :             ? spatial_full_distortion_kernel(
    4867             :                       &(context_ptr->sb_src_ptr[src_block_index]),
    4868             :                       context_ptr->sb_src_stride,
    4869             :                       &(pos_j_buffer[search_region_index]),
    4870             :                       context_ptr->interpolated_stride,
    4871             :                       pu_width,
    4872             :                       pu_height)
    4873             :             : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    4874             :                   ? (nxm_sad_kernel(
    4875             :                         &(context_ptr->sb_src_ptr[src_block_index]),
    4876             :                         context_ptr->sb_src_stride << 1,
    4877             :                         &(pos_j_buffer[search_region_index]),
    4878             :                         context_ptr->interpolated_stride << 1,
    4879             :                         pu_height >> 1,
    4880             :                         pu_width))
    4881             :                         << 1
    4882             :                   : (nxm_sad_kernel(
    4883             :                         &(context_ptr->sb_src_ptr[src_block_index]),
    4884             :                         context_ptr->sb_src_stride,
    4885             :                         &(pos_j_buffer[search_region_index]),
    4886             :                         context_ptr->interpolated_stride,
    4887             :                         pu_height,
    4888             :                         pu_width));
    4889             : 
    4890             :     if (context_ptr->fractional_search_method == SSD_SEARCH) {
    4891             :         if (distortion_bottomleft_position < *p_best_ssd) {
    4892             :             *p_best_sad = (uint32_t)(
    4893             :                 nxm_sad_kernel(
    4894             :                     &(context_ptr->sb_src_ptr[src_block_index]),
    4895             :                     context_ptr->sb_src_stride,
    4896             :                     &(pos_j_buffer[search_region_index]),
    4897             :                     context_ptr->interpolated_stride,
    4898             :                     pu_height,
    4899             :                     pu_width));
    4900             :             *p_best_mv =
    4901             :                 ((uint16_t)half_mv_y[7] << 16) | ((uint16_t)half_mv_x[7]);
    4902             :             *p_best_ssd = (uint32_t)distortion_bottomleft_position;
    4903             :         }
    4904             :     } else {
    4905             :         if (distortion_bottomleft_position < *p_best_sad) {
    4906             :             *p_best_sad = (uint32_t)distortion_bottomleft_position;
    4907             :             *p_best_mv =
    4908             :                 ((uint16_t)half_mv_y[7] << 16) | ((uint16_t)half_mv_x[7]);
    4909             :         }
    4910             :     }
    4911             : #endif
    4912             :     return;
    4913             : }
    4914             : #endif
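    4915             : /*******************************************
    4916             :  * half_pel_refinement_sb
    4917             :  *   performs half-pel refinement for the 85 square PUs of an SB (64x64, 32x32, 16x16, 8x8) and, when pic_depth_mode <= PIC_ALL_C_DEPTH_MODE, for the non-square PUs as well
    4918             :  *******************************************/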
    4919           0 : void half_pel_refinement_sb(
    4920             :     PictureParentControlSet *picture_control_set_ptr,
    4921             :     MeContext *context_ptr,  // input/output parameter, ME context Ptr, used to
    4922             :                              // get/update ME results
    4923             :     uint8_t *refBuffer, uint32_t ref_stride,
    4924             :     uint8_t *pos_b_buffer,  // input parameter, position "b" interpolated search
    4925             :                             // area Ptr
    4926             :     uint8_t *pos_h_buffer,  // input parameter, position "h" interpolated search
    4927             :                             // area Ptr
    4928             :     uint8_t *pos_j_buffer,  // input parameter, position "j" interpolated search
    4929             :                             // area Ptr
    4930             :     int16_t x_search_area_origin,  // input parameter, search area origin in the
    4931             :                                    // horizontal direction, used to point to
    4932             :                                    // reference samples
    4933             :     int16_t y_search_area_origin,  // input parameter, search area origin in the
    4934             :                                    // vertical direction, used to point to
    4935             :                                    // reference samples
    4936             : #if OPTIMISED_EX_SUBPEL
    4937             :     uint32_t search_area_height,  // input parameter, search area height
    4938             :     uint32_t search_area_width,  // input parameter, search area width
    4939             : #endif
    4940             :     uint32_t inetger_mv)
    4941             : {
    4942             :     uint32_t idx;
    4943             :     uint32_t pu_index;
    4944             :     uint32_t block_index_shift_x;
    4945             :     uint32_t block_index_shift_y;
    4946             :     uint32_t src_block_index;
    4947             :     uint32_t posb_buffer_index;
    4948             :     uint32_t posh_buffer_index;
    4949             :     uint32_t posj_buffer_index;
    4950           0 :     if (context_ptr->fractional_search64x64)
    4951           0 :         half_pel_refinement_block(context_ptr,
    4952             :                                   &(refBuffer[0]),
    4953             :                                   ref_stride,
    4954             :                                   context_ptr->p_best_ssd64x64,
    4955             :                                   0,
    4956             :                                   &(pos_b_buffer[0]),
    4957             :                                   &(pos_h_buffer[0]),
    4958             :                                   &(pos_j_buffer[0]),
    4959             :                                   64,
    4960             :                                   64,
    4961             :                                   x_search_area_origin,
    4962             :                                   y_search_area_origin,
    4963             : #if OPTIMISED_EX_SUBPEL
    4964             :                                   search_area_height,
    4965             :                                   search_area_width,
    4966             : #endif
    4967             :                                   context_ptr->p_best_sad64x64,
    4968             :                                   context_ptr->p_best_mv64x64,
    4969             :                                   &context_ptr->psub_pel_direction64x64,
    4970             :                                   context_ptr->p_best_full_pel_mv64x64,
    4971             :                                   inetger_mv);
    4972             :     // 32x32 [4 partitions]
    4973           0 :     for (pu_index = 0; pu_index < 4; ++pu_index) {
    4974           0 :         block_index_shift_x = (pu_index & 0x01) << 5;
    4975           0 :         block_index_shift_y = (pu_index >> 1) << 5;
    4976           0 :         src_block_index = block_index_shift_x +
    4977           0 :                           block_index_shift_y * context_ptr->sb_src_stride;
    4978           0 :         posb_buffer_index =
    4979             :             block_index_shift_x +
    4980           0 :             block_index_shift_y * context_ptr->interpolated_stride;
    4981           0 :         posh_buffer_index =
    4982             :             block_index_shift_x +
    4983           0 :             block_index_shift_y * context_ptr->interpolated_stride;
    4984           0 :         posj_buffer_index =
    4985             :             block_index_shift_x +
    4986           0 :             block_index_shift_y * context_ptr->interpolated_stride;
    4987           0 :         half_pel_refinement_block(
    4988             :             context_ptr,
    4989           0 :             &(refBuffer[block_index_shift_y * ref_stride +
    4990             :                         block_index_shift_x]),
    4991             :             ref_stride,
    4992           0 :             &context_ptr->p_best_ssd32x32[pu_index],
    4993             :             src_block_index,
    4994             :             &(pos_b_buffer[posb_buffer_index]),
    4995             :             &(pos_h_buffer[posh_buffer_index]),
    4996             :             &(pos_j_buffer[posj_buffer_index]),
    4997             :             32,
    4998             :             32,
    4999             :             x_search_area_origin,
    5000             :             y_search_area_origin,
    5001             : #if OPTIMISED_EX_SUBPEL
    5002             :             search_area_height,
    5003             :             search_area_width,
    5004             : #endif
    5005           0 :             &context_ptr->p_best_sad32x32[pu_index],
    5006           0 :             &context_ptr->p_best_mv32x32[pu_index],
    5007             :             &context_ptr->psub_pel_direction32x32[pu_index],
    5008           0 :             &context_ptr->p_best_full_pel_mv32x32[pu_index],
    5009             :             inetger_mv);
    5010             :     }
    5011             :     // 16x16 [16 partitions]
    5012           0 :     for (pu_index = 0; pu_index < 16; ++pu_index) {
    5013           0 :         idx = tab16x16[pu_index];
    5014           0 :         block_index_shift_x = (pu_index & 0x03) << 4;
    5015           0 :         block_index_shift_y = (pu_index >> 2) << 4;
    5016           0 :         src_block_index = block_index_shift_x +
    5017           0 :                           block_index_shift_y * context_ptr->sb_src_stride;
    5018           0 :         posb_buffer_index =
    5019             :             block_index_shift_x +
    5020           0 :             block_index_shift_y * context_ptr->interpolated_stride;
    5021           0 :         posh_buffer_index =
    5022             :             block_index_shift_x +
    5023           0 :             block_index_shift_y * context_ptr->interpolated_stride;
    5024           0 :         posj_buffer_index =
    5025             :             block_index_shift_x +
    5026           0 :             block_index_shift_y * context_ptr->interpolated_stride;
    5027           0 :         half_pel_refinement_block(context_ptr,
    5028           0 :                                   &(refBuffer[block_index_shift_y * ref_stride +
    5029             :                                               block_index_shift_x]),
    5030             :                                   ref_stride,
    5031           0 :                                   &context_ptr->p_best_ssd16x16[idx],
    5032             :                                   src_block_index,
    5033             :                                   &(pos_b_buffer[posb_buffer_index]),
    5034             :                                   &(pos_h_buffer[posh_buffer_index]),
    5035             :                                   &(pos_j_buffer[posj_buffer_index]),
    5036             :                                   16,
    5037             :                                   16,
    5038             :                                   x_search_area_origin,
    5039             :                                   y_search_area_origin,
    5040             : #if OPTIMISED_EX_SUBPEL
    5041             :                                   search_area_height,
    5042             :                                   search_area_width,
    5043             : #endif
    5044           0 :                                   &context_ptr->p_best_sad16x16[idx],
    5045           0 :                                   &context_ptr->p_best_mv16x16[idx],
    5046             :                                   &context_ptr->psub_pel_direction16x16[idx],
    5047           0 :                                   &context_ptr->p_best_full_pel_mv16x16[idx],
    5048             :                                   inetger_mv);
    5049             :     }
    5050             :     // 8x8   [64 partitions]
    5051           0 :     for (pu_index = 0; pu_index < 64; ++pu_index) {
    5052           0 :         idx = tab8x8[pu_index];  // TODO bitwise this
    5053           0 :         block_index_shift_x = (pu_index & 0x07) << 3;
    5054           0 :         block_index_shift_y = (pu_index >> 3) << 3;
    5055           0 :         src_block_index = block_index_shift_x +
    5056           0 :                           block_index_shift_y * context_ptr->sb_src_stride;
    5057           0 :         posb_buffer_index =
    5058             :             block_index_shift_x +
    5059           0 :             block_index_shift_y * context_ptr->interpolated_stride;
    5060           0 :         posh_buffer_index =
    5061             :             block_index_shift_x +
    5062           0 :             block_index_shift_y * context_ptr->interpolated_stride;
    5063           0 :         posj_buffer_index =
    5064             :             block_index_shift_x +
    5065           0 :             block_index_shift_y * context_ptr->interpolated_stride;
    5066           0 :         half_pel_refinement_block(context_ptr,
    5067           0 :                                   &(refBuffer[block_index_shift_y * ref_stride +
    5068             :                                               block_index_shift_x]),
    5069             :                                   ref_stride,
    5070           0 :                                   &context_ptr->p_best_ssd8x8[idx],
    5071             :                                   src_block_index,
    5072             :                                   &(pos_b_buffer[posb_buffer_index]),
    5073             :                                   &(pos_h_buffer[posh_buffer_index]),
    5074             :                                   &(pos_j_buffer[posj_buffer_index]),
    5075             :                                   8,
    5076             :                                   8,
    5077             :                                   x_search_area_origin,
    5078             :                                   y_search_area_origin,
    5079             : #if OPTIMISED_EX_SUBPEL
    5080             :                                   search_area_height,
    5081             :                                   search_area_width,
    5082             : #endif
    5083           0 :                                   &context_ptr->p_best_sad8x8[idx],
    5084           0 :                                   &context_ptr->p_best_mv8x8[idx],
    5085             :                                   &context_ptr->psub_pel_direction8x8[idx],
    5086           0 :                                   &context_ptr->p_best_full_pel_mv8x8[idx],
    5087             :                                   inetger_mv);
    5088             :     }
    5089           0 :     if (picture_control_set_ptr->pic_depth_mode <= PIC_ALL_C_DEPTH_MODE) {
    5090             :         // 64x32
    5091           0 :         for (pu_index = 0; pu_index < 2; ++pu_index) {
    5092           0 :             block_index_shift_x = 0;
    5093           0 :             block_index_shift_y = pu_index << 5;
    5094           0 :             src_block_index = block_index_shift_x +
    5095           0 :                               block_index_shift_y * context_ptr->sb_src_stride;
    5096           0 :             posb_buffer_index =
    5097             :                 block_index_shift_x +
    5098           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5099           0 :             posh_buffer_index =
    5100             :                 block_index_shift_x +
    5101           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5102           0 :             posj_buffer_index =
    5103             :                 block_index_shift_x +
    5104           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5105           0 :             half_pel_refinement_block(
    5106             :                 context_ptr,
    5107           0 :                 &(refBuffer[block_index_shift_y * ref_stride +
    5108             :                             block_index_shift_x]),
    5109             :                 ref_stride,
    5110           0 :                 &context_ptr->p_best_ssd64x32[pu_index],
    5111             :                 src_block_index,
    5112             :                 &(pos_b_buffer[posb_buffer_index]),
    5113             :                 &(pos_h_buffer[posh_buffer_index]),
    5114             :                 &(pos_j_buffer[posj_buffer_index]),
    5115             :                 64,
    5116             :                 32,
    5117             :                 x_search_area_origin,
    5118             :                 y_search_area_origin,
    5119             : #if OPTIMISED_EX_SUBPEL
    5120             :                 search_area_height,
    5121             :                 search_area_width,
    5122             : #endif
    5123           0 :                 &context_ptr->p_best_sad64x32[pu_index],
    5124           0 :                 &context_ptr->p_best_mv64x32[pu_index],
    5125             :                 &context_ptr->psub_pel_direction64x32[pu_index],
    5126           0 :                 &context_ptr->p_best_full_pel_mv64x32[pu_index],
    5127             :                 inetger_mv);
    5128             :         }
    5129             :         // 32x16
    5130           0 :         for (pu_index = 0; pu_index < 8; ++pu_index) {
    5131           0 :             idx = tab32x16[pu_index];  // TODO bitwise this
    5132           0 :             block_index_shift_x = (pu_index & 0x01) << 5;
    5133           0 :             block_index_shift_y = (pu_index >> 1) << 4;
    5134           0 :             src_block_index = block_index_shift_x +
    5135           0 :                               block_index_shift_y * context_ptr->sb_src_stride;
    5136           0 :             posb_buffer_index =
    5137             :                 block_index_shift_x +
    5138           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5139           0 :             posh_buffer_index =
    5140             :                 block_index_shift_x +
    5141           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5142           0 :             posj_buffer_index =
    5143             :                 block_index_shift_x +
    5144           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5145           0 :             half_pel_refinement_block(
    5146             :                 context_ptr,
    5147           0 :                 &(refBuffer[block_index_shift_y * ref_stride +
    5148             :                             block_index_shift_x]),
    5149             :                 ref_stride,
    5150           0 :                 &context_ptr->p_best_ssd32x16[idx],
    5151             :                 src_block_index,
    5152             :                 &(pos_b_buffer[posb_buffer_index]),
    5153             :                 &(pos_h_buffer[posh_buffer_index]),
    5154             :                 &(pos_j_buffer[posj_buffer_index]),
    5155             :                 32,
    5156             :                 16,
    5157             :                 x_search_area_origin,
    5158             :                 y_search_area_origin,
    5159             : #if OPTIMISED_EX_SUBPEL
    5160             :                 search_area_height,
    5161             :                 search_area_width,
    5162             : #endif
    5163           0 :                 &context_ptr->p_best_sad32x16[idx],
    5164           0 :                 &context_ptr->p_best_mv32x16[idx],
    5165             :                 &context_ptr->psub_pel_direction32x16[idx],
    5166           0 :                 &context_ptr->p_best_full_pel_mv32x16[idx],
    5167             :                 inetger_mv);
    5168             :         }
    5169             :         // 16x8
    5170           0 :         for (pu_index = 0; pu_index < 32; ++pu_index) {
    5171           0 :             idx = tab16x8[pu_index];
    5172           0 :             block_index_shift_x = (pu_index & 0x03) << 4;
    5173           0 :             block_index_shift_y = (pu_index >> 2) << 3;
    5174           0 :             src_block_index = block_index_shift_x +
    5175           0 :                               block_index_shift_y * context_ptr->sb_src_stride;
    5176           0 :             posb_buffer_index =
    5177             :                 block_index_shift_x +
    5178           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5179           0 :             posh_buffer_index =
    5180             :                 block_index_shift_x +
    5181           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5182           0 :             posj_buffer_index =
    5183             :                 block_index_shift_x +
    5184           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5185           0 :             half_pel_refinement_block(
    5186             :                 context_ptr,
    5187           0 :                 &(refBuffer[block_index_shift_y * ref_stride +
    5188             :                             block_index_shift_x]),
    5189             :                 ref_stride,
    5190           0 :                 &context_ptr->p_best_ssd16x8[idx],
    5191             :                 src_block_index,
    5192             :                 &(pos_b_buffer[posb_buffer_index]),
    5193             :                 &(pos_h_buffer[posh_buffer_index]),
    5194             :                 &(pos_j_buffer[posj_buffer_index]),
    5195             :                 16,
    5196             :                 8,
    5197             :                 x_search_area_origin,
    5198             :                 y_search_area_origin,
    5199             : #if OPTIMISED_EX_SUBPEL
    5200             :                 search_area_height,
    5201             :                 search_area_width,
    5202             : #endif
    5203           0 :                 &context_ptr->p_best_sad16x8[idx],
    5204           0 :                 &context_ptr->p_best_mv16x8[idx],
    5205             :                 &context_ptr->psub_pel_direction16x8[idx],
    5206           0 :                 &context_ptr->p_best_full_pel_mv16x8[idx],
    5207             :                 inetger_mv);
    5208             :         }
    5209             :         // 32x64
    5210           0 :         for (pu_index = 0; pu_index < 2; ++pu_index) {
    5211           0 :             block_index_shift_x = pu_index << 5;
    5212           0 :             block_index_shift_y = 0;
    5213           0 :             src_block_index = block_index_shift_x +
    5214           0 :                               block_index_shift_y * context_ptr->sb_src_stride;
    5215           0 :             posb_buffer_index =
    5216             :                 block_index_shift_x +
    5217           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5218           0 :             posh_buffer_index =
    5219             :                 block_index_shift_x +
    5220           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5221           0 :             posj_buffer_index =
    5222             :                 block_index_shift_x +
    5223           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5224           0 :             half_pel_refinement_block(
    5225             :                 context_ptr,
    5226           0 :                 &(refBuffer[block_index_shift_y * ref_stride +
    5227             :                             block_index_shift_x]),
    5228             :                 ref_stride,
    5229           0 :                 &context_ptr->p_best_ssd32x64[pu_index],
    5230             :                 src_block_index,
    5231             :                 &(pos_b_buffer[posb_buffer_index]),
    5232             :                 &(pos_h_buffer[posh_buffer_index]),
    5233             :                 &(pos_j_buffer[posj_buffer_index]),
    5234             :                 32,
    5235             :                 64,
    5236             :                 x_search_area_origin,
    5237             :                 y_search_area_origin,
    5238             : #if OPTIMISED_EX_SUBPEL
    5239             :                 search_area_height,
    5240             :                 search_area_width,
    5241             : #endif
    5242           0 :                 &context_ptr->p_best_sad32x64[pu_index],
    5243           0 :                 &context_ptr->p_best_mv32x64[pu_index],
    5244             :                 &context_ptr->psub_pel_direction32x64[pu_index],
    5245           0 :                 &context_ptr->p_best_full_pel_mv32x64[pu_index],
    5246             :                 inetger_mv);
    5247             :         }
    5248             :         // 16x32
    5249           0 :         for (pu_index = 0; pu_index < 8; ++pu_index) {
    5250           0 :             idx = tab16x32[pu_index];
    5251           0 :             block_index_shift_x = (pu_index & 0x03) << 4;
    5252           0 :             block_index_shift_y = (pu_index >> 2) << 5;
    5253           0 :             src_block_index = block_index_shift_x +
    5254           0 :                               block_index_shift_y * context_ptr->sb_src_stride;
    5255           0 :             posb_buffer_index =
    5256             :                 block_index_shift_x +
    5257           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5258           0 :             posh_buffer_index =
    5259             :                 block_index_shift_x +
    5260           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5261           0 :             posj_buffer_index =
    5262             :                 block_index_shift_x +
    5263           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5264           0 :             half_pel_refinement_block(
    5265             :                 context_ptr,
    5266           0 :                 &(refBuffer[block_index_shift_y * ref_stride +
    5267             :                             block_index_shift_x]),
    5268             :                 ref_stride,
    5269           0 :                 &context_ptr->p_best_ssd16x32[idx],
    5270             :                 src_block_index,
    5271             :                 &(pos_b_buffer[posb_buffer_index]),
    5272             :                 &(pos_h_buffer[posh_buffer_index]),
    5273             :                 &(pos_j_buffer[posj_buffer_index]),
    5274             :                 16,
    5275             :                 32,
    5276             :                 x_search_area_origin,
    5277             :                 y_search_area_origin,
    5278             : #if OPTIMISED_EX_SUBPEL
    5279             :                 search_area_height,
    5280             :                 search_area_width,
    5281             : #endif
    5282           0 :                 &context_ptr->p_best_sad16x32[idx],
    5283           0 :                 &context_ptr->p_best_mv16x32[idx],
    5284             :                 &context_ptr->psub_pel_direction16x32[idx],
    5285           0 :                 &context_ptr->p_best_full_pel_mv16x32[idx],
    5286             :                 inetger_mv);
    5287             :         }
    5288             :         // 8x16
    5289           0 :         for (pu_index = 0; pu_index < 32; ++pu_index) {
    5290           0 :             idx = tab8x16[pu_index];
    5291           0 :             block_index_shift_x = (pu_index & 0x07) << 3;
    5292           0 :             block_index_shift_y = (pu_index >> 3) << 4;
    5293           0 :             src_block_index = block_index_shift_x +
    5294           0 :                               block_index_shift_y * context_ptr->sb_src_stride;
    5295           0 :             posb_buffer_index =
    5296             :                 block_index_shift_x +
    5297           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5298           0 :             posh_buffer_index =
    5299             :                 block_index_shift_x +
    5300           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5301           0 :             posj_buffer_index =
    5302             :                 block_index_shift_x +
    5303           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5304           0 :             half_pel_refinement_block(
    5305             :                 context_ptr,
    5306           0 :                 &(refBuffer[block_index_shift_y * ref_stride +
    5307             :                             block_index_shift_x]),
    5308             :                 ref_stride,
    5309           0 :                 &context_ptr->p_best_ssd8x16[idx],
    5310             :                 src_block_index,
    5311             :                 &(pos_b_buffer[posb_buffer_index]),
    5312             :                 &(pos_h_buffer[posh_buffer_index]),
    5313             :                 &(pos_j_buffer[posj_buffer_index]),
    5314             :                 8,
    5315             :                 16,
    5316             :                 x_search_area_origin,
    5317             :                 y_search_area_origin,
    5318             : #if OPTIMISED_EX_SUBPEL
    5319             :                 search_area_height,
    5320             :                 search_area_width,
    5321             : #endif
    5322           0 :                 &context_ptr->p_best_sad8x16[idx],
    5323           0 :                 &context_ptr->p_best_mv8x16[idx],
    5324             :                 &context_ptr->psub_pel_direction8x16[idx],
    5325           0 :                 &context_ptr->p_best_full_pel_mv8x16[idx],
    5326             :                 inetger_mv);
    5327             :         }
    5328             :         // 32x8
    5329           0 :         for (pu_index = 0; pu_index < 16; ++pu_index) {
    5330           0 :             idx = tab32x8[pu_index];
    5331           0 :             block_index_shift_x = (pu_index & 0x01) << 5;
    5332           0 :             block_index_shift_y = (pu_index >> 1) << 3;
    5333           0 :             src_block_index = block_index_shift_x +
    5334           0 :                               block_index_shift_y * context_ptr->sb_src_stride;
    5335           0 :             posb_buffer_index =
    5336             :                 block_index_shift_x +
    5337           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5338           0 :             posh_buffer_index =
    5339             :                 block_index_shift_x +
    5340           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5341           0 :             posj_buffer_index =
    5342             :                 block_index_shift_x +
    5343           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5344           0 :             half_pel_refinement_block(
    5345             :                 context_ptr,
    5346           0 :                 &(refBuffer[block_index_shift_y * ref_stride +
    5347             :                             block_index_shift_x]),
    5348             :                 ref_stride,
    5349           0 :                 &context_ptr->p_best_ssd32x8[idx],
    5350             :                 src_block_index,
    5351             :                 &(pos_b_buffer[posb_buffer_index]),
    5352             :                 &(pos_h_buffer[posh_buffer_index]),
    5353             :                 &(pos_j_buffer[posj_buffer_index]),
    5354             :                 32,
    5355             :                 8,
    5356             :                 x_search_area_origin,
    5357             :                 y_search_area_origin,
    5358             : #if OPTIMISED_EX_SUBPEL
    5359             :                 search_area_height,
    5360             :                 search_area_width,
    5361             : #endif
    5362           0 :                 &context_ptr->p_best_sad32x8[idx],
    5363           0 :                 &context_ptr->p_best_mv32x8[idx],
    5364             :                 &context_ptr->psub_pel_direction32x8[idx],
    5365           0 :                 &context_ptr->p_best_full_pel_mv32x8[idx],
    5366             :                 inetger_mv);
    5367             :         }
    5368           0 :         for (pu_index = 0; pu_index < 16; ++pu_index) {
    5369           0 :             idx = tab8x32[pu_index];
    5370           0 :             block_index_shift_x = (pu_index & 0x07) << 3;
    5371           0 :             block_index_shift_y = (pu_index >> 3) << 5;
    5372           0 :             src_block_index = block_index_shift_x +
    5373           0 :                               block_index_shift_y * context_ptr->sb_src_stride;
    5374           0 :             posb_buffer_index =
    5375             :                 block_index_shift_x +
    5376           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5377           0 :             posh_buffer_index =
    5378             :                 block_index_shift_x +
    5379           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5380           0 :             posj_buffer_index =
    5381             :                 block_index_shift_x +
    5382           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5383           0 :             half_pel_refinement_block(
    5384             :                 context_ptr,
    5385           0 :                 &(refBuffer[block_index_shift_y * ref_stride +
    5386             :                             block_index_shift_x]),
    5387             :                 ref_stride,
    5388           0 :                 &context_ptr->p_best_ssd8x32[idx],
    5389             :                 src_block_index,
    5390             :                 &(pos_b_buffer[posb_buffer_index]),
    5391             :                 &(pos_h_buffer[posh_buffer_index]),
    5392             :                 &(pos_j_buffer[posj_buffer_index]),
    5393             :                 8,
    5394             :                 32,
    5395             :                 x_search_area_origin,
    5396             :                 y_search_area_origin,
    5397             : #if OPTIMISED_EX_SUBPEL
    5398             :                 search_area_height,
    5399             :                 search_area_width,
    5400             : #endif
    5401           0 :                 &context_ptr->p_best_sad8x32[idx],
    5402           0 :                 &context_ptr->p_best_mv8x32[idx],
    5403             :                 &context_ptr->psub_pel_direction8x32[idx],
    5404           0 :                 &context_ptr->p_best_full_pel_mv8x32[idx],
    5405             :                 inetger_mv);
    5406             :         }
    5407           0 :         for (pu_index = 0; pu_index < 4; ++pu_index) {
    5408           0 :             idx = pu_index;
    5409           0 :             block_index_shift_x = 0;
    5410           0 :             block_index_shift_y = pu_index << 4;
    5411           0 :             src_block_index = block_index_shift_x +
    5412           0 :                               block_index_shift_y * context_ptr->sb_src_stride;
    5413           0 :             posb_buffer_index =
    5414             :                 block_index_shift_x +
    5415           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5416           0 :             posh_buffer_index =
    5417             :                 block_index_shift_x +
    5418           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5419           0 :             posj_buffer_index =
    5420             :                 block_index_shift_x +
    5421           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5422           0 :             half_pel_refinement_block(
    5423             :                 context_ptr,
    5424           0 :                 &(refBuffer[block_index_shift_y * ref_stride +
    5425             :                             block_index_shift_x]),
    5426             :                 ref_stride,
    5427           0 :                 &context_ptr->p_best_ssd64x16[idx],
    5428             :                 src_block_index,
    5429             :                 &(pos_b_buffer[posb_buffer_index]),
    5430             :                 &(pos_h_buffer[posh_buffer_index]),
    5431             :                 &(pos_j_buffer[posj_buffer_index]),
    5432             :                 64,
    5433             :                 16,
    5434             :                 x_search_area_origin,
    5435             :                 y_search_area_origin,
    5436             : #if OPTIMISED_EX_SUBPEL
    5437             :                 search_area_height,
    5438             :                 search_area_width,
    5439             : #endif
    5440           0 :                 &context_ptr->p_best_sad64x16[idx],
    5441           0 :                 &context_ptr->p_best_mv64x16[idx],
    5442             :                 &context_ptr->psub_pel_direction64x16[idx],
    5443           0 :                 &context_ptr->p_best_full_pel_mv64x16[idx],
    5444             :                 inetger_mv);
    5445             :         }
    5446           0 :         for (pu_index = 0; pu_index < 4; ++pu_index) {
    5447           0 :             idx = pu_index;
    5448           0 :             block_index_shift_x = pu_index << 4;
    5449           0 :             block_index_shift_y = 0;
    5450           0 :             src_block_index = block_index_shift_x +
    5451           0 :                               block_index_shift_y * context_ptr->sb_src_stride;
    5452           0 :             posb_buffer_index =
    5453             :                 block_index_shift_x +
    5454           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5455           0 :             posh_buffer_index =
    5456             :                 block_index_shift_x +
    5457           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5458           0 :             posj_buffer_index =
    5459             :                 block_index_shift_x +
    5460           0 :                 block_index_shift_y * context_ptr->interpolated_stride;
    5461           0 :             half_pel_refinement_block(
    5462             :                 context_ptr,
    5463           0 :                 &(refBuffer[block_index_shift_y * ref_stride +
    5464             :                             block_index_shift_x]),
    5465             :                 ref_stride,
    5466           0 :                 &context_ptr->p_best_ssd16x64[idx],
    5467             :                 src_block_index,
    5468             :                 &(pos_b_buffer[posb_buffer_index]),
    5469             :                 &(pos_h_buffer[posh_buffer_index]),
    5470             :                 &(pos_j_buffer[posj_buffer_index]),
    5471             :                 16,
    5472             :                 64,
    5473             :                 x_search_area_origin,
    5474             :                 y_search_area_origin,
    5475             : #if OPTIMISED_EX_SUBPEL
    5476             :                 search_area_height,
    5477             :                 search_area_width,
    5478             : #endif
    5479           0 :                 &context_ptr->p_best_sad16x64[idx],
    5480           0 :                 &context_ptr->p_best_mv16x64[idx],
    5481             :                 &context_ptr->psub_pel_direction16x64[idx],
    5482           0 :                 &context_ptr->p_best_full_pel_mv16x64[idx],
    5483             :                 inetger_mv);
    5484             :         }
    5485             :     }
    5486           0 :     return;
    5487             : }
    5488             : /* open_loop_me_half_pel_search_sblock: drives half_pel_refinement_sb for the
    5489             :  * reference selected by (list_index, ref_pic_index). Two variants follow,
    5490             :  * chosen at compile time by OPTIMISED_EX_SUBPEL. */
    5491             : #if OPTIMISED_EX_SUBPEL
    5492           0 : static void open_loop_me_half_pel_search_sblock(  // variant that issues one refinement call in total
    5493             :     PictureParentControlSet *picture_control_set_ptr, MeContext *context_ptr,
    5494             :     uint32_t list_index, uint32_t ref_pic_index, int16_t x_search_area_origin,  // list/ref indices pick the context buffers used below
    5495             :     int16_t y_search_area_origin, uint32_t search_area_width,
    5496             :     uint32_t search_area_height)
    5497             : {
    5498             : 
    5499           0 :     half_pel_refinement_sb(  // the search-area size is handed to the callee instead of being looped over here
    5500             :         picture_control_set_ptr,
    5501             :         context_ptr,
    5502             : #if M0_HIGH_PRECISION_INTERPOLATION
    5503             :         context_ptr->integer_buffer_ptr[list_index][ref_pic_index] +  // base pointer: skip (PAD >> 1) columns and rows
    5504             :         (ME_FILTER_PAD_DISTANCE >> 1) +
    5505             :         ((ME_FILTER_PAD_DISTANCE >> 1) *
    5506             :             context_ptr
    5507             :             ->interpolated_full_stride[list_index][ref_pic_index]),
    5508             :         context_ptr
    5509             :         ->interpolated_full_stride[list_index][ref_pic_index],
    5510             :         &(context_ptr->pos_b_buffer[list_index][ref_pic_index]
    5511             :             [(ME_FILTER_PAD_DISTANCE >> 1) *
    5512             :             context_ptr->interpolated_stride]),
    5513             : #else
    5514           0 :         context_ptr->integer_buffer_ptr[list_index][ref_pic_index] +  // base pointer: skip (TAP >> 1) columns and rows
    5515           0 :         (ME_FILTER_TAP >> 1) +
    5516           0 :         ((ME_FILTER_TAP >> 1) *
    5517             :             context_ptr
    5518           0 :             ->interpolated_full_stride[list_index][ref_pic_index]),
    5519             :         context_ptr
    5520             :         ->interpolated_full_stride[list_index][ref_pic_index],
    5521           0 :         &(context_ptr->pos_b_buffer[list_index][ref_pic_index]
    5522           0 :             [(ME_FILTER_TAP >> 1) *
    5523           0 :             context_ptr->interpolated_stride]),
    5524             : #endif
    5525           0 :         &(context_ptr->pos_h_buffer[list_index][ref_pic_index][1]),  // h buffer is passed starting at element 1
    5526             :         &(context_ptr->pos_j_buffer[list_index][ref_pic_index][0]),  // j buffer is passed from its first element
    5527             :         x_search_area_origin,
    5528             :         y_search_area_origin,
    5529             :         search_area_height,  // height is passed before width
    5530             :         search_area_width,
    5531             :         0);  // last argument is a constant 0 in this variant
    5532           0 : }
    5533             : #else
    5534             : static void open_loop_me_half_pel_search_sblock(  // variant built when OPTIMISED_EX_SUBPEL is 0: one refinement call per search position
    5535             :     PictureParentControlSet *picture_control_set_ptr, MeContext *context_ptr,
    5536             :     uint32_t list_index, uint32_t ref_pic_index, int16_t x_search_area_origin,  // list/ref indices pick the context buffers used below
    5537             :     int16_t y_search_area_origin, uint32_t search_area_width,
    5538             :     uint32_t search_area_height)
    5539             : {
    5540             :     uint32_t search_index_x, search_index_y;
    5541             :     for (search_index_y = 0; search_index_y < search_area_height;
    5542             :          search_index_y++) {
    5543             :         for (search_index_x = 0; search_index_x < search_area_width;
    5544             :              search_index_x++) {
    5545             :             int32_t mvx = (int32_t)search_index_x + x_search_area_origin;  // width-bounded index pairs with the horizontal origin
    5546             :             int32_t mvy = (int32_t)search_index_y + y_search_area_origin;  // height-bounded index pairs with the vertical origin
    5547             :             uint32_t integer_mv1 = ((uint32_t)(uint16_t)mvy) << 18;  // widen before shifting so the shift is done on an unsigned 32-bit value
    5548             :             uint16_t integer_mv2 = (((uint16_t)mvx << 2));  // x part, truncated to 16 bits
    5549             :             uint32_t integer_mv = integer_mv1 | integer_mv2;  // packed MV: y from bit 18 up, x << 2 in the low half
    5550             :             half_pel_refinement_sb(
    5551             :                 picture_control_set_ptr,
    5552             :                 context_ptr,
    5553             : #if M0_HIGH_PRECISION_INTERPOLATION
    5554             :                 context_ptr->integer_buffer_ptr[list_index][ref_pic_index] +
    5555             :                     (ME_FILTER_PAD_DISTANCE >> 1) +
    5556             :                     ((ME_FILTER_PAD_DISTANCE >> 1) *
    5557             :                      context_ptr
    5558             :                          ->interpolated_full_stride[list_index][ref_pic_index]),
    5559             :                 context_ptr
    5560             :                     ->interpolated_full_stride[list_index][ref_pic_index],
    5561             :                 &(context_ptr->pos_b_buffer[list_index][ref_pic_index]
    5562             :                                            [(ME_FILTER_PAD_DISTANCE >> 1) *
    5563             :                                             context_ptr->interpolated_stride]),
    5564             : #else
    5565             :                 context_ptr->integer_buffer_ptr[list_index][ref_pic_index] +
    5566             :                     (ME_FILTER_TAP >> 1) +
    5567             :                     ((ME_FILTER_TAP >> 1) *
    5568             :                      context_ptr
    5569             :                          ->interpolated_full_stride[list_index][ref_pic_index]),
    5570             :                 context_ptr
    5571             :                     ->interpolated_full_stride[list_index][ref_pic_index],
    5572             :                 &(context_ptr->pos_b_buffer[list_index][ref_pic_index]
    5573             :                                            [(ME_FILTER_TAP >> 1) *
    5574             :                                             context_ptr->interpolated_stride]),
    5575             : #endif
    5576             :                 &(context_ptr->pos_h_buffer[list_index][ref_pic_index][1]),  // h buffer is passed starting at element 1
    5577             :                 &(context_ptr->pos_j_buffer[list_index][ref_pic_index][0]),  // j buffer is passed from its first element
    5578             :                 x_search_area_origin,
    5579             :                 y_search_area_origin,
    5580             :                 integer_mv);  // the same buffers are passed every iteration; only this packed MV varies
    5581             :         }
    5582             :     }
    5583             : }
    5584             : #endif
    5585             : static void quarter_pel_refinement_sb(  // forward declaration, needed by open_loop_me_quarter_pel_search_sblock below
    5586             :     MeContext
    5587             :         *context_ptr,  //[IN/OUT]  ME context Ptr, used to get/update ME results
    5588             :     uint8_t *pos_full,     //[IN] the caller below passes an offset pointer into integer_buffer_ptr
    5589             :     uint32_t full_stride,  //[IN] the caller below passes interpolated_full_stride
    5590             :     uint8_t *pos_b,        //[IN] the caller below passes an offset pointer into pos_b_buffer
    5591             :     uint8_t *pos_h,        //[IN] the caller below passes pos_h_buffer starting at element 1
    5592             :     uint8_t *pos_j,        //[IN] the caller below passes pos_j_buffer starting at element 0
    5593             :     int16_t
    5594             :         x_search_area_origin,  //[IN] search area origin in the horizontal
    5595             :                                // direction, used to point to reference samples
    5596             :     int16_t
    5597             :         y_search_area_origin,  //[IN] search area origin in the vertical
    5598             :                                // direction, used to point to reference samples
    5599             :     uint32_t integer_mv);  //[IN] the caller below packs this as (y << 16) | x from 16-bit components
    5600             : 
    5601             : /* open_loop_me_quarter_pel_search_sblock: for every (x, y) position of the
    5602             :  * search area, builds that position's packed MV and calls
    5603             :  * quarter_pel_refinement_sb on the (list_index, ref_pic_index) buffers. */
    5604           0 : static void open_loop_me_quarter_pel_search_sblock(
    5605             :     MeContext *context_ptr,
    5606             :     uint32_t list_index, uint32_t ref_pic_index, int16_t x_search_area_origin,  // list/ref indices pick the context buffers used below
    5607             :     int16_t y_search_area_origin, uint32_t search_area_width,
    5608             :     uint32_t search_area_height)
    5609             : {
    5610             :     uint32_t search_index_x, search_index_y;
    5611           0 :     for (search_index_y = 0; search_index_y < search_area_height;
    5612           0 :          search_index_y++) {
    5613           0 :         for (search_index_x = 0; search_index_x < search_area_width;
    5614           0 :              search_index_x++) {
    5615           0 :             int32_t mvx = (int32_t)search_index_x + x_search_area_origin;
    5616           0 :             int32_t mvy = (int32_t)search_index_y + y_search_area_origin;
    5617           0 :             uint32_t mv1 = ((uint32_t)(uint16_t)mvy) << 18;  // widen before shifting so the shift is done on an unsigned 32-bit value
    5618           0 :             uint16_t mv2 = (((uint16_t)mvx << 2));  // x part, truncated to 16 bits
    5619           0 :             uint32_t mv0 = mv1 | mv2;  // packed MV: y from bit 18 up, x << 2 in the low half
    5620           0 :             int16_t x_mv = _MVXT(mv0);  // components are read back through the _MVXT/_MVYT macros
    5621           0 :             int16_t y_mv = _MVYT(mv0);
    5622           0 :             uint32_t integer_mv = ((uint32_t)(uint16_t)y_mv << 16) | ((uint16_t)x_mv);  // repack as (y << 16) | x without shifting a signed int
    5623           0 :             quarter_pel_refinement_sb(
    5624             :                 context_ptr,
    5625             : #if M0_HIGH_PRECISION_INTERPOLATION
    5626             :                 context_ptr->integer_buffer_ptr[list_index][ref_pic_index] +
    5627             :                     (ME_FILTER_PAD_DISTANCE >> 1) +
    5628             :                     ((ME_FILTER_PAD_DISTANCE >> 1) *
    5629             :                      context_ptr
    5630             :                          ->interpolated_full_stride[list_index][ref_pic_index]),
    5631             :                 context_ptr->interpolated_full_stride[list_index][ref_pic_index],
    5632             :                 &(context_ptr->pos_b_buffer
    5633             :                       [list_index][ref_pic_index]
    5634             :                       [(ME_FILTER_PAD_DISTANCE >> 1) *
    5635             :                        context_ptr->interpolated_stride]),  // points to b
    5636             :                                                             // position of the
    5637             :                                                             // figure above
    5638             : #else
    5639           0 :                 context_ptr->integer_buffer_ptr[list_index][ref_pic_index] +
    5640           0 :                     (ME_FILTER_TAP >> 1) +
    5641           0 :                     ((ME_FILTER_TAP >> 1) *
    5642             :                      context_ptr
    5643           0 :                          ->interpolated_full_stride[list_index][ref_pic_index]),
    5644             :                 context_ptr
    5645             :                     ->interpolated_full_stride[list_index][ref_pic_index],
    5646             :                 &(context_ptr->pos_b_buffer
    5647           0 :                       [list_index][ref_pic_index]
    5648           0 :                       [(ME_FILTER_TAP >> 1) *
    5649           0 :                        context_ptr->interpolated_stride]),  // points to b
    5650             :                                                             // position of the
    5651             :                                                             // figure above
    5652             : #endif
    5653           0 :                 &(context_ptr->pos_h_buffer[list_index][ref_pic_index]
    5654             :                                            [1]),  // points to h position of the
    5655             :                                                   // figure above
    5656             :                 &(context_ptr->pos_j_buffer[list_index][ref_pic_index]
    5657             :                                            [0]),  // points to j position of the
    5658             :                                                   // figure above
    5659             :                 x_search_area_origin,
    5660             :                 y_search_area_origin,
    5661             :                 integer_mv);
    5662             :         }
    5663             :     }
    5664           0 : }
    5665             : /*******************************************
    5666             :  * open_loop_me_fullpel_search_sblock
                     :  *   Visits every position of the search_area_width x
                     :  *   search_area_height search area, one row at a time.
                     :  *   Within a row, columns are handed to the eight-point routine in
                     :  *   groups of 8; the (search_area_width & 7) leftover columns are
                     :  *   handed to the single-point routine one by one.
                     :  *   Every call receives the linear offset
                     :  *   x + y * interpolated_full_stride[listIndex][ref_pic_index]
                     :  *   together with the x / y index shifted by the search area origin.
    5667             :  *******************************************/
    5668           0 : static void open_loop_me_fullpel_search_sblock(
    5669             :     MeContext *context_ptr, uint32_t listIndex,
    5670             :     uint32_t ref_pic_index,
    5671             :     int16_t x_search_area_origin, int16_t y_search_area_origin,
    5672             :     uint32_t search_area_width, uint32_t search_area_height)
    5673             : {
    5674             :     uint32_t xSearchIndex, ySearchIndex;
    5675           0 :     uint32_t searchAreaWidthRest8 = search_area_width & 7;  // columns left after the groups of 8
    5676           0 :     uint32_t searchAreaWidthMult8 = search_area_width - searchAreaWidthRest8;  // width rounded down to a multiple of 8
    5677             : 
    5678           0 :     for (ySearchIndex = 0; ySearchIndex < search_area_height; ySearchIndex++) {
    5679           0 :         for (xSearchIndex = 0; xSearchIndex < searchAreaWidthMult8;
    5680           0 :              xSearchIndex += 8) {
    5681             :             // this function will do:  xSearchIndex, +1, +2, ..., +7
    5682           0 :             open_loop_me_get_eight_search_point_results_block(
    5683             :                 context_ptr,
    5684             :                 listIndex,
    5685             :                 ref_pic_index,
    5686             :                 xSearchIndex +
    5687           0 :                     ySearchIndex *
    5688             :                         context_ptr->interpolated_full_stride[listIndex]
    5689           0 :                                                              [ref_pic_index],  // linear offset of (x, y) for this list / reference
    5690           0 :                 (int32_t)xSearchIndex + x_search_area_origin,  // x index shifted by the search area origin
    5691           0 :                 (int32_t)ySearchIndex + y_search_area_origin);  // y index shifted by the search area origin
    5692             :         }
    5693             : 
    5694           0 :         for (xSearchIndex = searchAreaWidthMult8;  // remainder loop: columns not covered by the 8-wide calls
    5695             :              xSearchIndex < search_area_width;
    5696           0 :              xSearchIndex++) {
    5697             : 
    5698           0 :             open_loop_me_get_search_point_results_block(
    5699             :                 context_ptr,
    5700             :                 listIndex,
    5701             :                 ref_pic_index,
    5702             :                 xSearchIndex +
    5703           0 :                     ySearchIndex *
    5704             :                         context_ptr->interpolated_full_stride[listIndex]
    5705           0 :                                                              [ref_pic_index],  // same offset computation as the 8-wide call
    5706           0 :                 (int32_t)xSearchIndex + x_search_area_origin,
    5707           0 :                 (int32_t)ySearchIndex + y_search_area_origin);
    5708             :         }
    5709             :     }
    5710           0 : }
    5711             : 
    5712             : #ifndef AVCCODEL
    5713             : /*******************************************
    5714             :  * HorizontalPelInterpolation
    5715             :  *   interpolates the search region in the horizontal direction
                     :  *   Each output sample is
                     :  *     ((src[x] + src[x+3]) * ifCoeff[0] +
                     :  *      (src[x+1] + src[x+2]) * ifCoeff[1] + ifOffset) >> IFShift
                     :  *   clipped to [0, (1 << inputBitDepth) - 1] and stored as uint8_t.
                     :  *   Every row therefore reads width + 3 input samples.
                     :  *   Only compiled when AVCCODEL is not defined.
    5716             :  *******************************************/
    5717             : static void HorizontalPelInterpolation(
    5718             :     uint8_t *src,         // input parameter, input samples Ptr
    5719             :     uint32_t src_stride,  // input parameter, input stride
    5720             :     uint32_t width,       // input parameter, input area width
    5721             :     uint32_t height,      // input parameter, input area height
    5722             :     const int32_t
    5723             :         *ifCoeff,  // input parameter, interpolation filter coefficients Ptr; only [0] and [1] are read
    5724             :     uint32_t inputBitDepth,  // input parameter, input sample bit depth
    5725             :     uint32_t dst_stride,     // input parameter, output stride
    5726             :     uint8_t *dst)            // output parameter, interpolated samples Ptr
    5727             : {
    5728             :     uint32_t x, y;
    5729             :     const int32_t maxSampleValue = (1 << inputBitDepth) - 1;  // upper clip bound
    5730             :     const int32_t ifOffset = 1 << (IFShift - 1);  // rounding term added before the >> IFShift
    5731             :     for (y = 0; y < height; ++y) {
    5732             :         for (x = 0; x < width; ++x) {
    5733             :             dst[x] = (uint8_t)CLIP3(  // NOTE(review): result is narrowed to 8 bits whatever inputBitDepth is
    5734             :                 0,
    5735             :                 (int32_t)maxSampleValue,
    5736             :                 ((((int32_t)src[x] + (int32_t)src[x + 3]) * ifCoeff[0] +  // outer pair of taps shares ifCoeff[0]
    5737             :                   ((int32_t)src[x + 1] + (int32_t)src[x + 2]) * ifCoeff[1] +  // inner pair of taps shares ifCoeff[1]
    5738             :                   ifOffset) >>
    5739             :                  IFShift));
    5740             :         }
    5741             :         src += src_stride;  // next input row
    5742             :         dst += dst_stride;  // next output row
    5743             :     }
    5744             : 
    5745             :     return;
    5746             : }
    5747             : 
    5748             : /*******************************************
    5749             :  * VerticalPelInterpolation
    5750             :  *   interpolates the search region in the vertical direction
                     :  *   Each output sample is
                     :  *     ((src[x] + src[x + 3*src_stride]) * ifCoeff[0] +
                     :  *      (src[x + src_stride] + src[x + 2*src_stride]) * ifCoeff[1] +
                     :  *      ifOffset) >> IFShift
                     :  *   clipped to [0, (1 << inputBitDepth) - 1] and stored as uint8_t.
                     :  *   Every output row therefore reads four consecutive input rows.
                     :  *   Only compiled when AVCCODEL is not defined.
    5751             :  *******************************************/
    5752             : static void VerticalPelInterpolation(
    5753             :     uint8_t *src,         // input parameter, input samples ptr
    5754             :     uint32_t src_stride,  // input parameter, input stride
    5755             :     uint32_t width,       // input parameter, input area width
    5756             :     uint32_t height,      // input parameter, input area height
    5757             :     const int32_t
    5758             :         ifCoeff[4],  // input parameter, interpolation filter coefficients Ptr; only [0] and [1] are read
    5759             :     uint32_t inputBitDepth,  // input parameter, input sample bit depth
    5760             :     uint32_t dst_stride,     // input parameter, output stride
    5761             :     uint8_t *dst)            // output parameter, interpolated samples Ptr
    5762             : {
    5763             :     uint32_t x, y;
    5764             : 
    5765             :     const int32_t maxSampleValue = (1 << inputBitDepth) - 1;  // upper clip bound
    5766             :     const int32_t ifOffset = 1 << (IFShift - 1);  // rounding term added before the >> IFShift
    5767             : 
    5768             :     const uint32_t srcStride2 = src_stride << 1;  // offset of the row two below
    5769             :     const uint32_t srcStride3 = srcStride2 + src_stride;  // offset of the row three below
    5770             : 
    5771             :     for (y = 0; y < height; y++) {
    5772             :         for (x = 0; x < width; x++) {
    5773             :             dst[x] = (uint8_t)CLIP3(  // NOTE(review): result is narrowed to 8 bits whatever inputBitDepth is
    5774             :                 0,
    5775             :                 maxSampleValue,
    5776             :                 ((((int32_t)src[x] + (int32_t)src[x + srcStride3]) *  // outer pair of rows shares ifCoeff[0]
    5777             :                       ifCoeff[0] +
    5778             :                   ((int32_t)src[x + src_stride] +  // inner pair of rows shares ifCoeff[1]
    5779             :                    (int32_t)src[x + srcStride2]) *
    5780             :                       ifCoeff[1] +
    5781             :                   ifOffset) >>
    5782             :                  IFShift));
    5783             :         }
    5784             :         src += src_stride;  // next input row
    5785             :         dst += dst_stride;  // next output row
    5786             :     }
    5787             : 
    5788             :     return;
    5789             : }
    5790             : 
    5791             : /*******************************************
    5792             :  * AvcStyleInterpolation
    5793             :  *   combines two sample planes into one, sample by sample
                     :  *   Each output sample is
                     :  *     (srcOne[x] + srcTwo[x] + 1) >> IFShiftAvcStyle
                     :  *   clipped to [0, POW2(inputBitDepth) - 1] and stored as uint8_t.
                     :  *   Presumably a rounded average of the two inputs, i.e.
                     :  *   IFShiftAvcStyle == 1 -- TODO confirm against its definition.
                     :  *   Only compiled when AVCCODEL is not defined.
    5794             :  *******************************************/
    5795             : static void AvcStyleInterpolation(
    5796             :     uint8_t *srcOne,         // input parameter, first input samples Ptr
    5797             :     uint32_t srcOneStride,   // input parameter, first input stride
    5798             :     uint8_t *srcTwo,         // input parameter, second input samples Ptr
    5799             :     uint32_t srcTwoStride,   // input parameter, second input stride
    5800             :     uint32_t width,          // input parameter, input area width
    5801             :     uint32_t height,         // input parameter, input area height
    5802             :     uint32_t inputBitDepth,  // input parameter, input sample bit depth
    5803             :     uint32_t dst_stride,     // input parameter, output stride
    5804             :     uint8_t *dst)            // output parameter, interpolated samples Ptr
    5805             : {
    5806             :     uint32_t x, y;
    5807             :     int32_t maxSampleValue = POW2(inputBitDepth) - 1;  // upper clip bound
    5808             : 
    5809             :     for (y = 0; y < height; ++y) {
    5810             :         for (x = 0; x < width; ++x) {
    5811             :             dst[x] =
    5812             :                 (uint8_t)CLIP3(0,
    5813             :                                (int32_t)maxSampleValue,
    5814             :                                (((int32_t)srcOne[x] + (int32_t)srcTwo[x] + 1) >>  // + 1 is added before the shift
    5815             :                                 IFShiftAvcStyle));
    5816             :         }
    5817             :         srcOne += srcOneStride;  // each plane advances by its own stride
    5818             :         srcTwo += srcTwoStride;
    5819             :         dst += dst_stride;
    5820             :     }
    5821             : 
    5822             :     return;
    5823             : }
    5824             : #endif
    5825             : /*******************************************
    5826             :  * InterpolateSearchRegionAVC
    5827             :  *   interpolates the luma search area at half-pel positions
    5828             :  *   only the b, h and j positions of the table in the body are produced
    5829             :  *   each of them with the F1 filter, per the comments on the calls
    5830             :  *   three buffers receive the results: pos_b_buffer, pos_h_buffer, pos_j_buffer.
                     :  *   With AVCCODEL defined the work is done by
                     :  *   avc_style_luma_interpolation_filter; otherwise by
                     :  *   HorizontalPelInterpolation / VerticalPelInterpolation.
                     :  *   NOTE(review): this banner used to describe 15 interpolations into
                     :  *   15 buffers; the body below only performs the three listed above.
    5831             :  *   F0: {-4, 54, 16, -2}
    5832             :  *   F1: {-4, 36, 36, -4}
    5833             :  *   F2: {-2, 16, 54, -4}
    5834             :  ********************************************/
    5835           0 : void InterpolateSearchRegionAVC(
    5836             :     MeContext *context_ptr,  // input/output parameter, ME context ptr, used to
    5837             :                              // get/set interpolated search area Ptr
    5838             :     uint32_t listIndex,      // Reference picture list index
    5839             :     uint32_t ref_pic_index,  // second index into the pos_*_buffer arrays
    5840             :     uint8_t *searchRegionBuffer,  // input parameter, search region index, used
    5841             :                                   // to point to reference samples
    5842             :     uint32_t lumaStride,          // input parameter, reference Picture stride
    5843             :     uint32_t search_area_width,   // input parameter, search area width
    5844             :     uint32_t search_area_height,  // input parameter, search area height
    5845             :     uint32_t inputBitDepth)       // input parameter, input sample bit depth
    5846             : {
    5847             :     //      0    1    2    3
    5848             :     // 0    A    a    b    c
    5849             :     // 1    d    e    f    g
    5850             :     // 2    h    i    j    k
    5851             :     // 3    n    p    q    r
    5852             : 
    5853             :     // Position  Frac-pos Y  Frac-pos X  Horizontal filter  Vertical filter
    5854             :     // A         0           0           -                  -
    5855             :     // a         0           1           F0                 -
    5856             :     // b         0           2           F1                 -
    5857             :     // c         0           3           F2                 -
    5858             :     // d         1           0           -                  F0
    5859             :     // e         1           1           F0                 F0
    5860             :     // f         1           2           F1                 F0
    5861             :     // g         1           3           F2                 F0
    5862             :     // h         2           0           -                  F1
    5863             :     // i         2           1           F0                 F1
    5864             :     // j         2           2           F1                 F1
    5865             :     // k         2           3           F2                 F1
    5866             :     // n         3           0           -                  F2
    5867             :     // p         3           1           F0                 F2
    5868             :     // q         3           2           F1                 F2
    5869             :     // r         3           3           F2                 F2
    5870             : 
    5871             :     // Start a b c
    5872             : 
    5873             :     // The Search area needs to be a multiple of 8 to align with the ASM kernel
    5874             :     // Also the search area must be oversized by 2 to account for edge
    5875             :     // conditions
    5876           0 :     uint32_t searchAreaWidthForAsm = ROUND_UP_MUL_8(search_area_width + 2);
    5877             : 
    5878             : #ifdef AVCCODEL
    5879             : 
    5880             :     (void)inputBitDepth;  // not read on the AVCCODEL path
    5881             :     // Half pel interpolation of the search region using f1 -> pos_b_buffer
    5882           0 :     if (searchAreaWidthForAsm) {  // NOTE(review): presumably always true after the round-up of width + 2 -- confirm
    5883           0 :         avc_style_luma_interpolation_filter(
    5884           0 :             searchRegionBuffer - (ME_FILTER_TAP >> 1) * lumaStride -  // source starts ME_FILTER_TAP/2 rows above the search region
    5885           0 :                 (ME_FILTER_TAP >> 1) + 1,
    5886             :             lumaStride,
    5887             :             context_ptr->pos_b_buffer[listIndex][ref_pic_index],
    5888             :             context_ptr->interpolated_stride,
    5889             :             searchAreaWidthForAsm,
    5890             :             search_area_height + ME_FILTER_TAP,  // b plane is ME_FILTER_TAP rows taller than the search area
    5891             :             context_ptr->avctemp_buffer,
    5892             :             EB_FALSE,
    5893             :             2,
    5894             :             2);  // NOTE(review): meaning of the trailing (2, 2) is defined by the callee, not visible here
    5895             :     }
    5896             : 
    5897             :     // Half pel interpolation of the search region using f1 -> pos_h_buffer
    5898           0 :     if (searchAreaWidthForAsm) {
    5899           0 :         avc_style_luma_interpolation_filter(
    5900           0 :             searchRegionBuffer - (ME_FILTER_TAP >> 1) * lumaStride - 1 +  // one row lower than the b source, one sample left of the region
    5901             :                 lumaStride,
    5902             :             lumaStride,
    5903             :             context_ptr->pos_h_buffer[listIndex][ref_pic_index],
    5904             :             context_ptr->interpolated_stride,
    5905             :             searchAreaWidthForAsm,
    5906             :             search_area_height + 1,
    5907             :             context_ptr->avctemp_buffer,
    5908             :             EB_FALSE,
    5909             :             2,
    5910             :             8);  // NOTE(review): last argument differs from the b call (8 vs 2); semantics live in the callee
    5911             :     }
    5912             : 
    5913           0 :     if (searchAreaWidthForAsm) {
    5914             :         // Half pel interpolation of the search region using f1 -> pos_j_buffer
    5915           0 :         avc_style_luma_interpolation_filter(
    5916           0 :             context_ptr->pos_b_buffer[listIndex][ref_pic_index] +  // j is filtered from the b plane written above, not from the reference
    5917           0 :                 context_ptr->interpolated_stride,
    5918             :             context_ptr->interpolated_stride,
    5919             :             context_ptr->pos_j_buffer[listIndex][ref_pic_index],
    5920             :             context_ptr->interpolated_stride,
    5921             :             searchAreaWidthForAsm,
    5922             :             search_area_height + 1,
    5923             :             context_ptr->avctemp_buffer,
    5924             :             EB_FALSE,
    5925             :             2,
    5926             :             8);
    5927             :     }
    5928             : 
    5929             : #else
    5930             : 
    5931             :     // Half pel interpolation of the search region using f1 -> pos_b_buffer
    5932             :     HorizontalPelInterpolation(searchRegionBuffer -
    5933             :                                    (ME_FILTER_TAP >> 1) * lumaStride -
    5934             :                                    (ME_FILTER_TAP >> 1),
    5935             :                                lumaStride,
    5936             :                                search_area_width + 1,
    5937             :                                search_area_height + ME_FILTER_TAP,
    5938             :                                &(me_if_coeff[F1][0]),
    5939             :                                inputBitDepth,
    5940             :                                context_ptr->interpolated_stride,
    5941             :                                context_ptr->pos_b_buffer);  // NOTE(review): not indexed by [listIndex][ref_pic_index] as on the AVCCODEL path -- confirm this branch still builds
    5942             : 
    5943             :     // Half pel interpolation of the search region using f1 -> pos_h_buffer
    5944             :     VerticalPelInterpolation(
    5945             :         searchRegionBuffer - (ME_FILTER_TAP >> 1) * lumaStride - 1,
    5946             :         lumaStride,
    5947             :         search_area_width + 2,
    5948             :         search_area_height + 1,
    5949             :         &(me_if_coeff[F1][0]),
    5950             :         inputBitDepth,
    5951             :         context_ptr->interpolated_stride,
    5952             :         context_ptr->pos_h_buffer);  // NOTE(review): same missing [listIndex][ref_pic_index] indexing as above
    5953             : 
    5954             :     // Half pel interpolation of the search region using f1 -> pos_j_buffer
    5955             :     VerticalPelInterpolation(context_ptr->pos_b_buffer,  // j is filtered vertically from the b plane written above
    5956             :                              context_ptr->interpolated_stride,
    5957             :                              search_area_width + 1,
    5958             :                              search_area_height + 1,
    5959             :                              &(me_if_coeff[F1][0]),
    5960             :                              inputBitDepth,
    5961             :                              context_ptr->interpolated_stride,
    5962             :                              context_ptr->pos_j_buffer);  // NOTE(review): same missing [listIndex][ref_pic_index] indexing as above
    5963             : 
    5964             : #endif
    5965             : 
    5966           0 :     return;
    5967             : }
    5968             : 
    5969             : /*******************************************
    5970             :  * interpolate_search_region_AVC_chroma
    5971             :  *   interpolates the Cb and Cr search areas at half-pel positions
    5972             :  *   only the b, h and j positions of the table in the body are produced
    5973             :  *   for each position the Cb call is followed by the matching Cr call
    5974             :  *   six planes are written: pos_{b,h,j}_buffer_ch[0] (Cb) and [1] (Cr).
                     :  *   All six calls go through avc_style_luma_interpolation_filter and
                     :  *   pass the same context_ptr->avctemp_buffer.
                     :  *   NOTE(review): this banner used to be a copy of the luma one and
                     :  *   described 15 interpolations into 15 buffers; the body below only
                     :  *   performs the six calls listed above.
    5975             :  *   F0: {-4, 54, 16, -2}
    5976             :  *   F1: {-4, 36, 36, -4}
    5977             :  *   F2: {-2, 16, 54, -4}
    5978             :  ********************************************/
    5979           0 : void interpolate_search_region_AVC_chroma(
    5980             :     MeContext *context_ptr,  // input/output parameter, ME context ptr, used to
    5981             :                              // get/set interpolated search area Ptr
    5982             :     uint8_t *search_region_buffer_cb,  // input parameter, search region buffer
    5983             :                                        // cb, used to point to reference samples
    5984             :     uint8_t *search_region_buffer_cr,  // input parameter, search region buffer
    5985             :                                        // cr, used to point to reference samples
    5986             :     uint8_t **pos_b_buffer_ch, uint8_t **pos_h_buffer_ch,  // output planes: [0] is written from Cb, [1] from Cr
    5987             :     uint8_t **pos_j_buffer_ch, uint32_t interpolated_stride_ch,  // stride used for every pos_*_buffer_ch plane
    5988             :     uint32_t interpolated_full_stride_ch,  // input parameter, reference Picture
    5989             :                                            // stride
    5990             :     uint32_t search_area_width,            // input parameter, search area width
    5991             :     uint32_t search_area_height,  // input parameter, search area height
    5992             :     uint32_t input_bit_depth)     // input parameter, input sample bit depth
    5993             : {
    5994             :     //      0    1    2    3
    5995             :     // 0    A    a    b    c
    5996             :     // 1    d    e    f    g
    5997             :     // 2    h    i    j    k
    5998             :     // 3    n    p    q    r
    5999             : 
    6000             :     // Position  Frac-pos Y  Frac-pos X  Horizontal filter  Vertical filter
    6001             :     // A         0           0           -                  -
    6002             :     // a         0           1           F0                 -
    6003             :     // b         0           2           F1                 -
    6004             :     // c         0           3           F2                 -
    6005             :     // d         1           0           -                  F0
    6006             :     // e         1           1           F0                 F0
    6007             :     // f         1           2           F1                 F0
    6008             :     // g         1           3           F2                 F0
    6009             :     // h         2           0           -                  F1
    6010             :     // i         2           1           F0                 F1
    6011             :     // j         2           2           F1                 F1
    6012             :     // k         2           3           F2                 F1
    6013             :     // n         3           0           -                  F2
    6014             :     // p         3           1           F0                 F2
    6015             :     // q         3           2           F1                 F2
    6016             :     // r         3           3           F2                 F2
    6017             : 
    6018             :     // Start a b c
    6019             : 
    6020             :     // The Search area needs to be a multiple of 8 to align with the ASM kernel
    6021             :     // Also the search area must be oversized by 2 to account for edge
    6022             :     // conditions
    6023           0 :     uint32_t searchAreaWidthForAsm = ROUND_UP_MUL_8(search_area_width + 2);
    6024             : 
    6025             :     (void)input_bit_depth;  // never read in this function
    6026             :     // Half pel interpolation of the search region using f1 -> pos_b_buffer
    6027           0 :     if (searchAreaWidthForAsm) {  // NOTE(review): presumably always true after the round-up of width + 2 -- confirm
    6028             :         // Cb
    6029           0 :         avc_style_luma_interpolation_filter(
    6030             :             search_region_buffer_cb -
    6031           0 :                 (ME_FILTER_TAP >> 1) * interpolated_full_stride_ch -  // source starts ME_FILTER_TAP/2 rows above the search region
    6032           0 :                 (ME_FILTER_TAP >> 1) + 1,
    6033             :             interpolated_full_stride_ch,
    6034             :             pos_b_buffer_ch[0],
    6035             :             interpolated_stride_ch,
    6036             :             searchAreaWidthForAsm,
    6037             :             search_area_height + ME_FILTER_TAP,  // b plane is ME_FILTER_TAP rows taller than the search area
    6038             :             context_ptr->avctemp_buffer,
    6039             :             EB_FALSE,
    6040             :             2,
    6041             :             2);  // NOTE(review): meaning of the trailing (2, 2) is defined by the callee, not visible here
    6042             :         // Cr
    6043           0 :         avc_style_luma_interpolation_filter(
    6044             :             search_region_buffer_cr -
    6045           0 :                 (ME_FILTER_TAP >> 1) * interpolated_full_stride_ch -
    6046           0 :                 (ME_FILTER_TAP >> 1) + 1,
    6047             :             interpolated_full_stride_ch,
    6048           0 :             pos_b_buffer_ch[1],
    6049             :             interpolated_stride_ch,
    6050             :             searchAreaWidthForAsm,
    6051             :             search_area_height + ME_FILTER_TAP,
    6052             :             context_ptr->avctemp_buffer,
    6053             :             EB_FALSE,
    6054             :             2,
    6055             :             2);
    6056             :     }
    6057             : 
    6058             :     // Half pel interpolation of the search region using f1 -> pos_h_buffer
    6059           0 :     if (searchAreaWidthForAsm) {
    6060             :         // Cb
    6061           0 :         avc_style_luma_interpolation_filter(
    6062             :             search_region_buffer_cb -
    6063           0 :                 (ME_FILTER_TAP >> 1) * interpolated_full_stride_ch - 1 +  // one row lower than the b source, one sample left of the region
    6064             :                 interpolated_full_stride_ch,
    6065             :             interpolated_full_stride_ch,
    6066             :             pos_h_buffer_ch[0],
    6067             :             interpolated_stride_ch,
    6068             :             searchAreaWidthForAsm,
    6069             :             search_area_height + 1,
    6070             :             context_ptr->avctemp_buffer,
    6071             :             EB_FALSE,
    6072             :             2,
    6073             :             8);  // NOTE(review): last argument differs from the b calls (8 vs 2); semantics live in the callee
    6074             :         // Cr
    6075           0 :         avc_style_luma_interpolation_filter(
    6076             :             search_region_buffer_cr -
    6077           0 :                 (ME_FILTER_TAP >> 1) * interpolated_full_stride_ch - 1 +
    6078             :                 interpolated_full_stride_ch,
    6079             :             interpolated_full_stride_ch,
    6080           0 :             pos_h_buffer_ch[1],
    6081             :             interpolated_stride_ch,
    6082             :             searchAreaWidthForAsm,
    6083             :             search_area_height + 1,
    6084             :             context_ptr->avctemp_buffer,
    6085             :             EB_FALSE,
    6086             :             2,
    6087             :             8);
    6088             :     }
    6089             : 
    6090             :     // Half pel interpolation of the search region using f1 -> pos_j_buffer
    6091           0 :     if (searchAreaWidthForAsm) {
    6092             :         // Cb
    6093           0 :         avc_style_luma_interpolation_filter(
    6094           0 :             pos_b_buffer_ch[0] + interpolated_stride_ch,  // j is filtered from the Cb b plane written above, not from the reference
    6095             :             interpolated_stride_ch,
    6096             :             pos_j_buffer_ch[0],
    6097             :             interpolated_stride_ch,
    6098             :             searchAreaWidthForAsm,
    6099             :             search_area_height + 1,
    6100             :             context_ptr->avctemp_buffer,
    6101             :             EB_FALSE,
    6102             :             2,
    6103             :             8);
    6104             :         // Cr
    6105           0 :         avc_style_luma_interpolation_filter(
    6106           0 :             pos_b_buffer_ch[1] + interpolated_stride_ch,  // same derivation from the Cr b plane
    6107             :             interpolated_stride_ch,
    6108           0 :             pos_j_buffer_ch[1],
    6109             :             interpolated_stride_ch,
    6110             :             searchAreaWidthForAsm,
    6111             :             search_area_height + 1,
    6112             :             context_ptr->avctemp_buffer,
    6113             :             EB_FALSE,
    6114             :             2,
    6115             :             8);
    6116             :     }
    6117           0 : }
    6118             : 
    6119             : /*******************************************
    6120             :  * PU_HalfPelRefinement
    6121             :  *   performs Half Pel refinement for one PU
    6122             :  *******************************************/
    6123           0 : static void PU_HalfPelRefinement(
    6124             :     SequenceControlSet
    6125             :         *sequence_control_set_ptr,  // input parameter, Sequence control set Ptr
    6126             :     MeContext
    6127             :         *context_ptr,  // input parameter, ME context Ptr, used to get SB Ptr
    6128             :     uint8_t *refBuffer, uint32_t ref_stride, uint32_t *pBestSsd,
    6129             :     uint32_t puLcuBufferIndex,  // input parameter, PU origin, used to point to
    6130             :                                 // source samples
    6131             :     uint8_t *pos_b_buffer,  // input parameter, position "b" interpolated search
    6132             :                             // area Ptr
    6133             :     uint8_t *pos_h_buffer,  // input parameter, position "h" interpolated search
    6134             :                             // area Ptr
    6135             :     uint8_t *pos_j_buffer,  // input parameter, position "j" interpolated search
    6136             :                             // area Ptr
    6137             :     uint32_t pu_width,      // input parameter, PU width
    6138             :     uint32_t pu_height,     // input parameter, PU height
    6139             :     int16_t x_search_area_origin,  // input parameter, search area origin in the
    6140             :                                    // horizontal direction, used to point to
    6141             :                                    // reference samples
    6142             :     int16_t y_search_area_origin,  // input parameter, search area origin in the
    6143             :                                    // vertical direction, used to point to
    6144             :                                    // reference samples
    6145             :     uint32_t *pBestSad, uint32_t *pBestMV,
    6146             :     uint8_t *psubPelDirection)
    6147             : {
    6148           0 :     EncodeContext *encode_context_ptr =
    6149             :         sequence_control_set_ptr->encode_context_ptr;
    6150             : 
    6151             :     int32_t searchRegionIndex;
    6152           0 :     uint64_t bestHalfSad = 0;
    6153           0 :     uint64_t distortionLeftPosition = 0;
    6154           0 :     uint64_t distortionRightPosition = 0;
    6155           0 :     uint64_t distortionTopPosition = 0;
    6156           0 :     uint64_t distortionBottomPosition = 0;
    6157           0 :     uint64_t distortionTopLeftPosition = 0;
    6158           0 :     uint64_t distortionTopRightPosition = 0;
    6159           0 :     uint64_t distortionBottomLeftPosition = 0;
    6160           0 :     uint64_t distortionBottomRightPosition = 0;
    6161             : 
    6162             :     int16_t xMvHalf[8];
    6163             :     int16_t yMvHalf[8];
    6164             : 
    6165           0 :     int16_t x_mv = _MVXT(*pBestMV);
    6166           0 :     int16_t y_mv = _MVYT(*pBestMV);
    6167           0 :     int16_t xSearchIndex = (x_mv >> 2) - x_search_area_origin;
    6168           0 :     int16_t ySearchIndex = (y_mv >> 2) - y_search_area_origin;
    6169             : 
    6170             :     (void)sequence_control_set_ptr;
    6171             :     (void)encode_context_ptr;
    6172             : 
    6173             :     // TODO : remove these, and update the MV by just shifts
    6174             : 
    6175           0 :     xMvHalf[0] = x_mv - 2;  // L  position
    6176           0 :     xMvHalf[1] = x_mv + 2;  // R  position
    6177           0 :     xMvHalf[2] = x_mv;      // T  position
    6178           0 :     xMvHalf[3] = x_mv;      // B  position
    6179           0 :     xMvHalf[4] = x_mv - 2;  // TL position
    6180           0 :     xMvHalf[5] = x_mv + 2;  // TR position
    6181           0 :     xMvHalf[6] = x_mv + 2;  // BR position
    6182           0 :     xMvHalf[7] = x_mv - 2;  // BL position
    6183             : 
    6184           0 :     yMvHalf[0] = y_mv;      // L  position
    6185           0 :     yMvHalf[1] = y_mv;      // R  position
    6186           0 :     yMvHalf[2] = y_mv - 2;  // T  position
    6187           0 :     yMvHalf[3] = y_mv + 2;  // B  position
    6188           0 :     yMvHalf[4] = y_mv - 2;  // TL position
    6189           0 :     yMvHalf[5] = y_mv - 2;  // TR position
    6190           0 :     yMvHalf[6] = y_mv + 2;  // BR position
    6191           0 :     yMvHalf[7] = y_mv + 2;  // BL position
    6192             : 
    6193             :     // Compute SSD for the best full search candidate
    6194           0 :     if (context_ptr->fractional_search_method == SSD_SEARCH) {
    6195           0 :         *pBestSsd = (uint32_t)spatial_full_distortion_kernel(
    6196             :                 context_ptr->sb_src_ptr,
    6197             :                 puLcuBufferIndex,
    6198             :                 context_ptr->sb_src_stride,
    6199             :                 refBuffer,
    6200           0 :                 ySearchIndex * ref_stride + xSearchIndex,
    6201             :                 ref_stride,
    6202             :                 pu_width,
    6203             :                 pu_height);
    6204             :     }
    6205             :     // Use SATD only when QP mod, and RC are OFF
    6206             :     // QP mod and RC assume that ME distortion is always SAD.
    6207             :     // This problem might be solved by computing SAD for the best position after
    6208             :     // fractional search is done, or by considering the full-pel resolution SAD.
    6209             :     {
    6210             :         // L position
    6211           0 :         searchRegionIndex =
    6212           0 :             xSearchIndex +
    6213           0 :             (int16_t)context_ptr->interpolated_stride * ySearchIndex;
    6214           0 :         distortionLeftPosition =
    6215           0 :             (context_ptr->fractional_search_method == SSD_SEARCH)
    6216           0 :                 ? spatial_full_distortion_kernel(
    6217             :                           context_ptr->sb_src_ptr,
    6218             :                           puLcuBufferIndex,
    6219             :                           context_ptr->sb_src_stride,
    6220             :                           pos_b_buffer,
    6221             :                           searchRegionIndex,
    6222             :                           context_ptr->interpolated_stride,
    6223             :                           pu_width,
    6224             :                           pu_height)
    6225           0 :                 : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    6226           0 :                       ? (nxm_sad_kernel(
    6227           0 :                             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6228           0 :                             context_ptr->sb_src_stride << 1,
    6229           0 :                             &(pos_b_buffer[searchRegionIndex]),
    6230           0 :                             context_ptr->interpolated_stride << 1,
    6231             :                             pu_height >> 1,
    6232             :                             pu_width))
    6233           0 :                             << 1
    6234           0 :                       : nxm_sad_kernel(
    6235           0 :                             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6236             :                             context_ptr->sb_src_stride,
    6237           0 :                             &(pos_b_buffer[searchRegionIndex]),
    6238             :                             context_ptr->interpolated_stride,
    6239             :                             pu_height,
    6240             :                             pu_width);
    6241           0 :         if (context_ptr->fractional_search_method == SSD_SEARCH) {
    6242           0 :             if (distortionLeftPosition < *pBestSsd) {
    6243           0 :                 *pBestSad = (uint32_t)
    6244           0 :                     nxm_sad_kernel(
    6245           0 :                         &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6246             :                         context_ptr->sb_src_stride,
    6247           0 :                         &(pos_b_buffer[searchRegionIndex]),
    6248             :                         context_ptr->interpolated_stride,
    6249             :                         pu_height,
    6250             :                         pu_width);
    6251           0 :                 *pBestMV =
    6252           0 :                     ((uint16_t)yMvHalf[0] << 16) | ((uint16_t)xMvHalf[0]);
    6253           0 :                 *pBestSsd = (uint32_t)distortionLeftPosition;
    6254             :             }
    6255             :         } else {
    6256           0 :             if (distortionLeftPosition < *pBestSad) {
    6257           0 :                 *pBestSad = (uint32_t)distortionLeftPosition;
    6258           0 :                 *pBestMV =
    6259           0 :                     ((uint16_t)yMvHalf[0] << 16) | ((uint16_t)xMvHalf[0]);
    6260             :             }
    6261             :         }
    6262             :         // R position
    6263           0 :         searchRegionIndex++;
    6264           0 :         distortionRightPosition =
    6265           0 :             (context_ptr->fractional_search_method == SSD_SEARCH)
    6266           0 :                 ? spatial_full_distortion_kernel(
    6267             :                           context_ptr->sb_src_ptr,
    6268             :                           puLcuBufferIndex,
    6269             :                           context_ptr->sb_src_stride,
    6270             :                           pos_b_buffer,
    6271             :                           searchRegionIndex,
    6272             :                           context_ptr->interpolated_stride,
    6273             :                           pu_width,
    6274             :                           pu_height)
    6275           0 :                 : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    6276           0 :                       ? (nxm_sad_kernel(
    6277           0 :                             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6278           0 :                             context_ptr->sb_src_stride << 1,
    6279           0 :                             &(pos_b_buffer[searchRegionIndex]),
    6280           0 :                             context_ptr->interpolated_stride << 1,
    6281             :                             pu_height >> 1,
    6282             :                             pu_width))
    6283           0 :                             << 1
    6284           0 :                       : nxm_sad_kernel(
    6285           0 :                             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6286             :                             context_ptr->sb_src_stride,
    6287           0 :                             &(pos_b_buffer[searchRegionIndex]),
    6288             :                             context_ptr->interpolated_stride,
    6289             :                             pu_height,
    6290             :                             pu_width);
    6291           0 :         if (context_ptr->fractional_search_method == SSD_SEARCH) {
    6292           0 :             if (distortionRightPosition < *pBestSsd) {
    6293           0 :                 *pBestSad = (uint32_t)
    6294           0 :                     nxm_sad_kernel(
    6295           0 :                         &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6296             :                         context_ptr->sb_src_stride,
    6297           0 :                         &(pos_b_buffer[searchRegionIndex]),
    6298             :                         context_ptr->interpolated_stride,
    6299             :                         pu_height,
    6300             :                         pu_width);
    6301           0 :                 *pBestMV =
    6302           0 :                     ((uint16_t)yMvHalf[1] << 16) | ((uint16_t)xMvHalf[1]);
    6303           0 :                 *pBestSsd = (uint32_t)distortionRightPosition;
    6304             :             }
    6305             :         } else {
    6306           0 :             if (distortionRightPosition < *pBestSad) {
    6307           0 :                 *pBestSad = (uint32_t)distortionRightPosition;
    6308           0 :                 *pBestMV =
    6309           0 :                     ((uint16_t)yMvHalf[1] << 16) | ((uint16_t)xMvHalf[1]);
    6310             :             }
    6311             :         }
    6312             :         // T position
    6313           0 :         searchRegionIndex =
    6314           0 :             xSearchIndex +
    6315           0 :             (int16_t)context_ptr->interpolated_stride * ySearchIndex;
    6316           0 :         distortionTopPosition =
    6317           0 :             (context_ptr->fractional_search_method == SSD_SEARCH)
    6318           0 :                 ? spatial_full_distortion_kernel(
    6319             :                           context_ptr->sb_src_ptr,
    6320             :                           puLcuBufferIndex,
    6321             :                           context_ptr->sb_src_stride,
    6322             :                           pos_h_buffer,
    6323             :                           searchRegionIndex,
    6324             :                           context_ptr->interpolated_stride,
    6325             :                           pu_width,
    6326             :                           pu_height)
    6327           0 :                 : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    6328           0 :                       ? (nxm_sad_kernel(
    6329           0 :                             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6330           0 :                             context_ptr->sb_src_stride << 1,
    6331           0 :                             &(pos_h_buffer[searchRegionIndex]),
    6332           0 :                             context_ptr->interpolated_stride << 1,
    6333             :                             pu_height >> 1,
    6334             :                             pu_width))
    6335           0 :                             << 1
    6336           0 :                       : nxm_sad_kernel(
    6337           0 :                             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6338             :                             context_ptr->sb_src_stride,
    6339           0 :                             &(pos_h_buffer[searchRegionIndex]),
    6340             :                             context_ptr->interpolated_stride,
    6341             :                             pu_height,
    6342             :                             pu_width);
    6343           0 :         if (context_ptr->fractional_search_method == SSD_SEARCH) {
    6344           0 :             if (distortionTopPosition < *pBestSsd) {
    6345           0 :                 *pBestSad = (uint32_t)
    6346           0 :                     nxm_sad_kernel(
    6347           0 :                         &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6348             :                         context_ptr->sb_src_stride,
    6349           0 :                         &(pos_h_buffer[searchRegionIndex]),
    6350             :                         context_ptr->interpolated_stride,
    6351             :                         pu_height,
    6352             :                         pu_width);
    6353           0 :                 *pBestMV =
    6354           0 :                     ((uint16_t)yMvHalf[2] << 16) | ((uint16_t)xMvHalf[2]);
    6355           0 :                 *pBestSsd = (uint32_t)distortionTopPosition;
    6356             :             }
    6357             :         } else {
    6358           0 :             if (distortionTopPosition < *pBestSad) {
    6359           0 :                 *pBestSad = (uint32_t)distortionTopPosition;
    6360           0 :                 *pBestMV =
    6361           0 :                     ((uint16_t)yMvHalf[2] << 16) | ((uint16_t)xMvHalf[2]);
    6362             :             }
    6363             :         }
    6364             : 
    6365             :         // B position
    6366           0 :         searchRegionIndex += (int16_t)context_ptr->interpolated_stride;
    6367           0 :         distortionBottomPosition =
    6368           0 :             (context_ptr->fractional_search_method == SSD_SEARCH)
    6369           0 :                 ? spatial_full_distortion_kernel(
    6370             :                           context_ptr->sb_src_ptr,
    6371             :                           puLcuBufferIndex,
    6372             :                           context_ptr->sb_src_stride,
    6373             :                           pos_h_buffer,
    6374             :                           searchRegionIndex,
    6375             :                           context_ptr->interpolated_stride,
    6376             :                           pu_width,
    6377             :                           pu_height)
    6378           0 :                 : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    6379           0 :                       ? (nxm_sad_kernel(
    6380           0 :                             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6381           0 :                             context_ptr->sb_src_stride << 1,
    6382           0 :                             &(pos_h_buffer[searchRegionIndex]),
    6383           0 :                             context_ptr->interpolated_stride << 1,
    6384             :                             pu_height >> 1,
    6385             :                             pu_width))
    6386           0 :                             << 1
    6387           0 :                       : nxm_sad_kernel(
    6388           0 :                             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6389             :                             context_ptr->sb_src_stride,
    6390           0 :                             &(pos_h_buffer[searchRegionIndex]),
    6391             :                             context_ptr->interpolated_stride,
    6392             :                             pu_height,
    6393             :                             pu_width);
    6394           0 :         if (context_ptr->fractional_search_method == SSD_SEARCH) {
    6395           0 :             if (distortionBottomPosition < *pBestSsd) {
    6396           0 :                 *pBestSad = (uint32_t)
    6397           0 :                     nxm_sad_kernel(
    6398           0 :                         &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6399             :                         context_ptr->sb_src_stride,
    6400           0 :                         &(pos_h_buffer[searchRegionIndex]),
    6401             :                         context_ptr->interpolated_stride,
    6402             :                         pu_height,
    6403             :                         pu_width);
    6404           0 :                 *pBestMV =
    6405           0 :                     ((uint16_t)yMvHalf[3] << 16) | ((uint16_t)xMvHalf[3]);
    6406           0 :                 *pBestSsd = (uint32_t)distortionBottomPosition;
    6407             :             }
    6408             :         } else {
    6409           0 :             if (distortionBottomPosition < *pBestSad) {
    6410           0 :                 *pBestSad = (uint32_t)distortionBottomPosition;
    6411           0 :                 *pBestMV =
    6412           0 :                     ((uint16_t)yMvHalf[3] << 16) | ((uint16_t)xMvHalf[3]);
    6413             :             }
    6414             :         }
    6415             : 
    6416             :         // TL position
    6417           0 :         searchRegionIndex =
    6418           0 :             xSearchIndex +
    6419           0 :             (int16_t)context_ptr->interpolated_stride * ySearchIndex;
    6420           0 :         distortionTopLeftPosition =
    6421           0 :             (context_ptr->fractional_search_method == SSD_SEARCH)
    6422           0 :                 ? spatial_full_distortion_kernel(
    6423             :                           context_ptr->sb_src_ptr,
    6424             :                           puLcuBufferIndex,
    6425             :                           context_ptr->sb_src_stride,
    6426             :                           pos_j_buffer,
    6427             :                           searchRegionIndex,
    6428             :                           context_ptr->interpolated_stride,
    6429             :                           pu_width,
    6430             :                           pu_height)
    6431           0 :                 : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    6432           0 :                       ? (nxm_sad_kernel(
    6433           0 :                             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6434           0 :                             context_ptr->sb_src_stride << 1,
    6435           0 :                             &(pos_j_buffer[searchRegionIndex]),
    6436           0 :                             context_ptr->interpolated_stride << 1,
    6437             :                             pu_height >> 1,
    6438             :                             pu_width))
    6439           0 :                             << 1
    6440           0 :                       : nxm_sad_kernel(
    6441           0 :                             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6442             :                             context_ptr->sb_src_stride,
    6443           0 :                             &(pos_j_buffer[searchRegionIndex]),
    6444             :                             context_ptr->interpolated_stride,
    6445             :                             pu_height,
    6446             :                             pu_width);
    6447           0 :         if (context_ptr->fractional_search_method == SSD_SEARCH) {
    6448           0 :             if (distortionTopLeftPosition < *pBestSsd) {
    6449           0 :                 *pBestSad = (uint32_t)
    6450           0 :                     nxm_sad_kernel(
    6451           0 :                         &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6452             :                         context_ptr->sb_src_stride,
    6453           0 :                         &(pos_j_buffer[searchRegionIndex]),
    6454             :                         context_ptr->interpolated_stride,
    6455             :                         pu_height,
    6456             :                         pu_width);
    6457           0 :                 *pBestMV =
    6458           0 :                     ((uint16_t)yMvHalf[4] << 16) | ((uint16_t)xMvHalf[4]);
    6459           0 :                 *pBestSsd = (uint32_t)distortionTopLeftPosition;
    6460             :             }
    6461             :         } else {
    6462           0 :             if (distortionTopLeftPosition < *pBestSad) {
    6463           0 :                 *pBestSad = (uint32_t)distortionTopLeftPosition;
    6464           0 :                 *pBestMV =
    6465           0 :                     ((uint16_t)yMvHalf[4] << 16) | ((uint16_t)xMvHalf[4]);
    6466             :             }
    6467             :         }
    6468             : 
    6469             :         // TR position
    6470           0 :         searchRegionIndex++;
    6471           0 :         distortionTopRightPosition =
    6472           0 :             (context_ptr->fractional_search_method == SSD_SEARCH)
    6473           0 :                 ? spatial_full_distortion_kernel(
    6474             :                           context_ptr->sb_src_ptr,
    6475             :                           puLcuBufferIndex,
    6476             :                           context_ptr->sb_src_stride,
    6477             :                           pos_j_buffer,
    6478             :                           searchRegionIndex,
    6479             :                           context_ptr->interpolated_stride,
    6480             :                           pu_width,
    6481             :                           pu_height)
    6482           0 :                 : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    6483           0 :                       ? (nxm_sad_kernel(
    6484           0 :                             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6485           0 :                             context_ptr->sb_src_stride << 1,
    6486           0 :                             &(pos_j_buffer[searchRegionIndex]),
    6487           0 :                             context_ptr->interpolated_stride << 1,
    6488             :                             pu_height >> 1,
    6489             :                             pu_width))
    6490           0 :                             << 1
    6491           0 :                       : nxm_sad_kernel(
    6492           0 :                             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6493             :                             context_ptr->sb_src_stride,
    6494           0 :                             &(pos_j_buffer[searchRegionIndex]),
    6495             :                             context_ptr->interpolated_stride,
    6496             :                             pu_height,
    6497             :                             pu_width);
    6498             : 
    6499           0 :         if (context_ptr->fractional_search_method == SSD_SEARCH) {
    6500           0 :             if (distortionTopRightPosition < *pBestSsd) {
    6501           0 :                 *pBestSad = (uint32_t)
    6502           0 :                     nxm_sad_kernel(
    6503           0 :                         &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6504             :                         context_ptr->sb_src_stride,
    6505           0 :                         &(pos_j_buffer[searchRegionIndex]),
    6506             :                         context_ptr->interpolated_stride,
    6507             :                         pu_height,
    6508             :                         pu_width);
    6509           0 :                 *pBestMV =
    6510           0 :                     ((uint16_t)yMvHalf[5] << 16) | ((uint16_t)xMvHalf[5]);
    6511           0 :                 *pBestSsd = (uint32_t)distortionTopRightPosition;
    6512             :             }
    6513             :         } else {
    6514           0 :             if (distortionTopRightPosition < *pBestSad) {
    6515           0 :                 *pBestSad = (uint32_t)distortionTopRightPosition;
    6516           0 :                 *pBestMV =
    6517           0 :                     ((uint16_t)yMvHalf[5] << 16) | ((uint16_t)xMvHalf[5]);
    6518             :             }
    6519             :         }
    6520             : 
    6521             :         // BR position
    6522           0 :         searchRegionIndex += (int16_t)context_ptr->interpolated_stride;
    6523           0 :         distortionBottomRightPosition =
    6524           0 :             (context_ptr->fractional_search_method == SSD_SEARCH)
    6525           0 :                 ? spatial_full_distortion_kernel(
    6526             :                           context_ptr->sb_src_ptr,
    6527             :                           puLcuBufferIndex,
    6528             :                           context_ptr->sb_src_stride,
    6529             :                           pos_j_buffer,
    6530             :                           searchRegionIndex,
    6531             :                           context_ptr->interpolated_stride,
    6532             :                           pu_width,
    6533             :                           pu_height)
    6534           0 :                 : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    6535           0 :                       ? (nxm_sad_kernel(
    6536           0 :                             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6537           0 :                             context_ptr->sb_src_stride << 1,
    6538           0 :                             &(pos_j_buffer[searchRegionIndex]),
    6539           0 :                             context_ptr->interpolated_stride << 1,
    6540             :                             pu_height >> 1,
    6541             :                             pu_width))
    6542           0 :                             << 1
    6543           0 :                       : nxm_sad_kernel(
    6544           0 :                             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6545             :                             context_ptr->sb_src_stride,
    6546           0 :                             &(pos_j_buffer[searchRegionIndex]),
    6547             :                             context_ptr->interpolated_stride,
    6548             :                             pu_height,
    6549             :                             pu_width);
    6550           0 :         if (context_ptr->fractional_search_method == SSD_SEARCH) {
    6551           0 :             if (distortionBottomRightPosition < *pBestSsd) {
    6552           0 :                 *pBestSad = (uint32_t)
    6553           0 :                     nxm_sad_kernel(
    6554           0 :                         &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6555             :                         context_ptr->sb_src_stride,
    6556           0 :                         &(pos_j_buffer[searchRegionIndex]),
    6557             :                         context_ptr->interpolated_stride,
    6558             :                         pu_height,
    6559             :                         pu_width);
    6560           0 :                 *pBestMV =
    6561           0 :                     ((uint16_t)yMvHalf[6] << 16) | ((uint16_t)xMvHalf[6]);
    6562           0 :                 *pBestSsd = (uint32_t)distortionBottomRightPosition;
    6563             :             }
    6564             :         } else {
    6565           0 :             if (distortionBottomRightPosition < *pBestSad) {
    6566           0 :                 *pBestSad = (uint32_t)distortionBottomRightPosition;
    6567           0 :                 *pBestMV =
    6568           0 :                     ((uint16_t)yMvHalf[6] << 16) | ((uint16_t)xMvHalf[6]);
    6569             :             }
    6570             :         }
    6571             : 
    6572             :         // BL position
    6573           0 :         searchRegionIndex--;
    6574           0 :         distortionBottomLeftPosition =
    6575           0 :             (context_ptr->fractional_search_method == SSD_SEARCH)
    6576           0 :                 ? spatial_full_distortion_kernel(
    6577             :                           context_ptr->sb_src_ptr,
    6578             :                           puLcuBufferIndex,
    6579             :                           context_ptr->sb_src_stride,
    6580             :                           pos_j_buffer,
    6581             :                           searchRegionIndex,
    6582             :                           context_ptr->interpolated_stride,
    6583             :                           pu_width,
    6584             :                           pu_height)
    6585           0 :                 : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    6586           0 :                       ? (nxm_sad_kernel(
    6587           0 :                             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6588           0 :                             context_ptr->sb_src_stride << 1,
    6589           0 :                             &(pos_j_buffer[searchRegionIndex]),
    6590           0 :                             context_ptr->interpolated_stride << 1,
    6591             :                             pu_height >> 1,
    6592             :                             pu_width))
    6593           0 :                             << 1
    6594           0 :                       : (nxm_sad_kernel(
    6595           0 :                             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6596             :                             context_ptr->sb_src_stride,
    6597           0 :                             &(pos_j_buffer[searchRegionIndex]),
    6598             :                             context_ptr->interpolated_stride,
    6599             :                             pu_height,
    6600             :                             pu_width));
    6601             : 
    6602           0 :         if (context_ptr->fractional_search_method == SSD_SEARCH) {
    6603           0 :             if (distortionBottomLeftPosition < *pBestSsd) {
    6604           0 :                 *pBestSad = (uint32_t)(
    6605           0 :                     nxm_sad_kernel(
    6606           0 :                         &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
    6607             :                         context_ptr->sb_src_stride,
    6608           0 :                         &(pos_j_buffer[searchRegionIndex]),
    6609             :                         context_ptr->interpolated_stride,
    6610             :                         pu_height,
    6611             :                         pu_width));
    6612           0 :                 *pBestMV =
    6613           0 :                     ((uint16_t)yMvHalf[7] << 16) | ((uint16_t)xMvHalf[7]);
    6614           0 :                 *pBestSsd = (uint32_t)distortionBottomLeftPosition;
    6615             :             }
    6616             :         } else {
    6617           0 :             if (distortionBottomLeftPosition < *pBestSad) {
    6618           0 :                 *pBestSad = (uint32_t)distortionBottomLeftPosition;
    6619           0 :                 *pBestMV =
    6620           0 :                     ((uint16_t)yMvHalf[7] << 16) | ((uint16_t)xMvHalf[7]);
    6621             :             }
    6622             :         }
    6623             :     }
    6624             : 
    6625           0 :     bestHalfSad =
    6626           0 :         MIN(distortionLeftPosition,
    6627             :             MIN(distortionRightPosition,
    6628             :                 MIN(distortionTopPosition,
    6629             :                     MIN(distortionBottomPosition,
    6630             :                         MIN(distortionTopLeftPosition,
    6631             :                             MIN(distortionTopRightPosition,
    6632             :                                 MIN(distortionBottomLeftPosition,
    6633             :                                     distortionBottomRightPosition)))))));
    6634             : 
    6635           0 :     if (bestHalfSad == distortionLeftPosition)
    6636           0 :         *psubPelDirection = LEFT_POSITION;
    6637           0 :     else if (bestHalfSad == distortionRightPosition)
    6638           0 :         *psubPelDirection = RIGHT_POSITION;
    6639           0 :     else if (bestHalfSad == distortionTopPosition)
    6640           0 :         *psubPelDirection = TOP_POSITION;
    6641           0 :     else if (bestHalfSad == distortionBottomPosition)
    6642           0 :         *psubPelDirection = BOTTOM_POSITION;
    6643           0 :     else if (bestHalfSad == distortionTopLeftPosition)
    6644           0 :         *psubPelDirection = TOP_LEFT_POSITION;
    6645           0 :     else if (bestHalfSad == distortionTopRightPosition)
    6646           0 :         *psubPelDirection = TOP_RIGHT_POSITION;
    6647           0 :     else if (bestHalfSad == distortionBottomLeftPosition)
    6648           0 :         *psubPelDirection = BOTTOM_LEFT_POSITION;
    6649           0 :     else if (bestHalfSad == distortionBottomRightPosition)
    6650           0 :         *psubPelDirection = BOTTOM_RIGHT_POSITION;
    6651           0 :     return;
    6652             : }
    6653             : 
    6654             : /*******************************************
    6655             :  * HalfPelSearch_LCU
    6656             :  *   performs half-pel refinement for the 85 square PUs, plus the non-square PUs when pic_depth_mode <= PIC_ALL_C_DEPTH_MODE
    6657             :  *******************************************/
    6658           0 : void HalfPelSearch_LCU(
    6659             :     SequenceControlSet
    6660             :         *sequence_control_set_ptr,  // input parameter, Sequence control set Ptr
    6661             :     PictureParentControlSet *picture_control_set_ptr,
    6662             :     MeContext *context_ptr,  // input/output parameter, ME context Ptr, used to
    6663             :                              // get/update ME results
    6664             :     uint8_t *refBuffer, uint32_t ref_stride,
    6665             :     uint8_t *pos_b_buffer,  // input parameter, position "b" interpolated search
    6666             :                             // area Ptr
    6667             :     uint8_t *pos_h_buffer,  // input parameter, position "h" interpolated search
    6668             :                             // area Ptr
    6669             :     uint8_t *pos_j_buffer,  // input parameter, position "j" interpolated search
    6670             :                             // area Ptr
    6671             :     int16_t x_search_area_origin,  // input parameter, search area origin in the
    6672             :                                    // horizontal direction, used to point to
    6673             :                                    // reference samples
    6674             :     int16_t y_search_area_origin,  // input parameter, search area origin in the
    6675             :                                    // vertical direction, used to point to
    6676             :                                    // reference samples
    6677             :     EbBool disable8x8CuInMeFlag, EbBool enableHalfPel32x32,
    6678             :     EbBool enableHalfPel16x16, EbBool enableHalfPel8x8)
    6679             : {
    6680             :     uint32_t idx;
    6681             :     uint32_t pu_index;
    6682             :     uint32_t puShiftXIndex;
    6683             :     uint32_t puShiftYIndex;
    6684             :     uint32_t puLcuBufferIndex;
    6685             :     uint32_t posbBufferIndex;
    6686             :     uint32_t poshBufferIndex;
    6687             :     uint32_t posjBufferIndex;
    6688             : 
    6689           0 :     if (context_ptr->fractional_search64x64)
    6690           0 :         PU_HalfPelRefinement(sequence_control_set_ptr,
    6691             :                              context_ptr,
    6692             :                              &(refBuffer[0]),
    6693             :                              ref_stride,
    6694             :                              context_ptr->p_best_ssd64x64,
    6695             :                              0,
    6696             :                              &(pos_b_buffer[0]),
    6697             :                              &(pos_h_buffer[0]),
    6698             :                              &(pos_j_buffer[0]),
    6699             :                              64,
    6700             :                              64,
    6701             :                              x_search_area_origin,
    6702             :                              y_search_area_origin,
    6703             :                              context_ptr->p_best_sad64x64,
    6704             :                              context_ptr->p_best_mv64x64,
    6705             :                              &context_ptr->psub_pel_direction64x64);
    6706             : 
    6707           0 :     if (enableHalfPel32x32) {
    6708             :         // 32x32 [4 partitions]
    6709           0 :         for (pu_index = 0; pu_index < 4; ++pu_index) {
    6710           0 :             puShiftXIndex = (pu_index & 0x01) << 5;
    6711           0 :             puShiftYIndex = (pu_index >> 1) << 5;
    6712             : 
    6713           0 :             puLcuBufferIndex =
    6714           0 :                 puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
    6715           0 :             posbBufferIndex = puShiftXIndex +
    6716           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6717           0 :             poshBufferIndex = puShiftXIndex +
    6718           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6719           0 :             posjBufferIndex = puShiftXIndex +
    6720           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6721             : 
    6722           0 :             PU_HalfPelRefinement(
    6723             :                 sequence_control_set_ptr,
    6724             :                 context_ptr,
    6725           0 :                 &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
    6726             :                 ref_stride,
    6727           0 :                 &context_ptr->p_best_ssd32x32[pu_index],
    6728             :                 puLcuBufferIndex,
    6729             :                 &(pos_b_buffer[posbBufferIndex]),
    6730             :                 &(pos_h_buffer[poshBufferIndex]),
    6731             :                 &(pos_j_buffer[posjBufferIndex]),
    6732             :                 32,
    6733             :                 32,
    6734             :                 x_search_area_origin,
    6735             :                 y_search_area_origin,
    6736           0 :                 &context_ptr->p_best_sad32x32[pu_index],
    6737           0 :                 &context_ptr->p_best_mv32x32[pu_index],
    6738             :                 &context_ptr->psub_pel_direction32x32[pu_index]);
    6739             :         }
    6740             :     }
    6741           0 :     if (enableHalfPel16x16) {
    6742             :         // 16x16 [16 partitions]
    6743           0 :         for (pu_index = 0; pu_index < 16; ++pu_index) {
    6744           0 :             idx = tab16x16[pu_index];
    6745             : 
    6746           0 :             puShiftXIndex = (pu_index & 0x03) << 4;
    6747           0 :             puShiftYIndex = (pu_index >> 2) << 4;
    6748             : 
    6749           0 :             puLcuBufferIndex =
    6750           0 :                 puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
    6751           0 :             posbBufferIndex = puShiftXIndex +
    6752           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6753           0 :             poshBufferIndex = puShiftXIndex +
    6754           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6755           0 :             posjBufferIndex = puShiftXIndex +
    6756           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6757             : 
    6758           0 :             PU_HalfPelRefinement(
    6759             :                 sequence_control_set_ptr,
    6760             :                 context_ptr,
    6761           0 :                 &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
    6762             :                 ref_stride,
    6763           0 :                 &context_ptr->p_best_ssd16x16[idx],
    6764             :                 puLcuBufferIndex,
    6765             :                 &(pos_b_buffer[posbBufferIndex]),
    6766             :                 &(pos_h_buffer[poshBufferIndex]),
    6767             :                 &(pos_j_buffer[posjBufferIndex]),
    6768             :                 16,
    6769             :                 16,
    6770             :                 x_search_area_origin,
    6771             :                 y_search_area_origin,
    6772           0 :                 &context_ptr->p_best_sad16x16[idx],
    6773           0 :                 &context_ptr->p_best_mv16x16[idx],
    6774             :                 &context_ptr->psub_pel_direction16x16[idx]);
    6775             :         }
    6776             :     }
    6777           0 :     if (enableHalfPel8x8) {
    6778             :         // 8x8   [64 partitions]
    6779           0 :         if (!disable8x8CuInMeFlag) {
    6780           0 :             for (pu_index = 0; pu_index < 64; ++pu_index) {
    6781           0 :                 idx = tab8x8[pu_index];  // TODO bitwise this
    6782             : 
    6783           0 :                 puShiftXIndex = (pu_index & 0x07) << 3;
    6784           0 :                 puShiftYIndex = (pu_index >> 3) << 3;
    6785             : 
    6786           0 :                 puLcuBufferIndex =
    6787           0 :                     puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
    6788             : 
    6789           0 :                 posbBufferIndex =
    6790             :                     puShiftXIndex +
    6791           0 :                     puShiftYIndex * context_ptr->interpolated_stride;
    6792           0 :                 poshBufferIndex =
    6793             :                     puShiftXIndex +
    6794           0 :                     puShiftYIndex * context_ptr->interpolated_stride;
    6795           0 :                 posjBufferIndex =
    6796             :                     puShiftXIndex +
    6797           0 :                     puShiftYIndex * context_ptr->interpolated_stride;
    6798             : 
    6799           0 :                 PU_HalfPelRefinement(
    6800             :                     sequence_control_set_ptr,
    6801             :                     context_ptr,
    6802           0 :                     &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
    6803             :                     ref_stride,
    6804           0 :                     &context_ptr->p_best_ssd8x8[idx],
    6805             :                     puLcuBufferIndex,
    6806             :                     &(pos_b_buffer[posbBufferIndex]),
    6807             :                     &(pos_h_buffer[poshBufferIndex]),
    6808             :                     &(pos_j_buffer[posjBufferIndex]),
    6809             :                     8,
    6810             :                     8,
    6811             :                     x_search_area_origin,
    6812             :                     y_search_area_origin,
    6813           0 :                     &context_ptr->p_best_sad8x8[idx],
    6814           0 :                     &context_ptr->p_best_mv8x8[idx],
    6815             :                     &context_ptr->psub_pel_direction8x8[idx]);
    6816             :             }
    6817             :         }
    6818             :     }
    6819           0 :     if (picture_control_set_ptr->pic_depth_mode <= PIC_ALL_C_DEPTH_MODE) {
    6820             :         // 64x32
    6821           0 :         for (pu_index = 0; pu_index < 2; ++pu_index) {
    6822           0 :             puShiftXIndex = 0;
    6823           0 :             puShiftYIndex = pu_index << 5;
    6824             : 
    6825           0 :             puLcuBufferIndex =
    6826           0 :                 puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
    6827             : 
    6828           0 :             posbBufferIndex = puShiftXIndex +
    6829           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6830           0 :             poshBufferIndex = puShiftXIndex +
    6831           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6832           0 :             posjBufferIndex = puShiftXIndex +
    6833           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6834             : 
    6835           0 :             PU_HalfPelRefinement(
    6836             :                 sequence_control_set_ptr,
    6837             :                 context_ptr,
    6838           0 :                 &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
    6839             :                 ref_stride,
    6840           0 :                 &context_ptr->p_best_ssd64x32[pu_index],
    6841             :                 puLcuBufferIndex,
    6842             :                 &(pos_b_buffer[posbBufferIndex]),
    6843             :                 &(pos_h_buffer[poshBufferIndex]),
    6844             :                 &(pos_j_buffer[posjBufferIndex]),
    6845             :                 64,
    6846             :                 32,
    6847             :                 x_search_area_origin,
    6848             :                 y_search_area_origin,
    6849           0 :                 &context_ptr->p_best_sad64x32[pu_index],
    6850           0 :                 &context_ptr->p_best_mv64x32[pu_index],
    6851             :                 &context_ptr->psub_pel_direction64x32[pu_index]);
    6852             :         }
    6853             : 
    6854             :         // 32x16
    6855           0 :         for (pu_index = 0; pu_index < 8; ++pu_index) {
    6856           0 :             idx = tab32x16[pu_index];  // TODO bitwise this
    6857             : 
    6858           0 :             puShiftXIndex = (pu_index & 0x01) << 5;
    6859           0 :             puShiftYIndex = (pu_index >> 1) << 4;
    6860             : 
    6861           0 :             puLcuBufferIndex =
    6862           0 :                 puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
    6863             : 
    6864           0 :             posbBufferIndex = puShiftXIndex +
    6865           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6866           0 :             poshBufferIndex = puShiftXIndex +
    6867           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6868           0 :             posjBufferIndex = puShiftXIndex +
    6869           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6870             : 
    6871           0 :             PU_HalfPelRefinement(
    6872             :                 sequence_control_set_ptr,
    6873             :                 context_ptr,
    6874           0 :                 &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
    6875             :                 ref_stride,
    6876           0 :                 &context_ptr->p_best_ssd32x16[idx],
    6877             :                 puLcuBufferIndex,
    6878             :                 &(pos_b_buffer[posbBufferIndex]),
    6879             :                 &(pos_h_buffer[poshBufferIndex]),
    6880             :                 &(pos_j_buffer[posjBufferIndex]),
    6881             :                 32,
    6882             :                 16,
    6883             :                 x_search_area_origin,
    6884             :                 y_search_area_origin,
    6885           0 :                 &context_ptr->p_best_sad32x16[idx],
    6886           0 :                 &context_ptr->p_best_mv32x16[idx],
    6887             :                 &context_ptr->psub_pel_direction32x16[idx]);
    6888             :         }
    6889             : 
    6890             :         // 16x8
    6891           0 :         for (pu_index = 0; pu_index < 32; ++pu_index) {
    6892           0 :             idx = tab16x8[pu_index];
    6893             : 
    6894           0 :             puShiftXIndex = (pu_index & 0x03) << 4;
    6895           0 :             puShiftYIndex = (pu_index >> 2) << 3;
    6896             : 
    6897           0 :             puLcuBufferIndex =
    6898           0 :                 puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
    6899             : 
    6900           0 :             posbBufferIndex = puShiftXIndex +
    6901           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6902           0 :             poshBufferIndex = puShiftXIndex +
    6903           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6904           0 :             posjBufferIndex = puShiftXIndex +
    6905           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6906             : 
    6907           0 :             PU_HalfPelRefinement(
    6908             :                 sequence_control_set_ptr,
    6909             :                 context_ptr,
    6910           0 :                 &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
    6911             :                 ref_stride,
    6912           0 :                 &context_ptr->p_best_ssd16x8[idx],
    6913             :                 puLcuBufferIndex,
    6914             :                 &(pos_b_buffer[posbBufferIndex]),
    6915             :                 &(pos_h_buffer[poshBufferIndex]),
    6916             :                 &(pos_j_buffer[posjBufferIndex]),
    6917             :                 16,
    6918             :                 8,
    6919             :                 x_search_area_origin,
    6920             :                 y_search_area_origin,
    6921           0 :                 &context_ptr->p_best_sad16x8[idx],
    6922           0 :                 &context_ptr->p_best_mv16x8[idx],
    6923             :                 &context_ptr->psub_pel_direction16x8[idx]);
    6924             :         }
    6925             : 
    6926             :         // 32x64
    6927           0 :         for (pu_index = 0; pu_index < 2; ++pu_index) {
    6928           0 :             puShiftXIndex = pu_index << 5;
    6929           0 :             puShiftYIndex = 0;
    6930             : 
    6931           0 :             puLcuBufferIndex =
    6932           0 :                 puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
    6933             : 
    6934           0 :             posbBufferIndex = puShiftXIndex +
    6935           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6936           0 :             poshBufferIndex = puShiftXIndex +
    6937           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6938           0 :             posjBufferIndex = puShiftXIndex +
    6939           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6940             : 
    6941           0 :             PU_HalfPelRefinement(
    6942             :                 sequence_control_set_ptr,
    6943             :                 context_ptr,
    6944           0 :                 &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
    6945             :                 ref_stride,
    6946           0 :                 &context_ptr->p_best_ssd32x64[pu_index],
    6947             :                 puLcuBufferIndex,
    6948             :                 &(pos_b_buffer[posbBufferIndex]),
    6949             :                 &(pos_h_buffer[poshBufferIndex]),
    6950             :                 &(pos_j_buffer[posjBufferIndex]),
    6951             :                 32,
    6952             :                 64,
    6953             :                 x_search_area_origin,
    6954             :                 y_search_area_origin,
    6955           0 :                 &context_ptr->p_best_sad32x64[pu_index],
    6956           0 :                 &context_ptr->p_best_mv32x64[pu_index],
    6957             :                 &context_ptr->psub_pel_direction32x64[pu_index]);
    6958             :         }
    6959             : 
    6960             :         // 16x32
    6961           0 :         for (pu_index = 0; pu_index < 8; ++pu_index) {
    6962           0 :             idx = tab16x32[pu_index];
    6963             : 
    6964           0 :             puShiftXIndex = (pu_index & 0x03) << 4;
    6965           0 :             puShiftYIndex = (pu_index >> 2) << 5;
    6966             : 
    6967           0 :             puLcuBufferIndex =
    6968           0 :                 puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
    6969             : 
    6970           0 :             posbBufferIndex = puShiftXIndex +
    6971           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6972           0 :             poshBufferIndex = puShiftXIndex +
    6973           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6974           0 :             posjBufferIndex = puShiftXIndex +
    6975           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    6976             : 
    6977           0 :             PU_HalfPelRefinement(
    6978             :                 sequence_control_set_ptr,
    6979             :                 context_ptr,
    6980           0 :                 &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
    6981             :                 ref_stride,
    6982           0 :                 &context_ptr->p_best_ssd16x32[idx],
    6983             :                 puLcuBufferIndex,
    6984             :                 &(pos_b_buffer[posbBufferIndex]),
    6985             :                 &(pos_h_buffer[poshBufferIndex]),
    6986             :                 &(pos_j_buffer[posjBufferIndex]),
    6987             :                 16,
    6988             :                 32,
    6989             :                 x_search_area_origin,
    6990             :                 y_search_area_origin,
    6991           0 :                 &context_ptr->p_best_sad16x32[idx],
    6992           0 :                 &context_ptr->p_best_mv16x32[idx],
    6993             :                 &context_ptr->psub_pel_direction16x32[idx]);
    6994             :         }
    6995             : 
    6996             :         // 8x16
    6997           0 :         for (pu_index = 0; pu_index < 32; ++pu_index) {
    6998           0 :             idx = tab8x16[pu_index];
    6999             : 
    7000           0 :             puShiftXIndex = (pu_index & 0x07) << 3;
    7001           0 :             puShiftYIndex = (pu_index >> 3) << 4;
    7002             : 
    7003           0 :             puLcuBufferIndex =
    7004           0 :                 puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
    7005             : 
    7006           0 :             posbBufferIndex = puShiftXIndex +
    7007           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    7008           0 :             poshBufferIndex = puShiftXIndex +
    7009           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    7010           0 :             posjBufferIndex = puShiftXIndex +
    7011           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    7012             : 
    7013           0 :             PU_HalfPelRefinement(
    7014             :                 sequence_control_set_ptr,
    7015             :                 context_ptr,
    7016           0 :                 &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
    7017             :                 ref_stride,
    7018           0 :                 &context_ptr->p_best_ssd8x16[idx],
    7019             :                 puLcuBufferIndex,
    7020             :                 &(pos_b_buffer[posbBufferIndex]),
    7021             :                 &(pos_h_buffer[poshBufferIndex]),
    7022             :                 &(pos_j_buffer[posjBufferIndex]),
    7023             :                 8,
    7024             :                 16,
    7025             :                 x_search_area_origin,
    7026             :                 y_search_area_origin,
    7027           0 :                 &context_ptr->p_best_sad8x16[idx],
    7028           0 :                 &context_ptr->p_best_mv8x16[idx],
    7029             :                 &context_ptr->psub_pel_direction8x16[idx]);
    7030             :         }
    7031             : 
    7032             :         // 32x8
    7033           0 :         for (pu_index = 0; pu_index < 16; ++pu_index) {
    7034           0 :             idx = tab32x8[pu_index];
    7035             : 
    7036           0 :             puShiftXIndex = (pu_index & 0x01) << 5;
    7037           0 :             puShiftYIndex = (pu_index >> 1) << 3;
    7038             : 
    7039           0 :             puLcuBufferIndex =
    7040           0 :                 puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
    7041             : 
    7042           0 :             posbBufferIndex = puShiftXIndex +
    7043           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    7044           0 :             poshBufferIndex = puShiftXIndex +
    7045           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    7046           0 :             posjBufferIndex = puShiftXIndex +
    7047           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    7048             : 
    7049           0 :             PU_HalfPelRefinement(
    7050             :                 sequence_control_set_ptr,
    7051             :                 context_ptr,
    7052           0 :                 &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
    7053             :                 ref_stride,
    7054           0 :                 &context_ptr->p_best_ssd32x8[idx],
    7055             :                 puLcuBufferIndex,
    7056             :                 &(pos_b_buffer[posbBufferIndex]),
    7057             :                 &(pos_h_buffer[poshBufferIndex]),
    7058             :                 &(pos_j_buffer[posjBufferIndex]),
    7059             :                 32,
    7060             :                 8,
    7061             :                 x_search_area_origin,
    7062             :                 y_search_area_origin,
    7063           0 :                 &context_ptr->p_best_sad32x8[idx],
    7064           0 :                 &context_ptr->p_best_mv32x8[idx],
    7065             :                 &context_ptr->psub_pel_direction32x8[idx]);
    7066             :         }
    7067             : 
    7068           0 :         for (pu_index = 0; pu_index < 16; ++pu_index) {
    7069           0 :             idx = tab8x32[pu_index];
    7070             : 
    7071           0 :             puShiftXIndex = (pu_index & 0x07) << 3;
    7072           0 :             puShiftYIndex = (pu_index >> 3) << 5;
    7073             : 
    7074           0 :             puLcuBufferIndex =
    7075           0 :                 puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
    7076           0 :             posbBufferIndex = puShiftXIndex +
    7077           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    7078           0 :             poshBufferIndex = puShiftXIndex +
    7079           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    7080           0 :             posjBufferIndex = puShiftXIndex +
    7081           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    7082             : 
    7083           0 :             PU_HalfPelRefinement(
    7084             :                 sequence_control_set_ptr,
    7085             :                 context_ptr,
    7086           0 :                 &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
    7087             :                 ref_stride,
    7088           0 :                 &context_ptr->p_best_ssd8x32[idx],
    7089             :                 puLcuBufferIndex,
    7090             :                 &(pos_b_buffer[posbBufferIndex]),
    7091             :                 &(pos_h_buffer[poshBufferIndex]),
    7092             :                 &(pos_j_buffer[posjBufferIndex]),
    7093             :                 8,
    7094             :                 32,
    7095             :                 x_search_area_origin,
    7096             :                 y_search_area_origin,
    7097           0 :                 &context_ptr->p_best_sad8x32[idx],
    7098           0 :                 &context_ptr->p_best_mv8x32[idx],
    7099             :                 &context_ptr->psub_pel_direction8x32[idx]);
    7100             :         }
    7101             : 
    7102           0 :         for (pu_index = 0; pu_index < 4; ++pu_index) {
    7103           0 :             idx = pu_index;
    7104             : 
    7105           0 :             puShiftXIndex = 0;
    7106           0 :             puShiftYIndex = pu_index << 4;
    7107             : 
    7108           0 :             puLcuBufferIndex =
    7109           0 :                 puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
    7110           0 :             posbBufferIndex = puShiftXIndex +
    7111           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    7112           0 :             poshBufferIndex = puShiftXIndex +
    7113           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    7114           0 :             posjBufferIndex = puShiftXIndex +
    7115           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    7116             : 
    7117           0 :             PU_HalfPelRefinement(
    7118             :                 sequence_control_set_ptr,
    7119             :                 context_ptr,
    7120           0 :                 &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
    7121             :                 ref_stride,
    7122           0 :                 &context_ptr->p_best_ssd64x16[idx],
    7123             :                 puLcuBufferIndex,
    7124             :                 &(pos_b_buffer[posbBufferIndex]),
    7125             :                 &(pos_h_buffer[poshBufferIndex]),
    7126             :                 &(pos_j_buffer[posjBufferIndex]),
    7127             :                 64,
    7128             :                 16,
    7129             :                 x_search_area_origin,
    7130             :                 y_search_area_origin,
    7131           0 :                 &context_ptr->p_best_sad64x16[idx],
    7132           0 :                 &context_ptr->p_best_mv64x16[idx],
    7133             :                 &context_ptr->psub_pel_direction64x16[idx]);
    7134             :         }
    7135             : 
    7136           0 :         for (pu_index = 0; pu_index < 4; ++pu_index) {
    7137           0 :             idx = pu_index;
    7138             : 
    7139           0 :             puShiftXIndex = pu_index << 4;
    7140           0 :             puShiftYIndex = 0;
    7141             : 
    7142           0 :             puLcuBufferIndex =
    7143           0 :                 puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
    7144           0 :             posbBufferIndex = puShiftXIndex +
    7145           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    7146           0 :             poshBufferIndex = puShiftXIndex +
    7147           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    7148           0 :             posjBufferIndex = puShiftXIndex +
    7149           0 :                               puShiftYIndex * context_ptr->interpolated_stride;
    7150             : 
    7151           0 :             PU_HalfPelRefinement(
    7152             :                 sequence_control_set_ptr,
    7153             :                 context_ptr,
    7154           0 :                 &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
    7155             :                 ref_stride,
    7156           0 :                 &context_ptr->p_best_ssd16x64[idx],
    7157             :                 puLcuBufferIndex,
    7158             :                 &(pos_b_buffer[posbBufferIndex]),
    7159             :                 &(pos_h_buffer[poshBufferIndex]),
    7160             :                 &(pos_j_buffer[posjBufferIndex]),
    7161             :                 16,
    7162             :                 64,
    7163             :                 x_search_area_origin,
    7164             :                 y_search_area_origin,
    7165           0 :                 &context_ptr->p_best_sad16x64[idx],
    7166           0 :                 &context_ptr->p_best_mv16x64[idx],
    7167             :                 &context_ptr->psub_pel_direction16x64[idx]);
    7168             :         }
    7169             :     }
    7170             : 
    7171           0 :     return;
    7172             : }
    7173             : /*******************************************
    7174             :  * combined_averaging_ssd
    7175             :  *
    7176             :  *******************************************/
    7177           0 : uint32_t combined_averaging_ssd_c(uint8_t *src, ptrdiff_t src_stride,
    7178             :                                   uint8_t *ref1, ptrdiff_t ref1_stride,
    7179             :                                   uint8_t *ref2, ptrdiff_t ref2_stride,
    7180             :                                   uint32_t height, uint32_t width) {
    7181             :     uint32_t x, y;
    7182           0 :     uint32_t ssd = 0;
    7183             :     uint8_t avgpel;
    7184           0 :     for (y = 0; y < height; y++) {
    7185           0 :         for (x = 0; x < width; x++) {
    7186           0 :             avgpel = (ref1[x] + ref2[x] + 1) >> 1;
    7187           0 :             ssd += SQR((int64_t)(src[x]) - (avgpel));
    7188             :         }
    7189           0 :         src += src_stride;
    7190           0 :         ref1 += ref1_stride;
    7191           0 :         ref2 += ref2_stride;
    7192             :     }
    7193           0 :     return ssd;
    7194             : }
    7195             : /*******************************************
    7196             :  * PU_QuarterPelRefinementOnTheFly
    7197             :  *   performs Quarter Pel refinement for each PU
    7198             :  *******************************************/
    7199           0 : static void PU_QuarterPelRefinementOnTheFly(
    7200             :     MeContext *context_ptr,  // [IN] ME context Ptr, used to get SB Ptr
    7201             :     uint32_t *pBestSsd,
    7202             :     uint32_t
    7203             :         puLcuBufferIndex,  // [IN] PU origin, used to point to source samples
    7204             :     uint8_t **buf1,        // [IN]
    7205             :     uint32_t *buf1Stride,
    7206             :     uint8_t **buf2,  // [IN]
    7207             :     uint32_t *buf2Stride,
    7208             :     uint32_t pu_width,   // [IN]  PU width
    7209             :     uint32_t pu_height,  // [IN]  PU height
    7210             :     int16_t
    7211             :         x_search_area_origin,  // [IN] search area origin in the horizontal
    7212             :                                // direction, used to point to reference samples
    7213             :     int16_t
    7214             :         y_search_area_origin,  // [IN] search area origin in the vertical
    7215             :                                // direction, used to point to reference samples
    7216             :     uint32_t *pBestSad, uint32_t *pBestMV,
    7217             :     uint8_t sub_pel_direction) {
    7218           0 :     int16_t x_mv = _MVXT(*pBestMV);
    7219           0 :     int16_t y_mv = _MVYT(*pBestMV);
    7220             : 
    7221           0 :     int16_t xSearchIndex = ((x_mv + 2) >> 2) - x_search_area_origin;
    7222           0 :     int16_t ySearchIndex = ((y_mv + 2) >> 2) - y_search_area_origin;
    7223             : 
    7224             :     uint64_t dist;
    7225             : 
    7226             :     EbBool validTL, validT, validTR, validR, validBR, validB, validBL, validL;
    7227             : 
    7228             :     int16_t xMvQuarter[8];
    7229             :     int16_t yMvQuarter[8];
    7230           0 :     int32_t searchRegionIndex1 = 0;
    7231           0 :     int32_t searchRegionIndex2 = 0;
    7232           0 :     if (context_ptr->full_quarter_pel_refinement) {
    7233           0 :         validTL = EB_TRUE;
    7234           0 :         validT = EB_TRUE;
    7235           0 :         validTR = EB_TRUE;
    7236           0 :         validR = EB_TRUE;
    7237           0 :         validBR = EB_TRUE;
    7238           0 :         validB = EB_TRUE;
    7239           0 :         validBL = EB_TRUE;
    7240           0 :         validL = EB_TRUE;
    7241             :     } else {
    7242           0 :         if ((y_mv & 2) + ((x_mv & 2) >> 1)) {
    7243           0 :             validTL = (EbBool)(sub_pel_direction == RIGHT_POSITION ||
    7244           0 :                                sub_pel_direction == BOTTOM_RIGHT_POSITION ||
    7245             :                                sub_pel_direction == BOTTOM_POSITION);
    7246           0 :             validT = (EbBool)(sub_pel_direction == BOTTOM_RIGHT_POSITION ||
    7247           0 :                               sub_pel_direction == BOTTOM_POSITION ||
    7248             :                               sub_pel_direction == BOTTOM_LEFT_POSITION);
    7249           0 :             validTR = (EbBool)(sub_pel_direction == BOTTOM_POSITION ||
    7250           0 :                                sub_pel_direction == BOTTOM_LEFT_POSITION ||
    7251             :                                sub_pel_direction == LEFT_POSITION);
    7252           0 :             validR = (EbBool)(sub_pel_direction == BOTTOM_LEFT_POSITION ||
    7253           0 :                               sub_pel_direction == LEFT_POSITION ||
    7254             :                               sub_pel_direction == TOP_LEFT_POSITION);
    7255           0 :             validBR = (EbBool)(sub_pel_direction == LEFT_POSITION ||
    7256           0 :                                sub_pel_direction == TOP_LEFT_POSITION ||
    7257             :                                sub_pel_direction == TOP_POSITION);
    7258           0 :             validB = (EbBool)(sub_pel_direction == TOP_LEFT_POSITION ||
    7259           0 :                               sub_pel_direction == TOP_POSITION ||
    7260             :                               sub_pel_direction == TOP_RIGHT_POSITION);
    7261           0 :             validBL = (EbBool)(sub_pel_direction == TOP_POSITION ||
    7262           0 :                                sub_pel_direction == TOP_RIGHT_POSITION ||
    7263             :                                sub_pel_direction == RIGHT_POSITION);
    7264           0 :             validL = (EbBool)(sub_pel_direction == TOP_RIGHT_POSITION ||
    7265           0 :                               sub_pel_direction == RIGHT_POSITION ||
    7266             :                               sub_pel_direction == BOTTOM_RIGHT_POSITION);
    7267             :         } else {
    7268           0 :             validTL = (EbBool)(sub_pel_direction == LEFT_POSITION ||
    7269           0 :                                sub_pel_direction == TOP_LEFT_POSITION ||
    7270             :                                sub_pel_direction == TOP_POSITION);
    7271           0 :             validT = (EbBool)(sub_pel_direction == TOP_LEFT_POSITION ||
    7272           0 :                               sub_pel_direction == TOP_POSITION ||
    7273             :                               sub_pel_direction == TOP_RIGHT_POSITION);
    7274           0 :             validTR = (EbBool)(sub_pel_direction == TOP_POSITION ||
    7275           0 :                                sub_pel_direction == TOP_RIGHT_POSITION ||
    7276             :                                sub_pel_direction == RIGHT_POSITION);
    7277           0 :             validR = (EbBool)(sub_pel_direction == TOP_RIGHT_POSITION ||
    7278           0 :                               sub_pel_direction == RIGHT_POSITION ||
    7279             :                               sub_pel_direction == BOTTOM_RIGHT_POSITION);
    7280           0 :             validBR = (EbBool)(sub_pel_direction == RIGHT_POSITION ||
    7281           0 :                                sub_pel_direction == BOTTOM_RIGHT_POSITION ||
    7282             :                                sub_pel_direction == BOTTOM_POSITION);
    7283           0 :             validB = (EbBool)(sub_pel_direction == BOTTOM_RIGHT_POSITION ||
    7284           0 :                               sub_pel_direction == BOTTOM_POSITION ||
    7285             :                               sub_pel_direction == BOTTOM_LEFT_POSITION);
    7286           0 :             validBL = (EbBool)(sub_pel_direction == BOTTOM_POSITION ||
    7287           0 :                                sub_pel_direction == BOTTOM_LEFT_POSITION ||
    7288             :                                sub_pel_direction == LEFT_POSITION);
    7289           0 :             validL = (EbBool)(sub_pel_direction == BOTTOM_LEFT_POSITION ||
    7290           0 :                               sub_pel_direction == LEFT_POSITION ||
    7291             :                               sub_pel_direction == TOP_LEFT_POSITION);
    7292             :         }
    7293             :     }
    7294           0 :     xMvQuarter[0] = x_mv - 1;  // L  position
    7295           0 :     xMvQuarter[1] = x_mv + 1;  // R  position
    7296           0 :     xMvQuarter[2] = x_mv;      // T  position
    7297           0 :     xMvQuarter[3] = x_mv;      // B  position
    7298           0 :     xMvQuarter[4] = x_mv - 1;  // TL position
    7299           0 :     xMvQuarter[5] = x_mv + 1;  // TR position
    7300           0 :     xMvQuarter[6] = x_mv + 1;  // BR position
    7301           0 :     xMvQuarter[7] = x_mv - 1;  // BL position
    7302             : 
    7303           0 :     yMvQuarter[0] = y_mv;      // L  position
    7304           0 :     yMvQuarter[1] = y_mv;      // R  position
    7305           0 :     yMvQuarter[2] = y_mv - 1;  // T  position
    7306           0 :     yMvQuarter[3] = y_mv + 1;  // B  position
    7307           0 :     yMvQuarter[4] = y_mv - 1;  // TL position
    7308           0 :     yMvQuarter[5] = y_mv - 1;  // TR position
    7309           0 :     yMvQuarter[6] = y_mv + 1;  // BR position
    7310           0 :     yMvQuarter[7] = y_mv + 1;  // BL position
    7311             : 
    7312             :     // Use SATD only when QP mod, and RC are OFF
    7313             :     // QP mod, and RC assume that ME distortion is always SAD.
    7314             :     // This problem might be solved by computing SAD for the best position after
    7315             :     // fractional search is done, or by considering the full pel resolution SAD.
    7316             : 
    7317             :     {
    7318             :         // L position
    7319           0 :         if (validL) {
    7320           0 :             searchRegionIndex1 = (int32_t)xSearchIndex +
    7321           0 :                                  (int32_t)buf1Stride[0] * (int32_t)ySearchIndex;
    7322           0 :             searchRegionIndex2 = (int32_t)xSearchIndex +
    7323           0 :                                  (int32_t)buf2Stride[0] * (int32_t)ySearchIndex;
    7324             : 
    7325           0 :             dist =
    7326           0 :                 (context_ptr->fractional_search_method == SSD_SEARCH)
    7327           0 :                     ? combined_averaging_ssd(
    7328           0 :                           &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7329             :                           BLOCK_SIZE_64,
    7330           0 :                           buf1[0] + searchRegionIndex1,
    7331           0 :                           buf1Stride[0],
    7332           0 :                           buf2[0] + searchRegionIndex2,
    7333           0 :                           buf2Stride[0],
    7334             :                           pu_height,
    7335             :                           pu_width)
    7336           0 :                     : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    7337           0 :                           ? (nxm_sad_avg_kernel(
    7338             :                                      &(context_ptr
    7339           0 :                                            ->sb_buffer[puLcuBufferIndex]),
    7340             :                                      BLOCK_SIZE_64 << 1,
    7341           0 :                                      buf1[0] + searchRegionIndex1,
    7342           0 :                                      buf1Stride[0] << 1,
    7343           0 :                                      buf2[0] + searchRegionIndex2,
    7344           0 :                                      buf2Stride[0] << 1,
    7345             :                                      pu_height >> 1,
    7346             :                                      pu_width))
    7347           0 :                                 << 1
    7348           0 :                           : nxm_sad_avg_kernel(
    7349           0 :                                     &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7350             :                                     BLOCK_SIZE_64,
    7351           0 :                                     buf1[0] + searchRegionIndex1,
    7352             :                                     buf1Stride[0],
    7353           0 :                                     buf2[0] + searchRegionIndex2,
    7354             :                                     buf2Stride[0],
    7355             :                                     pu_height,
    7356             :                                     pu_width);
    7357           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH) {
    7358           0 :                 if (dist < *pBestSsd) {
    7359           0 :                     *pBestSad =
    7360           0 :                         (uint32_t)nxm_sad_avg_kernel(
    7361           0 :                                 &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7362             :                                 BLOCK_SIZE_64,
    7363           0 :                                 buf1[0] + searchRegionIndex1,
    7364             :                                 buf1Stride[0],
    7365           0 :                                 buf2[0] + searchRegionIndex2,
    7366             :                                 buf2Stride[0],
    7367             :                                 pu_height,
    7368             :                                 pu_width);
    7369           0 :                     *pBestMV = ((uint16_t)yMvQuarter[0] << 16) |
    7370           0 :                                ((uint16_t)xMvQuarter[0]);
    7371           0 :                     *pBestSsd = (uint32_t)dist;
    7372             :                 }
    7373             :             } else {
    7374           0 :                 if (dist < *pBestSad) {
    7375           0 :                     *pBestSad = (uint32_t)dist;
    7376           0 :                     *pBestMV = ((uint16_t)yMvQuarter[0] << 16) |
    7377           0 :                                ((uint16_t)xMvQuarter[0]);
    7378             :                 }
    7379             :             }
    7380             :         }
    7381             : 
    7382             :         // R positions
    7383           0 :         if (validR) {
    7384           0 :             searchRegionIndex1 = (int32_t)xSearchIndex +
    7385           0 :                                  (int32_t)buf1Stride[1] * (int32_t)ySearchIndex;
    7386           0 :             searchRegionIndex2 = (int32_t)xSearchIndex +
    7387           0 :                                  (int32_t)buf2Stride[1] * (int32_t)ySearchIndex;
    7388           0 :             dist =
    7389           0 :                 (context_ptr->fractional_search_method == SSD_SEARCH)
    7390           0 :                     ? combined_averaging_ssd(
    7391           0 :                           &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7392             :                           BLOCK_SIZE_64,
    7393           0 :                           buf1[1] + searchRegionIndex1,
    7394           0 :                           buf1Stride[1],
    7395           0 :                           buf2[1] + searchRegionIndex2,
    7396           0 :                           buf2Stride[1],
    7397             :                           pu_height,
    7398             :                           pu_width)
    7399           0 :                     : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    7400           0 :                           ? (nxm_sad_avg_kernel(
    7401             :                                      &(context_ptr
    7402           0 :                                            ->sb_buffer[puLcuBufferIndex]),
    7403             :                                      BLOCK_SIZE_64 << 1,
    7404           0 :                                      buf1[1] + searchRegionIndex1,
    7405           0 :                                      buf1Stride[1] << 1,
    7406           0 :                                      buf2[1] + searchRegionIndex2,
    7407           0 :                                      buf2Stride[1] << 1,
    7408             :                                      pu_height >> 1,
    7409             :                                      pu_width))
    7410           0 :                                 << 1
    7411           0 :                           : nxm_sad_avg_kernel(
    7412           0 :                                     &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7413             :                                     BLOCK_SIZE_64,
    7414           0 :                                     buf1[1] + searchRegionIndex1,
    7415           0 :                                     buf1Stride[1],
    7416           0 :                                     buf2[1] + searchRegionIndex2,
    7417           0 :                                     buf2Stride[1],
    7418             :                                     pu_height,
    7419             :                                     pu_width);
    7420           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH) {
    7421           0 :                 if (dist < *pBestSsd) {
    7422           0 :                     *pBestSad =
    7423           0 :                         (uint32_t)nxm_sad_avg_kernel(
    7424           0 :                                 &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7425             :                                 BLOCK_SIZE_64,
    7426           0 :                                 buf1[1] + searchRegionIndex1,
    7427           0 :                                 buf1Stride[1],
    7428           0 :                                 buf2[1] + searchRegionIndex2,
    7429           0 :                                 buf2Stride[1],
    7430             :                                 pu_height,
    7431             :                                 pu_width);
    7432           0 :                     *pBestMV = ((uint16_t)yMvQuarter[1] << 16) |
    7433           0 :                                ((uint16_t)xMvQuarter[1]);
    7434           0 :                     *pBestSsd = (uint32_t)dist;
    7435             :                 }
    7436             :             } else {
    7437           0 :                 if (dist < *pBestSad) {
    7438           0 :                     *pBestSad = (uint32_t)dist;
    7439           0 :                     *pBestMV = ((uint16_t)yMvQuarter[1] << 16) |
    7440           0 :                                ((uint16_t)xMvQuarter[1]);
    7441             :                 }
    7442             :             }
    7443             :         }
    7444             : 
    7445             :         // T position
    7446           0 :         if (validT) {
    7447           0 :             searchRegionIndex1 = (int32_t)xSearchIndex +
    7448           0 :                                  (int32_t)buf1Stride[2] * (int32_t)ySearchIndex;
    7449           0 :             searchRegionIndex2 = (int32_t)xSearchIndex +
    7450           0 :                                  (int32_t)buf2Stride[2] * (int32_t)ySearchIndex;
    7451           0 :             dist =
    7452           0 :                 (context_ptr->fractional_search_method == SSD_SEARCH)
    7453           0 :                     ? combined_averaging_ssd(
    7454           0 :                           &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7455             :                           BLOCK_SIZE_64,
    7456           0 :                           buf1[2] + searchRegionIndex1,
    7457           0 :                           buf1Stride[2],
    7458           0 :                           buf2[2] + searchRegionIndex2,
    7459           0 :                           buf2Stride[2],
    7460             :                           pu_height,
    7461             :                           pu_width)
    7462           0 :                     : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    7463           0 :                           ? (nxm_sad_avg_kernel(
    7464             :                                      &(context_ptr
    7465           0 :                                            ->sb_buffer[puLcuBufferIndex]),
    7466             :                                      BLOCK_SIZE_64 << 1,
    7467           0 :                                      buf1[2] + searchRegionIndex1,
    7468           0 :                                      buf1Stride[2] << 1,
    7469           0 :                                      buf2[2] + searchRegionIndex2,
    7470           0 :                                      buf2Stride[2] << 1,
    7471             :                                      pu_height >> 1,
    7472             :                                      pu_width))
    7473           0 :                                 << 1
    7474           0 :                           : nxm_sad_avg_kernel(
    7475           0 :                                     &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7476             :                                     BLOCK_SIZE_64,
    7477           0 :                                     buf1[2] + searchRegionIndex1,
    7478           0 :                                     buf1Stride[2],
    7479           0 :                                     buf2[2] + searchRegionIndex2,
    7480           0 :                                     buf2Stride[2],
    7481             :                                     pu_height,
    7482             :                                     pu_width);
    7483           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH) {
    7484           0 :                 if (dist < *pBestSsd) {
    7485           0 :                     *pBestSad =
    7486           0 :                         (uint32_t)nxm_sad_avg_kernel(
    7487           0 :                                 &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7488             :                                 BLOCK_SIZE_64,
    7489           0 :                                 buf1[2] + searchRegionIndex1,
    7490           0 :                                 buf1Stride[2],
    7491           0 :                                 buf2[2] + searchRegionIndex2,
    7492           0 :                                 buf2Stride[2],
    7493             :                                 pu_height,
    7494             :                                 pu_width);
    7495           0 :                     *pBestMV = ((uint16_t)yMvQuarter[2] << 16) |
    7496           0 :                                ((uint16_t)xMvQuarter[2]);
    7497           0 :                     *pBestSsd = (uint32_t)dist;
    7498             :                 }
    7499             :             } else {
    7500           0 :                 if (dist < *pBestSad) {
    7501           0 :                     *pBestSad = (uint32_t)dist;
    7502           0 :                     *pBestMV = ((uint16_t)yMvQuarter[2] << 16) |
    7503           0 :                                ((uint16_t)xMvQuarter[2]);
    7504             :                 }
    7505             :             }
    7506             :         }
    7507             : 
    7508             :         // B position
    7509           0 :         if (validB) {
    7510           0 :             searchRegionIndex1 = (int32_t)xSearchIndex +
    7511           0 :                                  (int32_t)buf1Stride[3] * (int32_t)ySearchIndex;
    7512           0 :             searchRegionIndex2 = (int32_t)xSearchIndex +
    7513           0 :                                  (int32_t)buf2Stride[3] * (int32_t)ySearchIndex;
    7514           0 :             dist =
    7515           0 :                 (context_ptr->fractional_search_method == SSD_SEARCH)
    7516           0 :                     ? combined_averaging_ssd(
    7517           0 :                           &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7518             :                           BLOCK_SIZE_64,
    7519           0 :                           buf1[3] + searchRegionIndex1,
    7520           0 :                           buf1Stride[3],
    7521           0 :                           buf2[3] + searchRegionIndex2,
    7522           0 :                           buf2Stride[3],
    7523             :                           pu_height,
    7524             :                           pu_width)
    7525           0 :                     : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    7526           0 :                           ? (nxm_sad_avg_kernel(
    7527             :                                      &(context_ptr
    7528           0 :                                            ->sb_buffer[puLcuBufferIndex]),
    7529             :                                      BLOCK_SIZE_64 << 1,
    7530           0 :                                      buf1[3] + searchRegionIndex1,
    7531           0 :                                      buf1Stride[3] << 1,
    7532           0 :                                      buf2[3] + searchRegionIndex2,
    7533           0 :                                      buf2Stride[3] << 1,
    7534             :                                      pu_height >> 1,
    7535             :                                      pu_width))
    7536           0 :                                 << 1
    7537           0 :                           : nxm_sad_avg_kernel(
    7538           0 :                                     &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7539             :                                     BLOCK_SIZE_64,
    7540           0 :                                     buf1[3] + searchRegionIndex1,
    7541           0 :                                     buf1Stride[3],
    7542           0 :                                     buf2[3] + searchRegionIndex2,
    7543           0 :                                     buf2Stride[3],
    7544             :                                     pu_height,
    7545             :                                     pu_width);
    7546           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH) {
    7547           0 :                 if (dist < *pBestSsd) {
    7548           0 :                     *pBestSad =
    7549           0 :                         (uint32_t)nxm_sad_avg_kernel(
    7550           0 :                                 &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7551             :                                 BLOCK_SIZE_64,
    7552           0 :                                 buf1[3] + searchRegionIndex1,
    7553           0 :                                 buf1Stride[3],
    7554           0 :                                 buf2[3] + searchRegionIndex2,
    7555           0 :                                 buf2Stride[3],
    7556             :                                 pu_height,
    7557             :                                 pu_width);
    7558           0 :                     *pBestMV = ((uint16_t)yMvQuarter[3] << 16) |
    7559           0 :                                ((uint16_t)xMvQuarter[3]);
    7560           0 :                     *pBestSsd = (uint32_t)dist;
    7561             :                 }
    7562             :             } else {
    7563           0 :                 if (dist < *pBestSad) {
    7564           0 :                     *pBestSad = (uint32_t)dist;
    7565           0 :                     *pBestMV = ((uint16_t)yMvQuarter[3] << 16) |
    7566           0 :                                ((uint16_t)xMvQuarter[3]);
    7567             :                 }
    7568             :             }
    7569             :         }
    7570             : 
    7571             :         // TL position
    7572           0 :         if (validTL) {
    7573           0 :             searchRegionIndex1 = (int32_t)xSearchIndex +
    7574           0 :                                  (int32_t)buf1Stride[4] * (int32_t)ySearchIndex;
    7575           0 :             searchRegionIndex2 = (int32_t)xSearchIndex +
    7576           0 :                                  (int32_t)buf2Stride[4] * (int32_t)ySearchIndex;
    7577           0 :             dist =
    7578           0 :                 (context_ptr->fractional_search_method == SSD_SEARCH)
    7579           0 :                     ? combined_averaging_ssd(
    7580           0 :                           &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7581             :                           BLOCK_SIZE_64,
    7582           0 :                           buf1[4] + searchRegionIndex1,
    7583           0 :                           buf1Stride[4],
    7584           0 :                           buf2[4] + searchRegionIndex2,
    7585           0 :                           buf2Stride[4],
    7586             :                           pu_height,
    7587             :                           pu_width)
    7588           0 :                     : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    7589           0 :                           ? (nxm_sad_avg_kernel(
    7590             :                                      &(context_ptr
    7591           0 :                                            ->sb_buffer[puLcuBufferIndex]),
    7592             :                                      BLOCK_SIZE_64 << 1,
    7593           0 :                                      buf1[4] + searchRegionIndex1,
    7594           0 :                                      buf1Stride[4] << 1,
    7595           0 :                                      buf2[4] + searchRegionIndex2,
    7596           0 :                                      buf2Stride[4] << 1,
    7597             :                                      pu_height >> 1,
    7598             :                                      pu_width))
    7599           0 :                                 << 1
    7600           0 :                           : nxm_sad_avg_kernel(
    7601           0 :                                     &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7602             :                                     BLOCK_SIZE_64,
    7603           0 :                                     buf1[4] + searchRegionIndex1,
    7604           0 :                                     buf1Stride[4],
    7605           0 :                                     buf2[4] + searchRegionIndex2,
    7606           0 :                                     buf2Stride[4],
    7607             :                                     pu_height,
    7608             :                                     pu_width);
    7609           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH) {
    7610           0 :                 if (dist < *pBestSsd) {
    7611           0 :                     *pBestSad =
    7612           0 :                         (uint32_t)nxm_sad_avg_kernel(
    7613           0 :                                 &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7614             :                                 BLOCK_SIZE_64,
    7615           0 :                                 buf1[4] + searchRegionIndex1,
    7616           0 :                                 buf1Stride[4],
    7617           0 :                                 buf2[4] + searchRegionIndex2,
    7618           0 :                                 buf2Stride[4],
    7619             :                                 pu_height,
    7620             :                                 pu_width);
    7621           0 :                     *pBestMV = ((uint16_t)yMvQuarter[4] << 16) |
    7622           0 :                                ((uint16_t)xMvQuarter[4]);
    7623           0 :                     *pBestSsd = (uint32_t)dist;
    7624             :                 }
    7625             :             } else {
    7626           0 :                 if (dist < *pBestSad) {
    7627           0 :                     *pBestSad = (uint32_t)dist;
    7628           0 :                     *pBestMV = ((uint16_t)yMvQuarter[4] << 16) |
    7629           0 :                                ((uint16_t)xMvQuarter[4]);
    7630             :                 }
    7631             :             }
    7632             :         }
    7633             : 
    7634             :         // TR position
    7635           0 :         if (validTR) {
    7636           0 :             searchRegionIndex1 = (int32_t)xSearchIndex +
    7637           0 :                                  (int32_t)buf1Stride[5] * (int32_t)ySearchIndex;
    7638           0 :             searchRegionIndex2 = (int32_t)xSearchIndex +
    7639           0 :                                  (int32_t)buf2Stride[5] * (int32_t)ySearchIndex;
    7640           0 :             dist =
    7641           0 :                 (context_ptr->fractional_search_method == SSD_SEARCH)
    7642           0 :                     ? combined_averaging_ssd(
    7643           0 :                           &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7644             :                           BLOCK_SIZE_64,
    7645           0 :                           buf1[5] + searchRegionIndex1,
    7646           0 :                           buf1Stride[5],
    7647           0 :                           buf2[5] + searchRegionIndex2,
    7648           0 :                           buf2Stride[5],
    7649             :                           pu_height,
    7650             :                           pu_width)
    7651           0 :                     : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    7652           0 :                           ? (nxm_sad_avg_kernel(
    7653             :                                      &(context_ptr
    7654           0 :                                            ->sb_buffer[puLcuBufferIndex]),
    7655             :                                      BLOCK_SIZE_64 << 1,
    7656           0 :                                      buf1[5] + searchRegionIndex1,
    7657           0 :                                      buf1Stride[5] << 1,
    7658           0 :                                      buf2[5] + searchRegionIndex2,
    7659           0 :                                      buf2Stride[5] << 1,
    7660             :                                      pu_height >> 1,
    7661             :                                      pu_width))
    7662           0 :                                 << 1
    7663           0 :                           : nxm_sad_avg_kernel(
    7664           0 :                                     &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7665             :                                     BLOCK_SIZE_64,
    7666           0 :                                     buf1[5] + searchRegionIndex1,
    7667           0 :                                     buf1Stride[5],
    7668           0 :                                     buf2[5] + searchRegionIndex2,
    7669           0 :                                     buf2Stride[5],
    7670             :                                     pu_height,
    7671             :                                     pu_width);
    7672           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH) {
    7673           0 :                 if (dist < *pBestSsd) {
    7674           0 :                     *pBestSad =
    7675           0 :                         (uint32_t)nxm_sad_avg_kernel(
    7676           0 :                                 &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7677             :                                 BLOCK_SIZE_64,
    7678           0 :                                 buf1[5] + searchRegionIndex1,
    7679           0 :                                 buf1Stride[5],
    7680           0 :                                 buf2[5] + searchRegionIndex2,
    7681           0 :                                 buf2Stride[5],
    7682             :                                 pu_height,
    7683             :                                 pu_width);
    7684           0 :                     *pBestMV = ((uint16_t)yMvQuarter[5] << 16) |
    7685           0 :                                ((uint16_t)xMvQuarter[5]);
    7686           0 :                     *pBestSsd = (uint32_t)dist;
    7687             :                 }
    7688             :             } else {
    7689           0 :                 if (dist < *pBestSad) {
    7690           0 :                     *pBestSad = (uint32_t)dist;
    7691           0 :                     *pBestMV = ((uint16_t)yMvQuarter[5] << 16) |
    7692           0 :                                ((uint16_t)xMvQuarter[5]);
    7693             :                 }
    7694             :             }
    7695             :         }
    7696             : 
    7697             :         // BR position
    7698           0 :         if (validBR) {
    7699           0 :             searchRegionIndex1 = (int32_t)xSearchIndex +
    7700           0 :                                  (int32_t)buf1Stride[6] * (int32_t)ySearchIndex;
    7701           0 :             searchRegionIndex2 = (int32_t)xSearchIndex +
    7702           0 :                                  (int32_t)buf2Stride[6] * (int32_t)ySearchIndex;
    7703           0 :             dist =
    7704           0 :                 (context_ptr->fractional_search_method == SSD_SEARCH)
    7705           0 :                     ? combined_averaging_ssd(
    7706           0 :                           &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7707             :                           BLOCK_SIZE_64,
    7708           0 :                           buf1[6] + searchRegionIndex1,
    7709           0 :                           buf1Stride[6],
    7710           0 :                           buf2[6] + searchRegionIndex2,
    7711           0 :                           buf2Stride[6],
    7712             :                           pu_height,
    7713             :                           pu_width)
    7714           0 :                     : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    7715           0 :                           ? (nxm_sad_avg_kernel(
    7716             :                                      &(context_ptr
    7717           0 :                                            ->sb_buffer[puLcuBufferIndex]),
    7718             :                                      BLOCK_SIZE_64 << 1,
    7719           0 :                                      buf1[6] + searchRegionIndex1,
    7720           0 :                                      buf1Stride[6] << 1,
    7721           0 :                                      buf2[6] + searchRegionIndex2,
    7722           0 :                                      buf2Stride[6] << 1,
    7723             :                                      pu_height >> 1,
    7724             :                                      pu_width))
    7725           0 :                                 << 1
    7726           0 :                           : nxm_sad_avg_kernel(
    7727           0 :                                     &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7728             :                                     BLOCK_SIZE_64,
    7729           0 :                                     buf1[6] + searchRegionIndex1,
    7730           0 :                                     buf1Stride[6],
    7731           0 :                                     buf2[6] + searchRegionIndex2,
    7732           0 :                                     buf2Stride[6],
    7733             :                                     pu_height,
    7734             :                                     pu_width);
    7735           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH) {
    7736           0 :                 if (dist < *pBestSsd) {
    7737           0 :                     *pBestSad =
    7738           0 :                         (uint32_t)nxm_sad_avg_kernel(
    7739           0 :                                 &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7740             :                                 BLOCK_SIZE_64,
    7741           0 :                                 buf1[6] + searchRegionIndex1,
    7742           0 :                                 buf1Stride[6],
    7743           0 :                                 buf2[6] + searchRegionIndex2,
    7744           0 :                                 buf2Stride[6],
    7745             :                                 pu_height,
    7746             :                                 pu_width);
    7747           0 :                     *pBestMV = ((uint16_t)yMvQuarter[6] << 16) |
    7748           0 :                                ((uint16_t)xMvQuarter[6]);
    7749           0 :                     *pBestSsd = (uint32_t)dist;
    7750             :                 }
    7751             :             } else {
    7752           0 :                 if (dist < *pBestSad) {
    7753           0 :                     *pBestSad = (uint32_t)dist;
    7754           0 :                     *pBestMV = ((uint16_t)yMvQuarter[6] << 16) |
    7755           0 :                                ((uint16_t)xMvQuarter[6]);
    7756             :                 }
    7757             :             }
    7758             :         }
    7759             : 
    7760             :         // BL position
    7761           0 :         if (validBL) {
    7762           0 :             searchRegionIndex1 = (int32_t)xSearchIndex +
    7763           0 :                                  (int32_t)buf1Stride[7] * (int32_t)ySearchIndex;
    7764           0 :             searchRegionIndex2 = (int32_t)xSearchIndex +
    7765           0 :                                  (int32_t)buf2Stride[7] * (int32_t)ySearchIndex;
    7766           0 :             dist =
    7767           0 :                 (context_ptr->fractional_search_method == SSD_SEARCH)
    7768           0 :                     ? combined_averaging_ssd(
    7769           0 :                           &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7770             :                           BLOCK_SIZE_64,
    7771           0 :                           buf1[7] + searchRegionIndex1,
    7772           0 :                           buf1Stride[7],
    7773           0 :                           buf2[7] + searchRegionIndex2,
    7774           0 :                           buf2Stride[7],
    7775             :                           pu_height,
    7776             :                           pu_width)
    7777           0 :                     : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    7778           0 :                           ? (nxm_sad_avg_kernel(
    7779             :                                      &(context_ptr
    7780           0 :                                            ->sb_buffer[puLcuBufferIndex]),
    7781             :                                      BLOCK_SIZE_64 << 1,
    7782           0 :                                      buf1[7] + searchRegionIndex1,
    7783           0 :                                      buf1Stride[7] << 1,
    7784           0 :                                      buf2[7] + searchRegionIndex2,
    7785           0 :                                      buf2Stride[7] << 1,
    7786             :                                      pu_height >> 1,
    7787             :                                      pu_width))
    7788           0 :                                 << 1
    7789           0 :                           : nxm_sad_avg_kernel(
    7790           0 :                                     &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7791             :                                     BLOCK_SIZE_64,
    7792           0 :                                     buf1[7] + searchRegionIndex1,
    7793           0 :                                     buf1Stride[7],
    7794           0 :                                     buf2[7] + searchRegionIndex2,
    7795           0 :                                     buf2Stride[7],
    7796             :                                     pu_height,
    7797             :                                     pu_width);
    7798           0 :             if (context_ptr->fractional_search_method == SSD_SEARCH) {
    7799           0 :                 if (dist < *pBestSsd) {
    7800           0 :                     *pBestSad =
    7801           0 :                         (uint32_t)nxm_sad_avg_kernel(
    7802           0 :                                 &(context_ptr->sb_buffer[puLcuBufferIndex]),
    7803             :                                 BLOCK_SIZE_64,
    7804           0 :                                 buf1[7] + searchRegionIndex1,
    7805           0 :                                 buf1Stride[7],
    7806           0 :                                 buf2[7] + searchRegionIndex2,
    7807           0 :                                 buf2Stride[7],
    7808             :                                 pu_height,
    7809             :                                 pu_width);
    7810           0 :                     *pBestMV = ((uint16_t)yMvQuarter[7] << 16) |
    7811           0 :                                ((uint16_t)xMvQuarter[7]);
    7812           0 :                     *pBestSsd = (uint32_t)dist;
    7813             :                 }
    7814             :             } else {
    7815           0 :                 if (dist < *pBestSad) {
    7816           0 :                     *pBestSad = (uint32_t)dist;
    7817           0 :                     *pBestMV = ((uint16_t)yMvQuarter[7] << 16) |
    7818           0 :                                ((uint16_t)xMvQuarter[7]);
    7819             :                 }
    7820             :             }
    7821             :         }
    7822             :     }
    7823             : 
    7824           0 :     return;
    7825             : }
    7826             : 
    7827             : /*******************************************
    7828             : * SetQuarterPelRefinementInputsOnTheFly
    7829             : *   for each of the 8 quarter pel candidates, picks the 2 buffers (full pel
    7830             : *   and/or half pel) and their strides to do averaging for Quarter Pel Refinement
    7831             : *******************************************/
    7832           0 : static void SetQuarterPelRefinementInputsOnTheFly(
    7833             :     uint8_t *pos_Full,     //[IN] points to A; every pointer derived from it is paired with FullStride
    7834             :     uint32_t FullStride,   //[IN] stride reported for pos_Full based entries
    7835             :     uint8_t *pos_b,        //[IN] points to b
    7836             :     uint8_t *pos_h,        //[IN] points to h
    7837             :     uint8_t *pos_j,        //[IN] points to j
    7838             :     uint32_t Stride,       //[IN] stride reported for pos_b / pos_h / pos_j based entries
    7839             :     int16_t x_mv,          //[IN] only bit 1 is inspected
    7840             :     int16_t y_mv,          //[IN] only bit 1 is inspected
    7841             :     uint8_t **buf1,        //[OUT] entries 0..7: first buffer of each pair
    7842             :     uint32_t *buf1Stride,  //[OUT] entries 0..7: stride of buf1[i]
    7843             :     uint8_t **buf2,        //[OUT] entries 0..7: second buffer of each pair
    7844             :     uint32_t *buf2Stride   //[OUT] entries 0..7: stride of buf2[i]
    7845             : ) {
    7846           0 :     uint32_t quarterPelRefinementMethod = (y_mv & 2) + ((x_mv & 2) >> 1);
    7847             :     // selector is always 0..3: bit 1 of y_mv contributes 2, bit 1 of x_mv contributes 1
    7848             :     // NOTE(review): presumably the MVs are in quarter pel units, so bit 1 flags a half pel offset - confirm
    7849             :     // for each one of the 8 positions, we need to determine the 2 buffers to do averaging
    7850             :     // sample layout referenced by the per-entry labels below:
    7851             :     //     A    a    b    c
    7852             :     //     d    e    f    g
    7853             :     //     h    i    j    k
    7854             :     //     n    p    q    r
    7855             :     // entries 4..7 are evaluated as TL, TR, BR, BL by PU_QuarterPelRefinementOnTheFly; 0..3 presumably L, R, T, B - TODO confirm
    7856           0 :     switch (quarterPelRefinementMethod) {
    7857           0 :     case EB_QUARTER_IN_FULL:
    7858             :         // built from pos_Full, pos_b and pos_h only; pos_j is not referenced in this case
    7859           0 :         /*c=b+A*/ buf1[0] = pos_b;
    7860           0 :         buf1Stride[0] = Stride;
    7861           0 :         buf2[0] = pos_Full;
    7862           0 :         buf2Stride[0] = FullStride;
    7863           0 :         /*a=A+b*/ buf1[1] = pos_Full;
    7864           0 :         buf1Stride[1] = FullStride;
    7865           0 :         buf2[1] = pos_b + 1;
    7866           0 :         buf2Stride[1] = Stride;
    7867           0 :         /*n=h+A*/ buf1[2] = pos_h;
    7868           0 :         buf1Stride[2] = Stride;
    7869           0 :         buf2[2] = pos_Full;
    7870           0 :         buf2Stride[2] = FullStride;
    7871           0 :         /*d=A+h*/ buf1[3] = pos_Full;
    7872           0 :         buf1Stride[3] = FullStride;
    7873           0 :         buf2[3] = pos_h + Stride;
    7874           0 :         buf2Stride[3] = Stride;
    7875           0 :         /*r=b+h*/ buf1[4] = pos_b;
    7876           0 :         buf1Stride[4] = Stride;
    7877           0 :         buf2[4] = pos_h;
    7878           0 :         buf2Stride[4] = Stride;
    7879           0 :         /*p=h+b*/ buf1[5] = pos_h;
    7880           0 :         buf1Stride[5] = Stride;
    7881           0 :         buf2[5] = pos_b + 1;
    7882           0 :         buf2Stride[5] = Stride;
    7883           0 :         /*e=h+b*/ buf1[6] = pos_h + Stride;
    7884           0 :         buf1Stride[6] = Stride;
    7885           0 :         buf2[6] = pos_b + 1;
    7886           0 :         buf2Stride[6] = Stride;
    7887           0 :         /*g=b+h*/ buf1[7] = pos_b;
    7888           0 :         buf1Stride[7] = Stride;
    7889           0 :         buf2[7] = pos_h + Stride;
    7890           0 :         buf2Stride[7] = Stride;
    7891             : 
    7892           0 :         break;
    7893             : 
    7894           0 :     case EB_QUARTER_IN_HALF_HORIZONTAL:
    7895             :         // uses all four inputs; some entries start one sample to the left (pos_Full - 1, pos_h - 1)
    7896           0 :         /*a=A+b*/ buf1[0] = pos_Full - 1;
    7897           0 :         buf1Stride[0] = FullStride;
    7898           0 :         buf2[0] = pos_b;
    7899           0 :         buf2Stride[0] = Stride;
    7900           0 :         /*c=b+A*/ buf1[1] = pos_b;
    7901           0 :         buf1Stride[1] = Stride;
    7902           0 :         buf2[1] = pos_Full;
    7903           0 :         buf2Stride[1] = FullStride;
    7904           0 :         /*q=j+b*/ buf1[2] = pos_j;
    7905           0 :         buf1Stride[2] = Stride;
    7906           0 :         buf2[2] = pos_b;
    7907           0 :         buf2Stride[2] = Stride;
    7908           0 :         /*f=b+j*/ buf1[3] = pos_b;
    7909           0 :         buf1Stride[3] = Stride;
    7910           0 :         buf2[3] = pos_j + Stride;
    7911           0 :         buf2Stride[3] = Stride;
    7912           0 :         /*p=h+b*/ buf1[4] = pos_h - 1;
    7913           0 :         buf1Stride[4] = Stride;
    7914           0 :         buf2[4] = pos_b;
    7915           0 :         buf2Stride[4] = Stride;
    7916           0 :         /*r=b+h*/ buf1[5] = pos_b;
    7917           0 :         buf1Stride[5] = Stride;
    7918           0 :         buf2[5] = pos_h;
    7919           0 :         buf2Stride[5] = Stride;
    7920           0 :         /*g=b+h*/ buf1[6] = pos_b;
    7921           0 :         buf1Stride[6] = Stride;
    7922           0 :         buf2[6] = pos_h + Stride;
    7923           0 :         buf2Stride[6] = Stride;
    7924           0 :         /*e=h+b*/ buf1[7] = pos_h - 1 + Stride;
    7925           0 :         buf1Stride[7] = Stride;
    7926           0 :         buf2[7] = pos_b;
    7927           0 :         buf2Stride[7] = Stride;
    7928             : 
    7929           0 :         break;
    7930             : 
    7931           0 :     case EB_QUARTER_IN_HALF_VERTICAL:
    7932             :         // uses all four inputs; some entries start one row above (pos_Full - FullStride, pos_b - Stride)
    7933           0 :         /*k=j+h*/ buf1[0] = pos_j;
    7934           0 :         buf1Stride[0] = Stride;
    7935           0 :         buf2[0] = pos_h;
    7936           0 :         buf2Stride[0] = Stride;
    7937           0 :         /*i=h+j*/ buf1[1] = pos_h;
    7938           0 :         buf1Stride[1] = Stride;
    7939           0 :         buf2[1] = pos_j + 1;
    7940           0 :         buf2Stride[1] = Stride;
    7941           0 :         /*d=A+h*/ buf1[2] = pos_Full - FullStride;
    7942           0 :         buf1Stride[2] = FullStride;
    7943           0 :         buf2[2] = pos_h;
    7944           0 :         buf2Stride[2] = Stride;
    7945           0 :         /*n=h+A*/ buf1[3] = pos_h;
    7946           0 :         buf1Stride[3] = Stride;
    7947           0 :         buf2[3] = pos_Full;
    7948           0 :         buf2Stride[3] = FullStride;
    7949           0 :         /*g=b+h*/ buf1[4] = pos_b - Stride;
    7950           0 :         buf1Stride[4] = Stride;
    7951           0 :         buf2[4] = pos_h;
    7952           0 :         buf2Stride[4] = Stride;
    7953           0 :         /*e=h+b*/ buf1[5] = pos_h;
    7954           0 :         buf1Stride[5] = Stride;
    7955           0 :         buf2[5] = pos_b + 1 - Stride;
    7956           0 :         buf2Stride[5] = Stride;
    7957           0 :         /*p=h+b*/ buf1[6] = pos_h;
    7958           0 :         buf1Stride[6] = Stride;
    7959           0 :         buf2[6] = pos_b + 1;
    7960           0 :         buf2Stride[6] = Stride;
    7961           0 :         /*r=b+h*/ buf1[7] = pos_b;
    7962           0 :         buf1Stride[7] = Stride;
    7963           0 :         buf2[7] = pos_h;
    7964           0 :         buf2Stride[7] = Stride;
    7965             : 
    7966           0 :         break;
    7967             : 
    7968           0 :     case EB_QUARTER_IN_HALF_DIAGONAL:
    7969             :         // built from pos_b, pos_h and pos_j only; pos_Full is not referenced, so every stride is Stride
    7970           0 :         /*i=h+j*/ buf1[0] = pos_h - 1;
    7971           0 :         buf1Stride[0] = Stride;
    7972           0 :         buf2[0] = pos_j;
    7973           0 :         buf2Stride[0] = Stride;
    7974           0 :         /*k=j+h*/ buf1[1] = pos_j;
    7975           0 :         buf1Stride[1] = Stride;
    7976           0 :         buf2[1] = pos_h;
    7977           0 :         buf2Stride[1] = Stride;
    7978           0 :         /*f=b+j*/ buf1[2] = pos_b - Stride;
    7979           0 :         buf1Stride[2] = Stride;
    7980           0 :         buf2[2] = pos_j;
    7981           0 :         buf2Stride[2] = Stride;
    7982           0 :         /*q=j+b*/ buf1[3] = pos_j;
    7983           0 :         buf1Stride[3] = Stride;
    7984           0 :         buf2[3] = pos_b;
    7985           0 :         buf2Stride[3] = Stride;
    7986           0 :         /*e=h+b*/ buf1[4] = pos_h - 1;
    7987           0 :         buf1Stride[4] = Stride;
    7988           0 :         buf2[4] = pos_b - Stride;
    7989           0 :         buf2Stride[4] = Stride;
    7990           0 :         /*g=b+h*/ buf1[5] = pos_b - Stride;
    7991           0 :         buf1Stride[5] = Stride;
    7992           0 :         buf2[5] = pos_h;
    7993           0 :         buf2Stride[5] = Stride;
    7994           0 :         /*r=b+h*/ buf1[6] = pos_b;
    7995           0 :         buf1Stride[6] = Stride;
    7996           0 :         buf2[6] = pos_h;
    7997           0 :         buf2Stride[6] = Stride;
    7998           0 :         /*p=h+b*/ buf1[7] = pos_h - 1;
    7999           0 :         buf1Stride[7] = Stride;
    8000           0 :         buf2[7] = pos_b;
    8001           0 :         buf2Stride[7] = Stride;
    8002             : 
    8003           0 :         break;
    8004             :     // default: none of the outputs is written. NOTE(review): presumably unreachable if the four EB_QUARTER_* values are 0..3 - confirm
    8005           0 :     default: break;
    8006             :     }
    8007             : 
    8008           0 :     return;
    8009             : }
    8010             : 
    8011             : /*******************************************
    8012             :  * QuarterPelSearch_LCU
    8013             :  *   performs Quarter Pel refinement for the 85 PUs
    8014             :  *******************************************/
    8015           0 : static void QuarterPelSearch_LCU(
    8016             :     MeContext
    8017             :         *context_ptr,  //[IN/OUT]  ME context Ptr, used to get/update ME results
    8018             :     uint8_t *pos_Full,    //[IN]
    8019             :     uint32_t FullStride,  //[IN]
    8020             :     uint8_t *pos_b,       //[IN]
    8021             :     uint8_t *pos_h,       //[IN]
    8022             :     uint8_t *pos_j,       //[IN]
    8023             :     int16_t
    8024             :         x_search_area_origin,  //[IN] search area origin in the horizontal
    8025             :                                // direction, used to point to reference samples
    8026             :     int16_t
    8027             :         y_search_area_origin,  //[IN] search area origin in the vertical
    8028             :                                // direction, used to point to reference samples
    8029             :     EbBool disable8x8CuInMeFlag,
    8030             :     EbBool enable_half_pel32x32, EbBool enable_half_pel16x16,
    8031             :     EbBool enable_half_pel8x8,
    8032             :     EbBool enableQuarterPel, EbBool ext_block_flag)
    8033             : {
    8034             :     uint32_t pu_index;
    8035             : 
    8036             :     uint32_t puShiftXIndex;
    8037             :     uint32_t puShiftYIndex;
    8038             : 
    8039             :     uint32_t puLcuBufferIndex;
    8040             : 
    8041             :     // for each one of the 8 positions, we need to determine the 2 buffers to do
    8042             :     // averaging
    8043             :     uint8_t *buf1[8];
    8044             :     uint8_t *buf2[8];
    8045             : 
    8046             :     uint32_t buf1Stride[8];
    8047             :     uint32_t buf2Stride[8];
    8048             : 
    8049             :     int16_t x_mv, y_mv;
    8050             :     uint32_t nidx;
    8051             : 
    8052           0 :     if (context_ptr->fractional_search64x64) {
    8053           0 :         x_mv = _MVXT(*context_ptr->p_best_mv64x64);
    8054           0 :         y_mv = _MVYT(*context_ptr->p_best_mv64x64);
    8055             : 
    8056           0 :         SetQuarterPelRefinementInputsOnTheFly(pos_Full,
    8057             :                                               FullStride,
    8058             :                                               pos_b,
    8059             :                                               pos_h,
    8060             :                                               pos_j,
    8061             :                                               context_ptr->interpolated_stride,
    8062             :                                               x_mv,
    8063             :                                               y_mv,
    8064             :                                               buf1,
    8065             :                                               buf1Stride,
    8066             :                                               buf2,
    8067             :                                               buf2Stride);
    8068             : 
    8069           0 :         buf1[0] = buf1[0];
    8070           0 :         buf2[0] = buf2[0];
    8071           0 :         buf1[1] = buf1[1];
    8072           0 :         buf2[1] = buf2[1];
    8073           0 :         buf1[2] = buf1[2];
    8074           0 :         buf2[2] = buf2[2];
    8075           0 :         buf1[3] = buf1[3];
    8076           0 :         buf2[3] = buf2[3];
    8077           0 :         buf1[4] = buf1[4];
    8078           0 :         buf2[4] = buf2[4];
    8079           0 :         buf1[5] = buf1[5];
    8080           0 :         buf2[5] = buf2[5];
    8081           0 :         buf1[6] = buf1[6];
    8082           0 :         buf2[6] = buf2[6];
    8083           0 :         buf1[7] = buf1[7];
    8084           0 :         buf2[7] = buf2[7];
    8085             : 
    8086           0 :         PU_QuarterPelRefinementOnTheFly(context_ptr,
    8087             :                                         context_ptr->p_best_ssd64x64,
    8088             :                                         0,
    8089             :                                         buf1,
    8090             :                                         buf1Stride,
    8091             :                                         buf2,
    8092             :                                         buf2Stride,
    8093             :                                         64,
    8094             :                                         64,
    8095             :                                         x_search_area_origin,
    8096             :                                         y_search_area_origin,
    8097             :                                         context_ptr->p_best_sad64x64,
    8098             :                                         context_ptr->p_best_mv64x64,
    8099           0 :                                         context_ptr->psub_pel_direction64x64);
    8100             :     }
    8101           0 :     if (enableQuarterPel && enable_half_pel32x32)
    8102             :     {
    8103             :         // 32x32 [4 partitions]
    8104           0 :         for (pu_index = 0; pu_index < 4; ++pu_index) {
    8105           0 :             x_mv = _MVXT(context_ptr->p_best_mv32x32[pu_index]);
    8106           0 :             y_mv = _MVYT(context_ptr->p_best_mv32x32[pu_index]);
    8107             : 
    8108           0 :             SetQuarterPelRefinementInputsOnTheFly(
    8109             :                 pos_Full,
    8110             :                 FullStride,
    8111             :                 pos_b,
    8112             :                 pos_h,
    8113             :                 pos_j,
    8114             :                 context_ptr->interpolated_stride,
    8115             :                 x_mv,
    8116             :                 y_mv,
    8117             :                 buf1,
    8118             :                 buf1Stride,
    8119             :                 buf2,
    8120             :                 buf2Stride);
    8121             : 
    8122           0 :             puShiftXIndex = (pu_index & 0x01) << 5;
    8123           0 :             puShiftYIndex = (pu_index >> 1) << 5;
    8124             : 
    8125           0 :             puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
    8126             : 
    8127           0 :             buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
    8128           0 :             buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
    8129           0 :             buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
    8130           0 :             buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
    8131           0 :             buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
    8132           0 :             buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
    8133           0 :             buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
    8134           0 :             buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
    8135           0 :             buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
    8136           0 :             buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
    8137           0 :             buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
    8138           0 :             buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
    8139           0 :             buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
    8140           0 :             buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
    8141           0 :             buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
    8142           0 :             buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
    8143             : 
    8144           0 :             PU_QuarterPelRefinementOnTheFly(
    8145             :                 context_ptr,
    8146           0 :                 &context_ptr->p_best_ssd32x32[pu_index],
    8147             :                 puLcuBufferIndex,
    8148             :                 buf1,
    8149             :                 buf1Stride,
    8150             :                 buf2,
    8151             :                 buf2Stride,
    8152             :                 32,
    8153             :                 32,
    8154             :                 x_search_area_origin,
    8155             :                 y_search_area_origin,
    8156           0 :                 &context_ptr->p_best_sad32x32[pu_index],
    8157           0 :                 &context_ptr->p_best_mv32x32[pu_index],
    8158           0 :                 context_ptr->psub_pel_direction32x32[pu_index]);
    8159             :         }
    8160             :     }
    8161             : 
    8162           0 :     if (enableQuarterPel && enable_half_pel16x16)
    8163             :     {
    8164             :         // 16x16 [16 partitions]
    8165           0 :         for (pu_index = 0; pu_index < 16; ++pu_index) {
    8166           0 :             nidx = tab16x16[pu_index];
    8167             : 
    8168           0 :             x_mv = _MVXT(context_ptr->p_best_mv16x16[nidx]);
    8169           0 :             y_mv = _MVYT(context_ptr->p_best_mv16x16[nidx]);
    8170             : 
    8171           0 :             SetQuarterPelRefinementInputsOnTheFly(
    8172             :                 pos_Full,
    8173             :                 FullStride,
    8174             :                 pos_b,
    8175             :                 pos_h,
    8176             :                 pos_j,
    8177             :                 context_ptr->interpolated_stride,
    8178             :                 x_mv,
    8179             :                 y_mv,
    8180             :                 buf1,
    8181             :                 buf1Stride,
    8182             :                 buf2,
    8183             :                 buf2Stride);
    8184             : 
    8185           0 :             puShiftXIndex = (pu_index & 0x03) << 4;
    8186           0 :             puShiftYIndex = (pu_index >> 2) << 4;
    8187             : 
    8188           0 :             puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
    8189             : 
    8190           0 :             buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
    8191           0 :             buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
    8192           0 :             buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
    8193           0 :             buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
    8194           0 :             buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
    8195           0 :             buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
    8196           0 :             buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
    8197           0 :             buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
    8198           0 :             buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
    8199           0 :             buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
    8200           0 :             buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
    8201           0 :             buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
    8202           0 :             buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
    8203           0 :             buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
    8204           0 :             buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
    8205           0 :             buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
    8206             : 
    8207           0 :             PU_QuarterPelRefinementOnTheFly(
    8208             :                 context_ptr,
    8209           0 :                 &context_ptr->p_best_ssd16x16[nidx],
    8210             :                 puLcuBufferIndex,
    8211             :                 buf1,
    8212             :                 buf1Stride,
    8213             :                 buf2,
    8214             :                 buf2Stride,
    8215             :                 16,
    8216             :                 16,
    8217             :                 x_search_area_origin,
    8218             :                 y_search_area_origin,
    8219           0 :                 &context_ptr->p_best_sad16x16[nidx],
    8220           0 :                 &context_ptr->p_best_mv16x16[nidx],
    8221           0 :                 context_ptr->psub_pel_direction16x16[nidx]);
    8222             :         }
    8223             :     }
    8224             : 
    8225           0 :     if (enableQuarterPel && enable_half_pel8x8)
    8226             :     {
    8227             :         // 8x8   [64 partitions]
    8228           0 :         if (!disable8x8CuInMeFlag) {
    8229           0 :             for (pu_index = 0; pu_index < 64; ++pu_index) {
    8230           0 :                 nidx = tab8x8[pu_index];
    8231             : 
    8232           0 :                 x_mv = _MVXT(context_ptr->p_best_mv8x8[nidx]);
    8233           0 :                 y_mv = _MVYT(context_ptr->p_best_mv8x8[nidx]);
    8234             : 
    8235           0 :                 SetQuarterPelRefinementInputsOnTheFly(
    8236             :                     pos_Full,
    8237             :                     FullStride,
    8238             :                     pos_b,
    8239             :                     pos_h,
    8240             :                     pos_j,
    8241             :                     context_ptr->interpolated_stride,
    8242             :                     x_mv,
    8243             :                     y_mv,
    8244             :                     buf1,
    8245             :                     buf1Stride,
    8246             :                     buf2,
    8247             :                     buf2Stride);
    8248             : 
    8249           0 :                 puShiftXIndex = (pu_index & 0x07) << 3;
    8250           0 :                 puShiftYIndex = (pu_index >> 3) << 3;
    8251             : 
    8252           0 :                 puLcuBufferIndex =
    8253           0 :                     puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
    8254             : 
    8255           0 :                 buf1[0] =
    8256           0 :                     buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
    8257           0 :                 buf2[0] =
    8258           0 :                     buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
    8259           0 :                 buf1[1] =
    8260           0 :                     buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
    8261           0 :                 buf2[1] =
    8262           0 :                     buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
    8263           0 :                 buf1[2] =
    8264           0 :                     buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
    8265           0 :                 buf2[2] =
    8266           0 :                     buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
    8267           0 :                 buf1[3] =
    8268           0 :                     buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
    8269           0 :                 buf2[3] =
    8270           0 :                     buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
    8271           0 :                 buf1[4] =
    8272           0 :                     buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
    8273           0 :                 buf2[4] =
    8274           0 :                     buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
    8275           0 :                 buf1[5] =
    8276           0 :                     buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
    8277           0 :                 buf2[5] =
    8278           0 :                     buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
    8279           0 :                 buf1[6] =
    8280           0 :                     buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
    8281           0 :                 buf2[6] =
    8282           0 :                     buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
    8283           0 :                 buf1[7] =
    8284           0 :                     buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
    8285           0 :                 buf2[7] =
    8286           0 :                     buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
    8287             : 
    8288           0 :                 PU_QuarterPelRefinementOnTheFly(
    8289             :                     context_ptr,
    8290           0 :                     &context_ptr->p_best_ssd8x8[nidx],
    8291             :                     puLcuBufferIndex,
    8292             :                     buf1,
    8293             :                     buf1Stride,
    8294             :                     buf2,
    8295             :                     buf2Stride,
    8296             :                     8,
    8297             :                     8,
    8298             :                     x_search_area_origin,
    8299             :                     y_search_area_origin,
    8300           0 :                     &context_ptr->p_best_sad8x8[nidx],
    8301           0 :                     &context_ptr->p_best_mv8x8[nidx],
    8302           0 :                     context_ptr->psub_pel_direction8x8[nidx]);
    8303             :             }
    8304             :         }
    8305             :     }
    8306             : 
    8307           0 :     if (ext_block_flag) {
    8308             :         // 64x32
    8309           0 :         for (pu_index = 0; pu_index < 2; ++pu_index) {
    8310           0 :             puShiftXIndex = 0;
    8311           0 :             puShiftYIndex = pu_index << 5;
    8312             : 
    8313           0 :             x_mv = _MVXT(context_ptr->p_best_mv64x32[pu_index]);
    8314           0 :             y_mv = _MVYT(context_ptr->p_best_mv64x32[pu_index]);
    8315             : 
    8316           0 :             SetQuarterPelRefinementInputsOnTheFly(
    8317             :                 pos_Full,
    8318             :                 FullStride,
    8319             :                 pos_b,
    8320             :                 pos_h,
    8321             :                 pos_j,
    8322             :                 context_ptr->interpolated_stride,
    8323             :                 x_mv,
    8324             :                 y_mv,
    8325             :                 buf1,
    8326             :                 buf1Stride,
    8327             :                 buf2,
    8328             :                 buf2Stride);
    8329             : 
    8330           0 :             puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
    8331             : 
    8332           0 :             buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
    8333           0 :             buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
    8334           0 :             buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
    8335           0 :             buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
    8336           0 :             buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
    8337           0 :             buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
    8338           0 :             buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
    8339           0 :             buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
    8340           0 :             buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
    8341           0 :             buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
    8342           0 :             buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
    8343           0 :             buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
    8344           0 :             buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
    8345           0 :             buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
    8346           0 :             buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
    8347           0 :             buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
    8348             : 
    8349           0 :             PU_QuarterPelRefinementOnTheFly(
    8350             :                 context_ptr,
    8351           0 :                 &context_ptr->p_best_ssd64x32[pu_index],
    8352             :                 puLcuBufferIndex,
    8353             :                 buf1,
    8354             :                 buf1Stride,
    8355             :                 buf2,
    8356             :                 buf2Stride,
    8357             :                 64,
    8358             :                 32,
    8359             :                 x_search_area_origin,
    8360             :                 y_search_area_origin,
    8361           0 :                 &context_ptr->p_best_sad64x32[pu_index],
    8362           0 :                 &context_ptr->p_best_mv64x32[pu_index],
    8363           0 :                 context_ptr->psub_pel_direction64x32[pu_index]);
    8364             :         }
    8365             : 
    8366             :         // 32x16
    8367           0 :         for (pu_index = 0; pu_index < 8; ++pu_index) {
    8368           0 :             nidx = tab32x16[pu_index];  // TODO bitwise this
    8369             : 
    8370           0 :             puShiftXIndex = (pu_index & 0x01) << 5;
    8371           0 :             puShiftYIndex = (pu_index >> 1) << 4;
    8372             : 
    8373           0 :             x_mv = _MVXT(context_ptr->p_best_mv32x16[nidx]);
    8374           0 :             y_mv = _MVYT(context_ptr->p_best_mv32x16[nidx]);
    8375             : 
    8376           0 :             SetQuarterPelRefinementInputsOnTheFly(
    8377             :                 pos_Full,
    8378             :                 FullStride,
    8379             :                 pos_b,
    8380             :                 pos_h,
    8381             :                 pos_j,
    8382             :                 context_ptr->interpolated_stride,
    8383             :                 x_mv,
    8384             :                 y_mv,
    8385             :                 buf1,
    8386             :                 buf1Stride,
    8387             :                 buf2,
    8388             :                 buf2Stride);
    8389             : 
    8390           0 :             puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
    8391             : 
    8392           0 :             buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
    8393           0 :             buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
    8394           0 :             buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
    8395           0 :             buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
    8396           0 :             buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
    8397           0 :             buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
    8398           0 :             buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
    8399           0 :             buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
    8400           0 :             buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
    8401           0 :             buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
    8402           0 :             buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
    8403           0 :             buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
    8404           0 :             buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
    8405           0 :             buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
    8406           0 :             buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
    8407           0 :             buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
    8408             : 
    8409           0 :             PU_QuarterPelRefinementOnTheFly(
    8410             :                 context_ptr,
    8411           0 :                 &context_ptr->p_best_ssd32x16[nidx],
    8412             :                 puLcuBufferIndex,
    8413             :                 buf1,
    8414             :                 buf1Stride,
    8415             :                 buf2,
    8416             :                 buf2Stride,
    8417             :                 32,
    8418             :                 16,
    8419             :                 x_search_area_origin,
    8420             :                 y_search_area_origin,
    8421           0 :                 &context_ptr->p_best_sad32x16[nidx],
    8422           0 :                 &context_ptr->p_best_mv32x16[nidx],
    8423           0 :                 context_ptr->psub_pel_direction32x16[nidx]);
    8424             :         }
    8425             : 
    8426             :         // 16x8
    8427           0 :         for (pu_index = 0; pu_index < 32; ++pu_index) {
    8428           0 :             nidx = tab16x8[pu_index];
    8429             : 
    8430           0 :             puShiftXIndex = (pu_index & 0x03) << 4;
    8431           0 :             puShiftYIndex = (pu_index >> 2) << 3;
    8432             : 
    8433           0 :             x_mv = _MVXT(context_ptr->p_best_mv16x8[nidx]);
    8434           0 :             y_mv = _MVYT(context_ptr->p_best_mv16x8[nidx]);
    8435             : 
    8436           0 :             SetQuarterPelRefinementInputsOnTheFly(
    8437             :                 pos_Full,
    8438             :                 FullStride,
    8439             :                 pos_b,
    8440             :                 pos_h,
    8441             :                 pos_j,
    8442             :                 context_ptr->interpolated_stride,
    8443             :                 x_mv,
    8444             :                 y_mv,
    8445             :                 buf1,
    8446             :                 buf1Stride,
    8447             :                 buf2,
    8448             :                 buf2Stride);
    8449             : 
    8450           0 :             puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
    8451             : 
    8452           0 :             buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
    8453           0 :             buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
    8454           0 :             buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
    8455           0 :             buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
    8456           0 :             buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
    8457           0 :             buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
    8458           0 :             buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
    8459           0 :             buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
    8460           0 :             buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
    8461           0 :             buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
    8462           0 :             buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
    8463           0 :             buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
    8464           0 :             buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
    8465           0 :             buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
    8466           0 :             buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
    8467           0 :             buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
    8468             : 
    8469           0 :             PU_QuarterPelRefinementOnTheFly(
    8470             :                 context_ptr,
    8471           0 :                 &context_ptr->p_best_ssd16x8[nidx],
    8472             :                 puLcuBufferIndex,
    8473             :                 buf1,
    8474             :                 buf1Stride,
    8475             :                 buf2,
    8476             :                 buf2Stride,
    8477             :                 16,
    8478             :                 8,
    8479             :                 x_search_area_origin,
    8480             :                 y_search_area_origin,
    8481           0 :                 &context_ptr->p_best_sad16x8[nidx],
    8482           0 :                 &context_ptr->p_best_mv16x8[nidx],
    8483           0 :                 context_ptr->psub_pel_direction16x8[nidx]);
    8484             :         }
    8485             : 
    8486             :         // 32x64
    8487           0 :         for (pu_index = 0; pu_index < 2; ++pu_index) {
    8488           0 :             puShiftXIndex = pu_index << 5;
    8489           0 :             puShiftYIndex = 0;
    8490             : 
    8491           0 :             x_mv = _MVXT(context_ptr->p_best_mv32x64[pu_index]);
    8492           0 :             y_mv = _MVYT(context_ptr->p_best_mv32x64[pu_index]);
    8493             : 
    8494           0 :             SetQuarterPelRefinementInputsOnTheFly(
    8495             :                 pos_Full,
    8496             :                 FullStride,
    8497             :                 pos_b,
    8498             :                 pos_h,
    8499             :                 pos_j,
    8500             :                 context_ptr->interpolated_stride,
    8501             :                 x_mv,
    8502             :                 y_mv,
    8503             :                 buf1,
    8504             :                 buf1Stride,
    8505             :                 buf2,
    8506             :                 buf2Stride);
    8507             : 
    8508           0 :             puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
    8509             : 
    8510           0 :             buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
    8511           0 :             buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
    8512           0 :             buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
    8513           0 :             buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
    8514           0 :             buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
    8515           0 :             buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
    8516           0 :             buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
    8517           0 :             buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
    8518           0 :             buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
    8519           0 :             buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
    8520           0 :             buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
    8521           0 :             buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
    8522           0 :             buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
    8523           0 :             buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
    8524           0 :             buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
    8525           0 :             buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
    8526             : 
    8527           0 :             PU_QuarterPelRefinementOnTheFly(
    8528             :                 context_ptr,
    8529           0 :                 &context_ptr->p_best_ssd32x64[pu_index],
    8530             :                 puLcuBufferIndex,
    8531             :                 buf1,
    8532             :                 buf1Stride,
    8533             :                 buf2,
    8534             :                 buf2Stride,
    8535             :                 32,
    8536             :                 64,
    8537             :                 x_search_area_origin,
    8538             :                 y_search_area_origin,
    8539           0 :                 &context_ptr->p_best_sad32x64[pu_index],
    8540           0 :                 &context_ptr->p_best_mv32x64[pu_index],
    8541           0 :                 context_ptr->psub_pel_direction32x64[pu_index]);
    8542             :         }
    8543             : 
    8544             :         // 16x32
    8545           0 :         for (pu_index = 0; pu_index < 8; ++pu_index) {
    8546           0 :             nidx = tab16x32[pu_index];
    8547             : 
    8548           0 :             puShiftXIndex = (pu_index & 0x03) << 4;
    8549           0 :             puShiftYIndex = (pu_index >> 2) << 5;
    8550             : 
    8551           0 :             x_mv = _MVXT(context_ptr->p_best_mv16x32[nidx]);
    8552           0 :             y_mv = _MVYT(context_ptr->p_best_mv16x32[nidx]);
    8553             : 
    8554           0 :             SetQuarterPelRefinementInputsOnTheFly(
    8555             :                 pos_Full,
    8556             :                 FullStride,
    8557             :                 pos_b,
    8558             :                 pos_h,
    8559             :                 pos_j,
    8560             :                 context_ptr->interpolated_stride,
    8561             :                 x_mv,
    8562             :                 y_mv,
    8563             :                 buf1,
    8564             :                 buf1Stride,
    8565             :                 buf2,
    8566             :                 buf2Stride);
    8567             : 
    8568           0 :             puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
    8569             : 
    8570           0 :             buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
    8571           0 :             buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
    8572           0 :             buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
    8573           0 :             buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
    8574           0 :             buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
    8575           0 :             buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
    8576           0 :             buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
    8577           0 :             buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
    8578           0 :             buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
    8579           0 :             buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
    8580           0 :             buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
    8581           0 :             buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
    8582           0 :             buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
    8583           0 :             buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
    8584           0 :             buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
    8585           0 :             buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
    8586             : 
    8587           0 :             PU_QuarterPelRefinementOnTheFly(
    8588             :                 context_ptr,
    8589           0 :                 &context_ptr->p_best_ssd16x32[nidx],
    8590             :                 puLcuBufferIndex,
    8591             :                 buf1,
    8592             :                 buf1Stride,
    8593             :                 buf2,
    8594             :                 buf2Stride,
    8595             :                 16,
    8596             :                 32,
    8597             :                 x_search_area_origin,
    8598             :                 y_search_area_origin,
    8599           0 :                 &context_ptr->p_best_sad16x32[nidx],
    8600           0 :                 &context_ptr->p_best_mv16x32[nidx],
    8601           0 :                 context_ptr->psub_pel_direction16x32[nidx]);
    8602             :         }
    8603             : 
    8604             :         // 8x16
    8605           0 :         for (pu_index = 0; pu_index < 32; ++pu_index) {
    8606           0 :             nidx = tab8x16[pu_index];
    8607             : 
    8608           0 :             puShiftXIndex = (pu_index & 0x07) << 3;
    8609           0 :             puShiftYIndex = (pu_index >> 3) << 4;
    8610             : 
    8611           0 :             x_mv = _MVXT(context_ptr->p_best_mv8x16[nidx]);
    8612           0 :             y_mv = _MVYT(context_ptr->p_best_mv8x16[nidx]);
    8613             : 
    8614           0 :             SetQuarterPelRefinementInputsOnTheFly(
    8615             :                 pos_Full,
    8616             :                 FullStride,
    8617             :                 pos_b,
    8618             :                 pos_h,
    8619             :                 pos_j,
    8620             :                 context_ptr->interpolated_stride,
    8621             :                 x_mv,
    8622             :                 y_mv,
    8623             :                 buf1,
    8624             :                 buf1Stride,
    8625             :                 buf2,
    8626             :                 buf2Stride);
    8627             : 
    8628           0 :             puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
    8629             : 
    8630           0 :             buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
    8631           0 :             buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
    8632           0 :             buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
    8633           0 :             buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
    8634           0 :             buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
    8635           0 :             buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
    8636           0 :             buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
    8637           0 :             buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
    8638           0 :             buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
    8639           0 :             buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
    8640           0 :             buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
    8641           0 :             buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
    8642           0 :             buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
    8643           0 :             buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
    8644           0 :             buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
    8645           0 :             buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
    8646             : 
    8647           0 :             PU_QuarterPelRefinementOnTheFly(
    8648             :                 context_ptr,
    8649           0 :                 &context_ptr->p_best_ssd8x16[nidx],
    8650             :                 puLcuBufferIndex,
    8651             :                 buf1,
    8652             :                 buf1Stride,
    8653             :                 buf2,
    8654             :                 buf2Stride,
    8655             :                 8,
    8656             :                 16,
    8657             :                 x_search_area_origin,
    8658             :                 y_search_area_origin,
    8659           0 :                 &context_ptr->p_best_sad8x16[nidx],
    8660           0 :                 &context_ptr->p_best_mv8x16[nidx],
    8661           0 :                 context_ptr->psub_pel_direction8x16[nidx]);
    8662             :         }
    8663             : 
    8664             :         // 32x8
    8665           0 :         for (pu_index = 0; pu_index < 16; ++pu_index) {
    8666           0 :             nidx = tab32x8[pu_index];
    8667             : 
    8668           0 :             puShiftXIndex = (pu_index & 0x01) << 5;
    8669           0 :             puShiftYIndex = (pu_index >> 1) << 3;
    8670             : 
    8671           0 :             x_mv = _MVXT(context_ptr->p_best_mv32x8[nidx]);
    8672           0 :             y_mv = _MVYT(context_ptr->p_best_mv32x8[nidx]);
    8673             : 
    8674           0 :             SetQuarterPelRefinementInputsOnTheFly(
    8675             :                 pos_Full,
    8676             :                 FullStride,
    8677             :                 pos_b,
    8678             :                 pos_h,
    8679             :                 pos_j,
    8680             :                 context_ptr->interpolated_stride,
    8681             :                 x_mv,
    8682             :                 y_mv,
    8683             :                 buf1,
    8684             :                 buf1Stride,
    8685             :                 buf2,
    8686             :                 buf2Stride);
    8687             : 
    8688           0 :             puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
    8689             : 
    8690           0 :             buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
    8691           0 :             buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
    8692           0 :             buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
    8693           0 :             buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
    8694           0 :             buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
    8695           0 :             buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
    8696           0 :             buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
    8697           0 :             buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
    8698           0 :             buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
    8699           0 :             buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
    8700           0 :             buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
    8701           0 :             buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
    8702           0 :             buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
    8703           0 :             buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
    8704           0 :             buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
    8705           0 :             buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
    8706             : 
    8707           0 :             PU_QuarterPelRefinementOnTheFly(
    8708             :                 context_ptr,
    8709           0 :                 &context_ptr->p_best_ssd32x8[nidx],
    8710             :                 puLcuBufferIndex,
    8711             :                 buf1,
    8712             :                 buf1Stride,
    8713             :                 buf2,
    8714             :                 buf2Stride,
    8715             :                 32,
    8716             :                 8,
    8717             :                 x_search_area_origin,
    8718             :                 y_search_area_origin,
    8719           0 :                 &context_ptr->p_best_sad32x8[nidx],
    8720           0 :                 &context_ptr->p_best_mv32x8[nidx],
    8721           0 :                 context_ptr->psub_pel_direction32x8[nidx]);
    8722             :         }
    8723             : 
    8724             :         // 8x32
    8725           0 :         for (pu_index = 0; pu_index < 16; ++pu_index) {
    8726           0 :             nidx = tab8x32[pu_index];
    8727             : 
    8728           0 :             puShiftXIndex = (pu_index & 0x07) << 3;
    8729           0 :             puShiftYIndex = (pu_index >> 3) << 5;
    8730             : 
    8731           0 :             x_mv = _MVXT(context_ptr->p_best_mv8x32[nidx]);
    8732           0 :             y_mv = _MVYT(context_ptr->p_best_mv8x32[nidx]);
    8733             : 
    8734           0 :             SetQuarterPelRefinementInputsOnTheFly(
    8735             :                 pos_Full,
    8736             :                 FullStride,
    8737             :                 pos_b,
    8738             :                 pos_h,
    8739             :                 pos_j,
    8740             :                 context_ptr->interpolated_stride,
    8741             :                 x_mv,
    8742             :                 y_mv,
    8743             :                 buf1,
    8744             :                 buf1Stride,
    8745             :                 buf2,
    8746             :                 buf2Stride);
    8747             : 
    8748           0 :             puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
    8749             : 
    8750           0 :             buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
    8751           0 :             buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
    8752           0 :             buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
    8753           0 :             buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
    8754           0 :             buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
    8755           0 :             buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
    8756           0 :             buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
    8757           0 :             buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
    8758           0 :             buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
    8759           0 :             buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
    8760           0 :             buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
    8761           0 :             buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
    8762           0 :             buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
    8763           0 :             buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
    8764           0 :             buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
    8765           0 :             buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
    8766             : 
    8767           0 :             PU_QuarterPelRefinementOnTheFly(
    8768             :                 context_ptr,
    8769           0 :                 &context_ptr->p_best_ssd8x32[nidx],
    8770             :                 puLcuBufferIndex,
    8771             :                 buf1,
    8772             :                 buf1Stride,
    8773             :                 buf2,
    8774             :                 buf2Stride,
    8775             :                 8,
    8776             :                 32,
    8777             :                 x_search_area_origin,
    8778             :                 y_search_area_origin,
    8779           0 :                 &context_ptr->p_best_sad8x32[nidx],
    8780           0 :                 &context_ptr->p_best_mv8x32[nidx],
    8781           0 :                 context_ptr->psub_pel_direction8x32[nidx]);
    8782             :         }
    8783             : 
    8784             :         // 64x16
    8785           0 :         for (pu_index = 0; pu_index < 4; ++pu_index) {
    8786           0 :             nidx = pu_index;
    8787             : 
    8788           0 :             puShiftXIndex = 0;
    8789           0 :             puShiftYIndex = pu_index << 4;
    8790             : 
    8791           0 :             x_mv = _MVXT(context_ptr->p_best_mv64x16[nidx]);
    8792           0 :             y_mv = _MVYT(context_ptr->p_best_mv64x16[nidx]);
    8793             : 
    8794           0 :             SetQuarterPelRefinementInputsOnTheFly(
    8795             :                 pos_Full,
    8796             :                 FullStride,
    8797             :                 pos_b,
    8798             :                 pos_h,
    8799             :                 pos_j,
    8800             :                 context_ptr->interpolated_stride,
    8801             :                 x_mv,
    8802             :                 y_mv,
    8803             :                 buf1,
    8804             :                 buf1Stride,
    8805             :                 buf2,
    8806             :                 buf2Stride);
    8807             : 
    8808           0 :             puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
    8809             : 
    8810           0 :             buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
    8811           0 :             buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
    8812           0 :             buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
    8813           0 :             buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
    8814           0 :             buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
    8815           0 :             buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
    8816           0 :             buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
    8817           0 :             buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
    8818           0 :             buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
    8819           0 :             buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
    8820           0 :             buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
    8821           0 :             buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
    8822           0 :             buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
    8823           0 :             buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
    8824           0 :             buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
    8825           0 :             buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
    8826             : 
    8827           0 :             PU_QuarterPelRefinementOnTheFly(
    8828             :                 context_ptr,
    8829           0 :                 &context_ptr->p_best_ssd64x16[nidx],
    8830             :                 puLcuBufferIndex,
    8831             :                 buf1,
    8832             :                 buf1Stride,
    8833             :                 buf2,
    8834             :                 buf2Stride,
    8835             :                 64,
    8836             :                 16,
    8837             :                 x_search_area_origin,
    8838             :                 y_search_area_origin,
    8839           0 :                 &context_ptr->p_best_sad64x16[nidx],
    8840           0 :                 &context_ptr->p_best_mv64x16[nidx],
    8841           0 :                 context_ptr->psub_pel_direction64x16[nidx]);
    8842             :         }
    8843             : 
    8844             :         // 16x64
    8845           0 :         for (pu_index = 0; pu_index < 4; ++pu_index) {
    8846           0 :             nidx = pu_index;
    8847             : 
    8848           0 :             puShiftXIndex = pu_index << 4;
    8849           0 :             puShiftYIndex = 0;
    8850             : 
    8851           0 :             x_mv = _MVXT(context_ptr->p_best_mv16x64[nidx]);
    8852           0 :             y_mv = _MVYT(context_ptr->p_best_mv16x64[nidx]);
    8853             : 
    8854           0 :             SetQuarterPelRefinementInputsOnTheFly(
    8855             :                 pos_Full,
    8856             :                 FullStride,
    8857             :                 pos_b,
    8858             :                 pos_h,
    8859             :                 pos_j,
    8860             :                 context_ptr->interpolated_stride,
    8861             :                 x_mv,
    8862             :                 y_mv,
    8863             :                 buf1,
    8864             :                 buf1Stride,
    8865             :                 buf2,
    8866             :                 buf2Stride);
    8867             : 
    8868           0 :             puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
    8869             : 
    8870           0 :             buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
    8871           0 :             buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
    8872           0 :             buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
    8873           0 :             buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
    8874           0 :             buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
    8875           0 :             buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
    8876           0 :             buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
    8877           0 :             buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
    8878           0 :             buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
    8879           0 :             buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
    8880           0 :             buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
    8881           0 :             buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
    8882           0 :             buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
    8883           0 :             buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
    8884           0 :             buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
    8885           0 :             buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
    8886             : 
    8887           0 :             PU_QuarterPelRefinementOnTheFly(
    8888             :                 context_ptr,
    8889           0 :                 &context_ptr->p_best_ssd16x64[nidx],
    8890             :                 puLcuBufferIndex,
    8891             :                 buf1,
    8892             :                 buf1Stride,
    8893             :                 buf2,
    8894             :                 buf2Stride,
    8895             :                 16,
    8896             :                 64,
    8897             :                 x_search_area_origin,
    8898             :                 y_search_area_origin,
    8899           0 :                 &context_ptr->p_best_sad16x64[nidx],
    8900           0 :                 &context_ptr->p_best_mv16x64[nidx],
    8901           0 :                 context_ptr->psub_pel_direction16x64[nidx]);
    8902             :         }
    8903             :     }
    8904             : 
    8905           0 :     return;
    8906             : }
    8907             : #define QP_REF_OPT 1
    8908             : /*******************************************
    8909             :  * quarter_pel_refinemnet_block
    8910             :  *   performs Quarter Pel refinement for each block
    8911             :  *******************************************/
    8912           0 : static void quarter_pel_refinemnet_block(
    8913             :     MeContext *context_ptr,  // [IN] ME context Ptr, used to get SB Ptr
    8914             :     uint32_t *p_best_ssd,
    8915             :     uint32_t
    8916             :         src_block_index,  // [IN] PU origin, used to point to source samples
    8917             :     uint8_t **buf1,       // [IN]
    8918             :     uint32_t *buf1_stride,
    8919             :     uint8_t **buf2,  // [IN]
    8920             :     uint32_t *buf2_stride,
    8921             :     uint32_t pu_width,   // [IN]  PU width
    8922             :     uint32_t pu_height,  // [IN]  PU height
    8923             :     int16_t
    8924             :         x_search_area_origin,  // [IN] search area origin in the horizontal
    8925             :                                // direction, used to point to reference samples
    8926             :     int16_t
    8927             :         y_search_area_origin,  // [IN] search area origin in the vertical
    8928             :                                // direction, used to point to reference samples
    8929             :     uint32_t candidate_mv, uint32_t *p_best_sad,
    8930             :     uint32_t *p_best_mv, uint16_t is_frac_candidate) {
    8931           0 :     int16_t x_mv = _MVXT(candidate_mv);
    8932           0 :     int16_t y_mv = _MVYT(candidate_mv);
    8933           0 :     int16_t search_Index_x = ((x_mv + 2) >> 2) - x_search_area_origin;
    8934           0 :     int16_t search_Index_y = ((y_mv + 2) >> 2) - y_search_area_origin;
    8935             :     uint64_t dist;
    8936             :     int16_t quarter_mv_x[8];
    8937             :     int16_t quarter_mv_y[8];
    8938           0 :     int32_t search_region_Index1 = 0;
    8939           0 :     int32_t search_region_Index2 = 0;
    8940           0 :     quarter_mv_x[0] = x_mv - 1;  // L  position
    8941           0 :     quarter_mv_x[1] = x_mv + 1;  // R  position
    8942           0 :     quarter_mv_x[2] = x_mv;      // T  position
    8943           0 :     quarter_mv_x[3] = x_mv;      // B  position
    8944           0 :     quarter_mv_x[4] = x_mv - 1;  // TL position
    8945           0 :     quarter_mv_x[5] = x_mv + 1;  // TR position
    8946           0 :     quarter_mv_x[6] = x_mv + 1;  // BR position
    8947           0 :     quarter_mv_x[7] = x_mv - 1;  // BL position
    8948           0 :     quarter_mv_y[0] = y_mv;      // L  position
    8949           0 :     quarter_mv_y[1] = y_mv;      // R  position
    8950           0 :     quarter_mv_y[2] = y_mv - 1;  // T  position
    8951           0 :     quarter_mv_y[3] = y_mv + 1;  // B  position
    8952           0 :     quarter_mv_y[4] = y_mv - 1;  // TL position
    8953           0 :     quarter_mv_y[5] = y_mv - 1;  // TR position
    8954           0 :     quarter_mv_y[6] = y_mv + 1;  // BR position
    8955           0 :     quarter_mv_y[7] = y_mv + 1;  // BL position
    8956             :     // L position
    8957           0 :     search_region_Index1 = (int32_t)search_Index_x +
    8958           0 :                            (int32_t)buf1_stride[0] * (int32_t)search_Index_y;
    8959           0 :     search_region_Index2 = (int32_t)search_Index_x +
    8960           0 :                            (int32_t)buf2_stride[0] * (int32_t)search_Index_y;
    8961           0 :     dist = (context_ptr->fractional_search_method == SSD_SEARCH)
    8962           0 :                ? combined_averaging_ssd(
    8963           0 :                      &(context_ptr->sb_buffer[src_block_index]),
    8964             :                      BLOCK_SIZE_64,
    8965           0 :                      buf1[0] + search_region_Index1,
    8966           0 :                      buf1_stride[0],
    8967           0 :                      buf2[0] + search_region_Index2,
    8968           0 :                      buf2_stride[0],
    8969             :                      pu_height,
    8970             :                      pu_width)
    8971           0 :                : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    8972           0 :                      ? (nxm_sad_avg_kernel(
    8973           0 :                                 &(context_ptr->sb_buffer[src_block_index]),
    8974             :                                 BLOCK_SIZE_64 << 1,
    8975           0 :                                 buf1[0] + search_region_Index1,
    8976           0 :                                 buf1_stride[0] << 1,
    8977           0 :                                 buf2[0] + search_region_Index2,
    8978           0 :                                 buf2_stride[0] << 1,
    8979             :                                 pu_height >> 1,
    8980             :                                 pu_width))
    8981           0 :                            << 1
    8982           0 :                      : nxm_sad_avg_kernel(
    8983           0 :                                &(context_ptr->sb_buffer[src_block_index]),
    8984             :                                BLOCK_SIZE_64,
    8985           0 :                                buf1[0] + search_region_Index1,
    8986             :                                buf1_stride[0],
    8987           0 :                                buf2[0] + search_region_Index2,
    8988             :                                buf2_stride[0],
    8989             :                                pu_height,
    8990             :                                pu_width);
    8991           0 :     if (context_ptr->fractional_search_method == SSD_SEARCH) {
    8992           0 :         if (dist < *p_best_ssd) {
    8993           0 :             *p_best_sad = (uint32_t)
    8994           0 :                 nxm_sad_avg_kernel(
    8995           0 :                     &(context_ptr->sb_buffer[src_block_index]),
    8996             :                     BLOCK_SIZE_64,
    8997           0 :                     buf1[0] + search_region_Index1,
    8998             :                     buf1_stride[0],
    8999           0 :                     buf2[0] + search_region_Index2,
    9000             :                     buf2_stride[0],
    9001             :                     pu_height,
    9002             :                     pu_width);
    9003           0 :             *p_best_mv =
    9004           0 :                 ((uint16_t)quarter_mv_y[0] << 16) | ((uint16_t)quarter_mv_x[0]);
    9005           0 :             *p_best_ssd = (uint32_t)dist;
    9006             :         }
    9007             :     } else {
    9008           0 :         if (dist < *p_best_sad) {
    9009           0 :             *p_best_sad = (uint32_t)dist;
    9010           0 :             *p_best_mv =
    9011           0 :                 ((uint16_t)quarter_mv_y[0] << 16) | ((uint16_t)quarter_mv_x[0]);
    9012             :         }
    9013             :     }
    9014             :     // R positions
    9015           0 :     search_region_Index1 = (int32_t)search_Index_x +
    9016           0 :                            (int32_t)buf1_stride[1] * (int32_t)search_Index_y;
    9017           0 :     search_region_Index2 = (int32_t)search_Index_x +
    9018           0 :                            (int32_t)buf2_stride[1] * (int32_t)search_Index_y;
    9019           0 :     dist = (context_ptr->fractional_search_method == SSD_SEARCH)
    9020           0 :                ? combined_averaging_ssd(
    9021           0 :                      &(context_ptr->sb_buffer[src_block_index]),
    9022             :                      BLOCK_SIZE_64,
    9023           0 :                      buf1[1] + search_region_Index1,
    9024           0 :                      buf1_stride[1],
    9025           0 :                      buf2[1] + search_region_Index2,
    9026           0 :                      buf2_stride[1],
    9027             :                      pu_height,
    9028             :                      pu_width)
    9029           0 :                : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    9030           0 :                      ? (nxm_sad_avg_kernel(
    9031           0 :                                 &(context_ptr->sb_buffer[src_block_index]),
    9032             :                                 BLOCK_SIZE_64 << 1,
    9033           0 :                                 buf1[1] + search_region_Index1,
    9034           0 :                                 buf1_stride[1] << 1,
    9035           0 :                                 buf2[1] + search_region_Index2,
    9036           0 :                                 buf2_stride[1] << 1,
    9037             :                                 pu_height >> 1,
    9038             :                                 pu_width))
    9039           0 :                            << 1
    9040           0 :                      : nxm_sad_avg_kernel(
    9041           0 :                                &(context_ptr->sb_buffer[src_block_index]),
    9042             :                                BLOCK_SIZE_64,
    9043           0 :                                buf1[1] + search_region_Index1,
    9044           0 :                                buf1_stride[1],
    9045           0 :                                buf2[1] + search_region_Index2,
    9046           0 :                                buf2_stride[1],
    9047             :                                pu_height,
    9048             :                                pu_width);
    9049           0 :     if (context_ptr->fractional_search_method == SSD_SEARCH) {
    9050           0 :         if (dist < *p_best_ssd) {
    9051           0 :             *p_best_sad = (uint32_t)
    9052           0 :                 nxm_sad_avg_kernel(
    9053           0 :                     &(context_ptr->sb_buffer[src_block_index]),
    9054             :                     BLOCK_SIZE_64,
    9055           0 :                     buf1[1] + search_region_Index1,
    9056           0 :                     buf1_stride[1],
    9057           0 :                     buf2[1] + search_region_Index2,
    9058           0 :                     buf2_stride[1],
    9059             :                     pu_height,
    9060             :                     pu_width);
    9061           0 :             *p_best_mv =
    9062           0 :                 ((uint16_t)quarter_mv_y[1] << 16) | ((uint16_t)quarter_mv_x[1]);
    9063           0 :             *p_best_ssd = (uint32_t)dist;
    9064             :         }
    9065             :     } else {
    9066           0 :         if (dist < *p_best_sad) {
    9067           0 :             *p_best_sad = (uint32_t)dist;
    9068           0 :             *p_best_mv =
    9069           0 :                 ((uint16_t)quarter_mv_y[1] << 16) | ((uint16_t)quarter_mv_x[1]);
    9070             :         }
    9071             :     }
    9072             :     // T position
    9073           0 :     search_region_Index1 = (int32_t)search_Index_x +
    9074           0 :                            (int32_t)buf1_stride[2] * (int32_t)search_Index_y;
    9075           0 :     search_region_Index2 = (int32_t)search_Index_x +
    9076           0 :                            (int32_t)buf2_stride[2] * (int32_t)search_Index_y;
    9077           0 :     dist = (context_ptr->fractional_search_method == SSD_SEARCH)
    9078           0 :                ? combined_averaging_ssd(
    9079           0 :                      &(context_ptr->sb_buffer[src_block_index]),
    9080             :                      BLOCK_SIZE_64,
    9081           0 :                      buf1[2] + search_region_Index1,
    9082           0 :                      buf1_stride[2],
    9083           0 :                      buf2[2] + search_region_Index2,
    9084           0 :                      buf2_stride[2],
    9085             :                      pu_height,
    9086             :                      pu_width)
    9087           0 :                : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    9088           0 :                      ? (nxm_sad_avg_kernel(
    9089           0 :                                 &(context_ptr->sb_buffer[src_block_index]),
    9090             :                                 BLOCK_SIZE_64 << 1,
    9091           0 :                                 buf1[2] + search_region_Index1,
    9092           0 :                                 buf1_stride[2] << 1,
    9093           0 :                                 buf2[2] + search_region_Index2,
    9094           0 :                                 buf2_stride[2] << 1,
    9095             :                                 pu_height >> 1,
    9096             :                                 pu_width))
    9097           0 :                            << 1
    9098           0 :                      : nxm_sad_avg_kernel(
    9099           0 :                                &(context_ptr->sb_buffer[src_block_index]),
    9100             :                                BLOCK_SIZE_64,
    9101           0 :                                buf1[2] + search_region_Index1,
    9102           0 :                                buf1_stride[2],
    9103           0 :                                buf2[2] + search_region_Index2,
    9104           0 :                                buf2_stride[2],
    9105             :                                pu_height,
    9106             :                                pu_width);
    9107           0 :     if (context_ptr->fractional_search_method == SSD_SEARCH) {
    9108           0 :         if (dist < *p_best_ssd) {
    9109           0 :             *p_best_sad = (uint32_t)
    9110           0 :                 nxm_sad_avg_kernel(
    9111           0 :                     &(context_ptr->sb_buffer[src_block_index]),
    9112             :                     BLOCK_SIZE_64,
    9113           0 :                     buf1[2] + search_region_Index1,
    9114           0 :                     buf1_stride[2],
    9115           0 :                     buf2[2] + search_region_Index2,
    9116           0 :                     buf2_stride[2],
    9117             :                     pu_height,
    9118             :                     pu_width);
    9119           0 :             *p_best_mv =
    9120           0 :                 ((uint16_t)quarter_mv_y[2] << 16) | ((uint16_t)quarter_mv_x[2]);
    9121           0 :             *p_best_ssd = (uint32_t)dist;
    9122             :         }
    9123             :     } else {
    9124           0 :         if (dist < *p_best_sad) {
    9125           0 :             *p_best_sad = (uint32_t)dist;
    9126           0 :             *p_best_mv =
    9127           0 :                 ((uint16_t)quarter_mv_y[2] << 16) | ((uint16_t)quarter_mv_x[2]);
    9128             :         }
    9129             :     }
    9130             :     // B position
    9131           0 :     search_region_Index1 = (int32_t)search_Index_x +
    9132           0 :                            (int32_t)buf1_stride[3] * (int32_t)search_Index_y;
    9133           0 :     search_region_Index2 = (int32_t)search_Index_x +
    9134           0 :                            (int32_t)buf2_stride[3] * (int32_t)search_Index_y;
    9135           0 :     dist = (context_ptr->fractional_search_method == SSD_SEARCH)
    9136           0 :                ? combined_averaging_ssd(
    9137           0 :                      &(context_ptr->sb_buffer[src_block_index]),
    9138             :                      BLOCK_SIZE_64,
    9139           0 :                      buf1[3] + search_region_Index1,
    9140           0 :                      buf1_stride[3],
    9141           0 :                      buf2[3] + search_region_Index2,
    9142           0 :                      buf2_stride[3],
    9143             :                      pu_height,
    9144             :                      pu_width)
    9145           0 :                : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    9146           0 :                      ? (nxm_sad_avg_kernel(
    9147           0 :                                 &(context_ptr->sb_buffer[src_block_index]),
    9148             :                                 BLOCK_SIZE_64 << 1,
    9149           0 :                                 buf1[3] + search_region_Index1,
    9150           0 :                                 buf1_stride[3] << 1,
    9151           0 :                                 buf2[3] + search_region_Index2,
    9152           0 :                                 buf2_stride[3] << 1,
    9153             :                                 pu_height >> 1,
    9154             :                                 pu_width))
    9155           0 :                            << 1
    9156           0 :                      : nxm_sad_avg_kernel(
    9157           0 :                                &(context_ptr->sb_buffer[src_block_index]),
    9158             :                                BLOCK_SIZE_64,
    9159           0 :                                buf1[3] + search_region_Index1,
    9160           0 :                                buf1_stride[3],
    9161           0 :                                buf2[3] + search_region_Index2,
    9162           0 :                                buf2_stride[3],
    9163             :                                pu_height,
    9164             :                                pu_width);
    9165           0 :     if (context_ptr->fractional_search_method == SSD_SEARCH) {
    9166           0 :         if (dist < *p_best_ssd) {
    9167           0 :             *p_best_sad = (uint32_t)
    9168           0 :                 nxm_sad_avg_kernel(
    9169           0 :                     &(context_ptr->sb_buffer[src_block_index]),
    9170             :                     BLOCK_SIZE_64,
    9171           0 :                     buf1[3] + search_region_Index1,
    9172           0 :                     buf1_stride[3],
    9173           0 :                     buf2[3] + search_region_Index2,
    9174           0 :                     buf2_stride[3],
    9175             :                     pu_height,
    9176             :                     pu_width);
    9177           0 :             *p_best_mv =
    9178           0 :                 ((uint16_t)quarter_mv_y[3] << 16) | ((uint16_t)quarter_mv_x[3]);
    9179           0 :             *p_best_ssd = (uint32_t)dist;
    9180             :         }
    9181             :     } else {
    9182           0 :         if (dist < *p_best_sad) {
    9183           0 :             *p_best_sad = (uint32_t)dist;
    9184           0 :             *p_best_mv =
    9185           0 :                 ((uint16_t)quarter_mv_y[3] << 16) | ((uint16_t)quarter_mv_x[3]);
    9186             :         }
    9187             :     }
    9188             :     // TL position
    9189             : #if QP_REF_OPT
    9190           0 :     if (!is_frac_candidate) {
    9191             : #endif
    9192           0 :         search_region_Index1 =
    9193           0 :             (int32_t)search_Index_x +
    9194           0 :             (int32_t)buf1_stride[4] * (int32_t)search_Index_y;
    9195           0 :         search_region_Index2 =
    9196           0 :             (int32_t)search_Index_x +
    9197           0 :             (int32_t)buf2_stride[4] * (int32_t)search_Index_y;
    9198           0 :         dist = (context_ptr->fractional_search_method == SSD_SEARCH)
    9199           0 :                    ? combined_averaging_ssd(
    9200           0 :                          &(context_ptr->sb_buffer[src_block_index]),
    9201             :                          BLOCK_SIZE_64,
    9202           0 :                          buf1[4] + search_region_Index1,
    9203           0 :                          buf1_stride[4],
    9204           0 :                          buf2[4] + search_region_Index2,
    9205           0 :                          buf2_stride[4],
    9206             :                          pu_height,
    9207             :                          pu_width)
    9208           0 :                    : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    9209           0 :                          ? (nxm_sad_avg_kernel(
    9210           0 :                                     &(context_ptr->sb_buffer[src_block_index]),
    9211             :                                     BLOCK_SIZE_64 << 1,
    9212           0 :                                     buf1[4] + search_region_Index1,
    9213           0 :                                     buf1_stride[4] << 1,
    9214           0 :                                     buf2[4] + search_region_Index2,
    9215           0 :                                     buf2_stride[4] << 1,
    9216             :                                     pu_height >> 1,
    9217             :                                     pu_width))
    9218           0 :                                << 1
    9219           0 :                          : nxm_sad_avg_kernel(
    9220           0 :                                    &(context_ptr->sb_buffer[src_block_index]),
    9221             :                                    BLOCK_SIZE_64,
    9222           0 :                                    buf1[4] + search_region_Index1,
    9223           0 :                                    buf1_stride[4],
    9224           0 :                                    buf2[4] + search_region_Index2,
    9225           0 :                                    buf2_stride[4],
    9226             :                                    pu_height,
    9227             :                                    pu_width);
    9228           0 :         if (context_ptr->fractional_search_method == SSD_SEARCH) {
    9229           0 :             if (dist < *p_best_ssd) {
    9230           0 :                 *p_best_sad = (uint32_t)nxm_sad_avg_kernel(
    9231           0 :                         &(context_ptr->sb_buffer[src_block_index]),
    9232             :                         BLOCK_SIZE_64,
    9233           0 :                         buf1[4] + search_region_Index1,
    9234           0 :                         buf1_stride[4],
    9235           0 :                         buf2[4] + search_region_Index2,
    9236           0 :                         buf2_stride[4],
    9237             :                         pu_height,
    9238             :                         pu_width);
    9239           0 :                 *p_best_mv = ((uint16_t)quarter_mv_y[4] << 16) |
    9240           0 :                              ((uint16_t)quarter_mv_x[4]);
    9241           0 :                 *p_best_ssd = (uint32_t)dist;
    9242             :             }
    9243             :         } else {
    9244           0 :             if (dist < *p_best_sad) {
    9245           0 :                 *p_best_sad = (uint32_t)dist;
    9246           0 :                 *p_best_mv = ((uint16_t)quarter_mv_y[4] << 16) |
    9247           0 :                              ((uint16_t)quarter_mv_x[4]);
    9248             :             }
    9249             :         }
    9250             :         // TR position
    9251           0 :         search_region_Index1 =
    9252           0 :             (int32_t)search_Index_x +
    9253           0 :             (int32_t)buf1_stride[5] * (int32_t)search_Index_y;
    9254           0 :         search_region_Index2 =
    9255           0 :             (int32_t)search_Index_x +
    9256           0 :             (int32_t)buf2_stride[5] * (int32_t)search_Index_y;
    9257           0 :         dist = (context_ptr->fractional_search_method == SSD_SEARCH)
    9258           0 :                    ? combined_averaging_ssd(
    9259           0 :                          &(context_ptr->sb_buffer[src_block_index]),
    9260             :                          BLOCK_SIZE_64,
    9261           0 :                          buf1[5] + search_region_Index1,
    9262           0 :                          buf1_stride[5],
    9263           0 :                          buf2[5] + search_region_Index2,
    9264           0 :                          buf2_stride[5],
    9265             :                          pu_height,
    9266             :                          pu_width)
    9267           0 :                    : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    9268           0 :                          ? (nxm_sad_avg_kernel(
    9269           0 :                                     &(context_ptr->sb_buffer[src_block_index]),
    9270             :                                     BLOCK_SIZE_64 << 1,
    9271           0 :                                     buf1[5] + search_region_Index1,
    9272           0 :                                     buf1_stride[5] << 1,
    9273           0 :                                     buf2[5] + search_region_Index2,
    9274           0 :                                     buf2_stride[5] << 1,
    9275             :                                     pu_height >> 1,
    9276             :                                     pu_width))
    9277           0 :                                << 1
    9278           0 :                          : nxm_sad_avg_kernel(
    9279           0 :                                    &(context_ptr->sb_buffer[src_block_index]),
    9280             :                                    BLOCK_SIZE_64,
    9281           0 :                                    buf1[5] + search_region_Index1,
    9282           0 :                                    buf1_stride[5],
    9283           0 :                                    buf2[5] + search_region_Index2,
    9284           0 :                                    buf2_stride[5],
    9285             :                                    pu_height,
    9286             :                                    pu_width);
    9287           0 :         if (context_ptr->fractional_search_method == SSD_SEARCH) {
    9288           0 :             if (dist < *p_best_ssd) {
    9289           0 :                 *p_best_sad = (uint32_t)nxm_sad_avg_kernel(
    9290           0 :                         &(context_ptr->sb_buffer[src_block_index]),
    9291             :                         BLOCK_SIZE_64,
    9292           0 :                         buf1[5] + search_region_Index1,
    9293           0 :                         buf1_stride[5],
    9294           0 :                         buf2[5] + search_region_Index2,
    9295           0 :                         buf2_stride[5],
    9296             :                         pu_height,
    9297             :                         pu_width);
    9298           0 :                 *p_best_mv = ((uint16_t)quarter_mv_y[5] << 16) |
    9299           0 :                              ((uint16_t)quarter_mv_x[5]);
    9300           0 :                 *p_best_ssd = (uint32_t)dist;
    9301             :             }
    9302             :         } else {
    9303           0 :             if (dist < *p_best_sad) {
    9304           0 :                 *p_best_sad = (uint32_t)dist;
    9305           0 :                 *p_best_mv = ((uint16_t)quarter_mv_y[5] << 16) |
    9306           0 :                              ((uint16_t)quarter_mv_x[5]);
    9307             :             }
    9308             :         }
    9309             :         // BR position
    9310           0 :         search_region_Index1 =
    9311           0 :             (int32_t)search_Index_x +
    9312           0 :             (int32_t)buf1_stride[6] * (int32_t)search_Index_y;
    9313           0 :         search_region_Index2 =
    9314           0 :             (int32_t)search_Index_x +
    9315           0 :             (int32_t)buf2_stride[6] * (int32_t)search_Index_y;
    9316           0 :         dist = (context_ptr->fractional_search_method == SSD_SEARCH)
    9317           0 :                    ? combined_averaging_ssd(
    9318           0 :                          &(context_ptr->sb_buffer[src_block_index]),
    9319             :                          BLOCK_SIZE_64,
    9320           0 :                          buf1[6] + search_region_Index1,
    9321           0 :                          buf1_stride[6],
    9322           0 :                          buf2[6] + search_region_Index2,
    9323           0 :                          buf2_stride[6],
    9324             :                          pu_height,
    9325             :                          pu_width)
    9326           0 :                    : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    9327           0 :                          ? (nxm_sad_avg_kernel(
    9328           0 :                                     &(context_ptr->sb_buffer[src_block_index]),
    9329             :                                     BLOCK_SIZE_64 << 1,
    9330           0 :                                     buf1[6] + search_region_Index1,
    9331           0 :                                     buf1_stride[6] << 1,
    9332           0 :                                     buf2[6] + search_region_Index2,
    9333           0 :                                     buf2_stride[6] << 1,
    9334             :                                     pu_height >> 1,
    9335             :                                     pu_width))
    9336           0 :                                << 1
    9337           0 :                          : nxm_sad_avg_kernel(
    9338           0 :                                    &(context_ptr->sb_buffer[src_block_index]),
    9339             :                                    BLOCK_SIZE_64,
    9340           0 :                                    buf1[6] + search_region_Index1,
    9341           0 :                                    buf1_stride[6],
    9342           0 :                                    buf2[6] + search_region_Index2,
    9343           0 :                                    buf2_stride[6],
    9344             :                                    pu_height,
    9345             :                                    pu_width);
    9346           0 :         if (context_ptr->fractional_search_method == SSD_SEARCH) {
    9347           0 :             if (dist < *p_best_ssd) {
    9348           0 :                 *p_best_sad = (uint32_t)nxm_sad_avg_kernel(
    9349           0 :                         &(context_ptr->sb_buffer[src_block_index]),
    9350             :                         BLOCK_SIZE_64,
    9351           0 :                         buf1[6] + search_region_Index1,
    9352           0 :                         buf1_stride[6],
    9353           0 :                         buf2[6] + search_region_Index2,
    9354           0 :                         buf2_stride[6],
    9355             :                         pu_height,
    9356             :                         pu_width);
    9357           0 :                 *p_best_mv = ((uint16_t)quarter_mv_y[6] << 16) |
    9358           0 :                              ((uint16_t)quarter_mv_x[6]);
    9359           0 :                 *p_best_ssd = (uint32_t)dist;
    9360             :             }
    9361             :         } else {
    9362           0 :             if (dist < *p_best_sad) {
    9363           0 :                 *p_best_sad = (uint32_t)dist;
    9364           0 :                 *p_best_mv = ((uint16_t)quarter_mv_y[6] << 16) |
    9365           0 :                              ((uint16_t)quarter_mv_x[6]);
    9366             :             }
    9367             :         }
    9368             :         // BL position
    9369           0 :         search_region_Index1 =
    9370           0 :             (int32_t)search_Index_x +
    9371           0 :             (int32_t)buf1_stride[7] * (int32_t)search_Index_y;
    9372           0 :         search_region_Index2 =
    9373           0 :             (int32_t)search_Index_x +
    9374           0 :             (int32_t)buf2_stride[7] * (int32_t)search_Index_y;
    9375           0 :         dist = (context_ptr->fractional_search_method == SSD_SEARCH)
    9376           0 :                    ? combined_averaging_ssd(
    9377           0 :                          &(context_ptr->sb_buffer[src_block_index]),
    9378             :                          BLOCK_SIZE_64,
    9379           0 :                          buf1[7] + search_region_Index1,
    9380           0 :                          buf1_stride[7],
    9381           0 :                          buf2[7] + search_region_Index2,
    9382           0 :                          buf2_stride[7],
    9383             :                          pu_height,
    9384             :                          pu_width)
    9385           0 :                    : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
    9386           0 :                          ? (nxm_sad_avg_kernel(
    9387           0 :                                     &(context_ptr->sb_buffer[src_block_index]),
    9388             :                                     BLOCK_SIZE_64 << 1,
    9389           0 :                                     buf1[7] + search_region_Index1,
    9390           0 :                                     buf1_stride[7] << 1,
    9391           0 :                                     buf2[7] + search_region_Index2,
    9392           0 :                                     buf2_stride[7] << 1,
    9393             :                                     pu_height >> 1,
    9394             :                                     pu_width))
    9395           0 :                                << 1
    9396           0 :                          : nxm_sad_avg_kernel(
    9397           0 :                                    &(context_ptr->sb_buffer[src_block_index]),
    9398             :                                    BLOCK_SIZE_64,
    9399           0 :                                    buf1[7] + search_region_Index1,
    9400           0 :                                    buf1_stride[7],
    9401           0 :                                    buf2[7] + search_region_Index2,
    9402           0 :                                    buf2_stride[7],
    9403             :                                    pu_height,
    9404             :                                    pu_width);
    9405           0 :         if (context_ptr->fractional_search_method == SSD_SEARCH) {
    9406           0 :             if (dist < *p_best_ssd) {
    9407           0 :                 *p_best_sad = (uint32_t)nxm_sad_avg_kernel(
    9408           0 :                         &(context_ptr->sb_buffer[src_block_index]),
    9409             :                         BLOCK_SIZE_64,
    9410           0 :                         buf1[7] + search_region_Index1,
    9411           0 :                         buf1_stride[7],
    9412           0 :                         buf2[7] + search_region_Index2,
    9413           0 :                         buf2_stride[7],
    9414             :                         pu_height,
    9415             :                         pu_width);
    9416           0 :                 *p_best_mv = ((uint16_t)quarter_mv_y[7] << 16) |
    9417           0 :                              ((uint16_t)quarter_mv_x[7]);
    9418           0 :                 *p_best_ssd = (uint32_t)dist;
    9419             :             }
    9420             :         } else {
    9421           0 :             if (dist < *p_best_sad) {
    9422           0 :                 *p_best_sad = (uint32_t)dist;
    9423           0 :                 *p_best_mv = ((uint16_t)quarter_mv_y[7] << 16) |
    9424           0 :                              ((uint16_t)quarter_mv_x[7]);
    9425             :             }
    9426             :         }
    9427             : #if QP_REF_OPT
    9428             :     }
    9429             : #endif
    9430           0 :     return;
    9431             : }
    9432             : /*******************************************
    9433             :  * quarter_pel_refinement_sb
    9434             :  *   performs Quarter Pel refinement
    9435             :  *******************************************/
    9436           0 : void quarter_pel_refinement_sb(
    9437             :     MeContext
    9438             :         *context_ptr,  //[IN/OUT]  ME context Ptr, used to get/update ME results
    9439             :     uint8_t *pos_full,     //[IN]
    9440             :     uint32_t full_stride,  //[IN]
    9441             :     uint8_t *pos_b,        //[IN]
    9442             :     uint8_t *pos_h,        //[IN]
    9443             :     uint8_t *pos_j,        //[IN]
    9444             :     int16_t
    9445             :         x_search_area_origin,  //[IN] search area origin in the horizontal
    9446             :                                // direction, used to point to reference samples
    9447             :     int16_t
    9448             :         y_search_area_origin,  //[IN] search area origin in the vertical
    9449             :                                // direction, used to point to reference samples
    9450             :     uint32_t integer_mv)
    9451             : {
    9452             :     uint32_t pu_index;
    9453             :     uint32_t block_index_shift_x;
    9454             :     uint32_t block_index_shift_y;
    9455             :     uint32_t src_block_index;
    9456             :     uint8_t *buf1[8];
    9457             :     uint8_t *buf2[8];
    9458             :     uint32_t buf1_stride[8];
    9459             :     uint32_t buf2_stride[8];
    9460             :     int16_t x_mv, y_mv;
    9461             :     uint32_t nidx;
    9462           0 :     int16_t int_x_mv = _MVXT(integer_mv);
    9463           0 :     int16_t int_y_mv = _MVYT(integer_mv);
    9464           0 :     int16_t int_xSearchIndex = ((int_x_mv + 2) >> 2) - x_search_area_origin;
    9465           0 :     int16_t int_ySearchIndex = ((int_y_mv + 2) >> 2) - y_search_area_origin;
    9466             :     int16_t x_best_mv;
    9467             :     int16_t y_best_mv;
    9468             :     int16_t best_xSearchIndex;
    9469             :     int16_t best_ySearchIndex;
    9470             :     int16_t dis_x;
    9471             :     int16_t dis_y;
    9472           0 :     int8_t skip_qp_pel = 0;
    9473             :     uint32_t testmv;
    9474             :     int16_t it;
    9475           0 :     int16_t num_qp_it = 2;
    9476           0 :     if (context_ptr->fractional_search64x64) {
    9477           0 :         x_best_mv = _MVXT(*context_ptr->p_best_full_pel_mv64x64);
    9478           0 :         y_best_mv = _MVYT(*context_ptr->p_best_full_pel_mv64x64);
    9479           0 :         best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
    9480           0 :         best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
    9481           0 :         dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
    9482           0 :         dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
    9483           0 :         skip_qp_pel = 0;
    9484           0 :         if ((dis_x) > Q_PEL_SEARCH_WIND)
    9485           0 :             skip_qp_pel = 1;
    9486           0 :         if ((dis_y) > Q_PEL_SEARCH_WIND)
    9487           0 :             skip_qp_pel = 1;
    9488           0 :         if (!skip_qp_pel) {
    9489           0 :             for (it = 0; it < num_qp_it; it++) {
    9490           0 :                 x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
    9491           0 :                 y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
    9492           0 :                 testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    9493           0 :                 SetQuarterPelRefinementInputsOnTheFly(
    9494             :                     pos_full,
    9495             :                     full_stride,
    9496             :                     pos_b,
    9497             :                     pos_h,
    9498             :                     pos_j,
    9499             :                     context_ptr->interpolated_stride,
    9500             :                     x_mv,
    9501             :                     y_mv,
    9502             :                     buf1,
    9503             :                     buf1_stride,
    9504             :                     buf2,
    9505             :                     buf2_stride);
    9506           0 :                 buf1[0] = buf1[0];
    9507           0 :                 buf2[0] = buf2[0];
    9508           0 :                 buf1[1] = buf1[1];
    9509           0 :                 buf2[1] = buf2[1];
    9510           0 :                 buf1[2] = buf1[2];
    9511           0 :                 buf2[2] = buf2[2];
    9512           0 :                 buf1[3] = buf1[3];
    9513           0 :                 buf2[3] = buf2[3];
    9514           0 :                 buf1[4] = buf1[4];
    9515           0 :                 buf2[4] = buf2[4];
    9516           0 :                 buf1[5] = buf1[5];
    9517           0 :                 buf2[5] = buf2[5];
    9518           0 :                 buf1[6] = buf1[6];
    9519           0 :                 buf2[6] = buf2[6];
    9520           0 :                 buf1[7] = buf1[7];
    9521           0 :                 buf2[7] = buf2[7];
    9522           0 :                 quarter_pel_refinemnet_block(context_ptr,
    9523             :                                              context_ptr->p_best_ssd64x64,
    9524             :                                              0,
    9525             :                                              buf1,
    9526             :                                              buf1_stride,
    9527             :                                              buf2,
    9528             :                                              buf2_stride,
    9529             :                                              64,
    9530             :                                              64,
    9531             :                                              x_search_area_origin,
    9532             :                                              y_search_area_origin,
    9533             :                                              testmv,
    9534             :                                              context_ptr->p_best_sad64x64,
    9535             :                                              context_ptr->p_best_mv64x64,
    9536             :                                              it);
    9537             :             }
    9538             :         }
    9539             :     }
    9540             :     // 32x32 [4 partitions]
    9541           0 :     for (pu_index = 0; pu_index < 4; ++pu_index) {
    9542           0 :         x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv32x32[pu_index]);
    9543           0 :         y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv32x32[pu_index]);
    9544           0 :         best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
    9545           0 :         best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
    9546           0 :         dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
    9547           0 :         dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
    9548           0 :         skip_qp_pel = 0;
    9549           0 :         if ((dis_x) > Q_PEL_SEARCH_WIND)
    9550           0 :             skip_qp_pel = 1;
    9551           0 :         if ((dis_y) > Q_PEL_SEARCH_WIND)
    9552           0 :             skip_qp_pel = 1;
    9553           0 :         if (!skip_qp_pel) {
    9554           0 :             for (it = 0; it < num_qp_it; it++) {
    9555           0 :                 x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
    9556           0 :                 y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
    9557           0 :                 testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    9558           0 :                 SetQuarterPelRefinementInputsOnTheFly(
    9559             :                     pos_full,
    9560             :                     full_stride,
    9561             :                     pos_b,
    9562             :                     pos_h,
    9563             :                     pos_j,
    9564             :                     context_ptr->interpolated_stride,
    9565             :                     x_mv,
    9566             :                     y_mv,
    9567             :                     buf1,
    9568             :                     buf1_stride,
    9569             :                     buf2,
    9570             :                     buf2_stride);
    9571           0 :                 block_index_shift_x = (pu_index & 0x01) << 5;
    9572           0 :                 block_index_shift_y = (pu_index >> 1) << 5;
    9573           0 :                 src_block_index =
    9574           0 :                     block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
    9575           0 :                 buf1[0] = buf1[0] + block_index_shift_x +
    9576           0 :                           block_index_shift_y * buf1_stride[0];
    9577           0 :                 buf2[0] = buf2[0] + block_index_shift_x +
    9578           0 :                           block_index_shift_y * buf2_stride[0];
    9579           0 :                 buf1[1] = buf1[1] + block_index_shift_x +
    9580           0 :                           block_index_shift_y * buf1_stride[1];
    9581           0 :                 buf2[1] = buf2[1] + block_index_shift_x +
    9582           0 :                           block_index_shift_y * buf2_stride[1];
    9583           0 :                 buf1[2] = buf1[2] + block_index_shift_x +
    9584           0 :                           block_index_shift_y * buf1_stride[2];
    9585           0 :                 buf2[2] = buf2[2] + block_index_shift_x +
    9586           0 :                           block_index_shift_y * buf2_stride[2];
    9587           0 :                 buf1[3] = buf1[3] + block_index_shift_x +
    9588           0 :                           block_index_shift_y * buf1_stride[3];
    9589           0 :                 buf2[3] = buf2[3] + block_index_shift_x +
    9590           0 :                           block_index_shift_y * buf2_stride[3];
    9591           0 :                 buf1[4] = buf1[4] + block_index_shift_x +
    9592           0 :                           block_index_shift_y * buf1_stride[4];
    9593           0 :                 buf2[4] = buf2[4] + block_index_shift_x +
    9594           0 :                           block_index_shift_y * buf2_stride[4];
    9595           0 :                 buf1[5] = buf1[5] + block_index_shift_x +
    9596           0 :                           block_index_shift_y * buf1_stride[5];
    9597           0 :                 buf2[5] = buf2[5] + block_index_shift_x +
    9598           0 :                           block_index_shift_y * buf2_stride[5];
    9599           0 :                 buf1[6] = buf1[6] + block_index_shift_x +
    9600           0 :                           block_index_shift_y * buf1_stride[6];
    9601           0 :                 buf2[6] = buf2[6] + block_index_shift_x +
    9602           0 :                           block_index_shift_y * buf2_stride[6];
    9603           0 :                 buf1[7] = buf1[7] + block_index_shift_x +
    9604           0 :                           block_index_shift_y * buf1_stride[7];
    9605           0 :                 buf2[7] = buf2[7] + block_index_shift_x +
    9606           0 :                           block_index_shift_y * buf2_stride[7];
    9607           0 :                 quarter_pel_refinemnet_block(
    9608             :                     context_ptr,
    9609           0 :                     &context_ptr->p_best_ssd32x32[pu_index],
    9610             :                     src_block_index,
    9611             :                     buf1,
    9612             :                     buf1_stride,
    9613             :                     buf2,
    9614             :                     buf2_stride,
    9615             :                     32,
    9616             :                     32,
    9617             :                     x_search_area_origin,
    9618             :                     y_search_area_origin,
    9619             :                     testmv,
    9620           0 :                     &context_ptr->p_best_sad32x32[pu_index],
    9621           0 :                     &context_ptr->p_best_mv32x32[pu_index],
    9622             :                     it);
    9623             :             }
    9624             :         }
    9625             :     }
    9626             :     // 16x16 [16 partitions]
    9627           0 :     for (pu_index = 0; pu_index < 16; ++pu_index) {
    9628           0 :         nidx = tab16x16[pu_index];
    9629           0 :         x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv16x16[nidx]);
    9630           0 :         y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv16x16[nidx]);
    9631           0 :         best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
    9632           0 :         best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
    9633           0 :         dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
    9634           0 :         dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
    9635           0 :         skip_qp_pel = 0;
    9636           0 :         if ((dis_x) > Q_PEL_SEARCH_WIND)
    9637           0 :             skip_qp_pel = 1;
    9638           0 :         if ((dis_y) > Q_PEL_SEARCH_WIND)
    9639           0 :             skip_qp_pel = 1;
    9640           0 :         if (!skip_qp_pel) {
    9641           0 :             for (it = 0; it < num_qp_it; it++) {
    9642           0 :                 x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
    9643           0 :                 y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
    9644           0 :                 testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    9645           0 :                 SetQuarterPelRefinementInputsOnTheFly(
    9646             :                     pos_full,
    9647             :                     full_stride,
    9648             :                     pos_b,
    9649             :                     pos_h,
    9650             :                     pos_j,
    9651             :                     context_ptr->interpolated_stride,
    9652             :                     x_mv,
    9653             :                     y_mv,
    9654             :                     buf1,
    9655             :                     buf1_stride,
    9656             :                     buf2,
    9657             :                     buf2_stride);
    9658           0 :                 block_index_shift_x = (pu_index & 0x03) << 4;
    9659           0 :                 block_index_shift_y = (pu_index >> 2) << 4;
    9660           0 :                 src_block_index =
    9661           0 :                     block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
    9662           0 :                 buf1[0] = buf1[0] + block_index_shift_x +
    9663           0 :                           block_index_shift_y * buf1_stride[0];
    9664           0 :                 buf2[0] = buf2[0] + block_index_shift_x +
    9665           0 :                           block_index_shift_y * buf2_stride[0];
    9666           0 :                 buf1[1] = buf1[1] + block_index_shift_x +
    9667           0 :                           block_index_shift_y * buf1_stride[1];
    9668           0 :                 buf2[1] = buf2[1] + block_index_shift_x +
    9669           0 :                           block_index_shift_y * buf2_stride[1];
    9670           0 :                 buf1[2] = buf1[2] + block_index_shift_x +
    9671           0 :                           block_index_shift_y * buf1_stride[2];
    9672           0 :                 buf2[2] = buf2[2] + block_index_shift_x +
    9673           0 :                           block_index_shift_y * buf2_stride[2];
    9674           0 :                 buf1[3] = buf1[3] + block_index_shift_x +
    9675           0 :                           block_index_shift_y * buf1_stride[3];
    9676           0 :                 buf2[3] = buf2[3] + block_index_shift_x +
    9677           0 :                           block_index_shift_y * buf2_stride[3];
    9678           0 :                 buf1[4] = buf1[4] + block_index_shift_x +
    9679           0 :                           block_index_shift_y * buf1_stride[4];
    9680           0 :                 buf2[4] = buf2[4] + block_index_shift_x +
    9681           0 :                           block_index_shift_y * buf2_stride[4];
    9682           0 :                 buf1[5] = buf1[5] + block_index_shift_x +
    9683           0 :                           block_index_shift_y * buf1_stride[5];
    9684           0 :                 buf2[5] = buf2[5] + block_index_shift_x +
    9685           0 :                           block_index_shift_y * buf2_stride[5];
    9686           0 :                 buf1[6] = buf1[6] + block_index_shift_x +
    9687           0 :                           block_index_shift_y * buf1_stride[6];
    9688           0 :                 buf2[6] = buf2[6] + block_index_shift_x +
    9689           0 :                           block_index_shift_y * buf2_stride[6];
    9690           0 :                 buf1[7] = buf1[7] + block_index_shift_x +
    9691           0 :                           block_index_shift_y * buf1_stride[7];
    9692           0 :                 buf2[7] = buf2[7] + block_index_shift_x +
    9693           0 :                           block_index_shift_y * buf2_stride[7];
    9694           0 :                 quarter_pel_refinemnet_block(
    9695             :                     context_ptr,
    9696           0 :                     &context_ptr->p_best_ssd16x16[nidx],
    9697             :                     src_block_index,
    9698             :                     buf1,
    9699             :                     buf1_stride,
    9700             :                     buf2,
    9701             :                     buf2_stride,
    9702             :                     16,
    9703             :                     16,
    9704             :                     x_search_area_origin,
    9705             :                     y_search_area_origin,
    9706             :                     testmv,
    9707           0 :                     &context_ptr->p_best_sad16x16[nidx],
    9708           0 :                     &context_ptr->p_best_mv16x16[nidx],
    9709             :                     it);
    9710             :             }
    9711             :         }
    9712             :     }
    9713             :     // 8x8   [64 partitions]
    9714           0 :     for (pu_index = 0; pu_index < 64; ++pu_index) {
    9715           0 :         nidx = tab8x8[pu_index];
    9716           0 :         x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv8x8[nidx]);
    9717           0 :         y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv8x8[nidx]);
    9718           0 :         best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
    9719           0 :         best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
    9720           0 :         dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
    9721           0 :         dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
    9722           0 :         skip_qp_pel = 0;
    9723           0 :         if ((dis_x) > Q_PEL_SEARCH_WIND)
    9724           0 :             skip_qp_pel = 1;
    9725           0 :         if ((dis_y) > Q_PEL_SEARCH_WIND)
    9726           0 :             skip_qp_pel = 1;
    9727           0 :         if (!skip_qp_pel) {
    9728           0 :             for (it = 0; it < num_qp_it; it++) {
    9729           0 :                 x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
    9730           0 :                 y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
    9731           0 :                 testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    9732           0 :                 SetQuarterPelRefinementInputsOnTheFly(
    9733             :                     pos_full,
    9734             :                     full_stride,
    9735             :                     pos_b,
    9736             :                     pos_h,
    9737             :                     pos_j,
    9738             :                     context_ptr->interpolated_stride,
    9739             :                     x_mv,
    9740             :                     y_mv,
    9741             :                     buf1,
    9742             :                     buf1_stride,
    9743             :                     buf2,
    9744             :                     buf2_stride);
    9745           0 :                 block_index_shift_x = (pu_index & 0x07) << 3;
    9746           0 :                 block_index_shift_y = (pu_index >> 3) << 3;
    9747           0 :                 src_block_index =
    9748           0 :                     block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
    9749           0 :                 buf1[0] = buf1[0] + block_index_shift_x +
    9750           0 :                           block_index_shift_y * buf1_stride[0];
    9751           0 :                 buf2[0] = buf2[0] + block_index_shift_x +
    9752           0 :                           block_index_shift_y * buf2_stride[0];
    9753           0 :                 buf1[1] = buf1[1] + block_index_shift_x +
    9754           0 :                           block_index_shift_y * buf1_stride[1];
    9755           0 :                 buf2[1] = buf2[1] + block_index_shift_x +
    9756           0 :                           block_index_shift_y * buf2_stride[1];
    9757           0 :                 buf1[2] = buf1[2] + block_index_shift_x +
    9758           0 :                           block_index_shift_y * buf1_stride[2];
    9759           0 :                 buf2[2] = buf2[2] + block_index_shift_x +
    9760           0 :                           block_index_shift_y * buf2_stride[2];
    9761           0 :                 buf1[3] = buf1[3] + block_index_shift_x +
    9762           0 :                           block_index_shift_y * buf1_stride[3];
    9763           0 :                 buf2[3] = buf2[3] + block_index_shift_x +
    9764           0 :                           block_index_shift_y * buf2_stride[3];
    9765           0 :                 buf1[4] = buf1[4] + block_index_shift_x +
    9766           0 :                           block_index_shift_y * buf1_stride[4];
    9767           0 :                 buf2[4] = buf2[4] + block_index_shift_x +
    9768           0 :                           block_index_shift_y * buf2_stride[4];
    9769           0 :                 buf1[5] = buf1[5] + block_index_shift_x +
    9770           0 :                           block_index_shift_y * buf1_stride[5];
    9771           0 :                 buf2[5] = buf2[5] + block_index_shift_x +
    9772           0 :                           block_index_shift_y * buf2_stride[5];
    9773           0 :                 buf1[6] = buf1[6] + block_index_shift_x +
    9774           0 :                           block_index_shift_y * buf1_stride[6];
    9775           0 :                 buf2[6] = buf2[6] + block_index_shift_x +
    9776           0 :                           block_index_shift_y * buf2_stride[6];
    9777           0 :                 buf1[7] = buf1[7] + block_index_shift_x +
    9778           0 :                           block_index_shift_y * buf1_stride[7];
    9779           0 :                 buf2[7] = buf2[7] + block_index_shift_x +
    9780           0 :                           block_index_shift_y * buf2_stride[7];
    9781           0 :                 quarter_pel_refinemnet_block(context_ptr,
    9782           0 :                                              &context_ptr->p_best_ssd8x8[nidx],
    9783             :                                              src_block_index,
    9784             :                                              buf1,
    9785             :                                              buf1_stride,
    9786             :                                              buf2,
    9787             :                                              buf2_stride,
    9788             :                                              8,
    9789             :                                              8,
    9790             :                                              x_search_area_origin,
    9791             :                                              y_search_area_origin,
    9792             :                                              testmv,
    9793           0 :                                              &context_ptr->p_best_sad8x8[nidx],
    9794           0 :                                              &context_ptr->p_best_mv8x8[nidx],
    9795             :                                              it);
    9796             :             }
    9797             :         }
    9798             :     }
    9799             :     // 64x32
    9800           0 :     for (pu_index = 0; pu_index < 2; ++pu_index) {
    9801           0 :         block_index_shift_x = 0;
    9802           0 :         block_index_shift_y = pu_index << 5;
    9803           0 :         x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv64x32[pu_index]);
    9804           0 :         y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv64x32[pu_index]);
    9805           0 :         best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
    9806           0 :         best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
    9807           0 :         dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
    9808           0 :         dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
    9809           0 :         skip_qp_pel = 0;
    9810           0 :         if ((dis_x) > Q_PEL_SEARCH_WIND)
    9811           0 :             skip_qp_pel = 1;
    9812           0 :         if ((dis_y) > Q_PEL_SEARCH_WIND)
    9813           0 :             skip_qp_pel = 1;
    9814           0 :         if (!skip_qp_pel) {
    9815           0 :             for (it = 0; it < num_qp_it; it++) {
    9816           0 :                 x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
    9817           0 :                 y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
    9818           0 :                 testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    9819           0 :                 SetQuarterPelRefinementInputsOnTheFly(
    9820             :                     pos_full,
    9821             :                     full_stride,
    9822             :                     pos_b,
    9823             :                     pos_h,
    9824             :                     pos_j,
    9825             :                     context_ptr->interpolated_stride,
    9826             :                     x_mv,
    9827             :                     y_mv,
    9828             :                     buf1,
    9829             :                     buf1_stride,
    9830             :                     buf2,
    9831             :                     buf2_stride);
    9832           0 :                 src_block_index =
    9833           0 :                     block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
    9834           0 :                 buf1[0] = buf1[0] + block_index_shift_x +
    9835           0 :                           block_index_shift_y * buf1_stride[0];
    9836           0 :                 buf2[0] = buf2[0] + block_index_shift_x +
    9837           0 :                           block_index_shift_y * buf2_stride[0];
    9838           0 :                 buf1[1] = buf1[1] + block_index_shift_x +
    9839           0 :                           block_index_shift_y * buf1_stride[1];
    9840           0 :                 buf2[1] = buf2[1] + block_index_shift_x +
    9841           0 :                           block_index_shift_y * buf2_stride[1];
    9842           0 :                 buf1[2] = buf1[2] + block_index_shift_x +
    9843           0 :                           block_index_shift_y * buf1_stride[2];
    9844           0 :                 buf2[2] = buf2[2] + block_index_shift_x +
    9845           0 :                           block_index_shift_y * buf2_stride[2];
    9846           0 :                 buf1[3] = buf1[3] + block_index_shift_x +
    9847           0 :                           block_index_shift_y * buf1_stride[3];
    9848           0 :                 buf2[3] = buf2[3] + block_index_shift_x +
    9849           0 :                           block_index_shift_y * buf2_stride[3];
    9850           0 :                 buf1[4] = buf1[4] + block_index_shift_x +
    9851           0 :                           block_index_shift_y * buf1_stride[4];
    9852           0 :                 buf2[4] = buf2[4] + block_index_shift_x +
    9853           0 :                           block_index_shift_y * buf2_stride[4];
    9854           0 :                 buf1[5] = buf1[5] + block_index_shift_x +
    9855           0 :                           block_index_shift_y * buf1_stride[5];
    9856           0 :                 buf2[5] = buf2[5] + block_index_shift_x +
    9857           0 :                           block_index_shift_y * buf2_stride[5];
    9858           0 :                 buf1[6] = buf1[6] + block_index_shift_x +
    9859           0 :                           block_index_shift_y * buf1_stride[6];
    9860           0 :                 buf2[6] = buf2[6] + block_index_shift_x +
    9861           0 :                           block_index_shift_y * buf2_stride[6];
    9862           0 :                 buf1[7] = buf1[7] + block_index_shift_x +
    9863           0 :                           block_index_shift_y * buf1_stride[7];
    9864           0 :                 buf2[7] = buf2[7] + block_index_shift_x +
    9865           0 :                           block_index_shift_y * buf2_stride[7];
    9866           0 :                 quarter_pel_refinemnet_block(
    9867             :                     context_ptr,
    9868           0 :                     &context_ptr->p_best_ssd64x32[pu_index],
    9869             :                     src_block_index,
    9870             :                     buf1,
    9871             :                     buf1_stride,
    9872             :                     buf2,
    9873             :                     buf2_stride,
    9874             :                     64,
    9875             :                     32,
    9876             :                     x_search_area_origin,
    9877             :                     y_search_area_origin,
    9878             :                     testmv,
    9879           0 :                     &context_ptr->p_best_sad64x32[pu_index],
    9880           0 :                     &context_ptr->p_best_mv64x32[pu_index],
    9881             :                     it);
    9882             :             }
    9883             :         }
    9884             :     }
    9885             :     // 32x16
    9886           0 :     for (pu_index = 0; pu_index < 8; ++pu_index) {
    9887           0 :         nidx = tab32x16[pu_index];
    9888           0 :         block_index_shift_x = (pu_index & 0x01) << 5;
    9889           0 :         block_index_shift_y = (pu_index >> 1) << 4;
    9890           0 :         x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv32x16[nidx]);
    9891           0 :         y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv32x16[nidx]);
    9892           0 :         best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
    9893           0 :         best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
    9894           0 :         dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
    9895           0 :         dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
    9896           0 :         skip_qp_pel = 0;
    9897           0 :         if ((dis_x) > Q_PEL_SEARCH_WIND)
    9898           0 :             skip_qp_pel = 1;
    9899           0 :         if ((dis_y) > Q_PEL_SEARCH_WIND)
    9900           0 :             skip_qp_pel = 1;
    9901           0 :         if (!skip_qp_pel) {
    9902           0 :             for (it = 0; it < num_qp_it; it++) {
    9903           0 :                 x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
    9904           0 :                 y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
    9905           0 :                 testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    9906           0 :                 SetQuarterPelRefinementInputsOnTheFly(
    9907             :                     pos_full,
    9908             :                     full_stride,
    9909             :                     pos_b,
    9910             :                     pos_h,
    9911             :                     pos_j,
    9912             :                     context_ptr->interpolated_stride,
    9913             :                     x_mv,
    9914             :                     y_mv,
    9915             :                     buf1,
    9916             :                     buf1_stride,
    9917             :                     buf2,
    9918             :                     buf2_stride);
    9919           0 :                 src_block_index =
    9920           0 :                     block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
    9921           0 :                 buf1[0] = buf1[0] + block_index_shift_x +
    9922           0 :                           block_index_shift_y * buf1_stride[0];
    9923           0 :                 buf2[0] = buf2[0] + block_index_shift_x +
    9924           0 :                           block_index_shift_y * buf2_stride[0];
    9925           0 :                 buf1[1] = buf1[1] + block_index_shift_x +
    9926           0 :                           block_index_shift_y * buf1_stride[1];
    9927           0 :                 buf2[1] = buf2[1] + block_index_shift_x +
    9928           0 :                           block_index_shift_y * buf2_stride[1];
    9929           0 :                 buf1[2] = buf1[2] + block_index_shift_x +
    9930           0 :                           block_index_shift_y * buf1_stride[2];
    9931           0 :                 buf2[2] = buf2[2] + block_index_shift_x +
    9932           0 :                           block_index_shift_y * buf2_stride[2];
    9933           0 :                 buf1[3] = buf1[3] + block_index_shift_x +
    9934           0 :                           block_index_shift_y * buf1_stride[3];
    9935           0 :                 buf2[3] = buf2[3] + block_index_shift_x +
    9936           0 :                           block_index_shift_y * buf2_stride[3];
    9937           0 :                 buf1[4] = buf1[4] + block_index_shift_x +
    9938           0 :                           block_index_shift_y * buf1_stride[4];
    9939           0 :                 buf2[4] = buf2[4] + block_index_shift_x +
    9940           0 :                           block_index_shift_y * buf2_stride[4];
    9941           0 :                 buf1[5] = buf1[5] + block_index_shift_x +
    9942           0 :                           block_index_shift_y * buf1_stride[5];
    9943           0 :                 buf2[5] = buf2[5] + block_index_shift_x +
    9944           0 :                           block_index_shift_y * buf2_stride[5];
    9945           0 :                 buf1[6] = buf1[6] + block_index_shift_x +
    9946           0 :                           block_index_shift_y * buf1_stride[6];
    9947           0 :                 buf2[6] = buf2[6] + block_index_shift_x +
    9948           0 :                           block_index_shift_y * buf2_stride[6];
    9949           0 :                 buf1[7] = buf1[7] + block_index_shift_x +
    9950           0 :                           block_index_shift_y * buf1_stride[7];
    9951           0 :                 buf2[7] = buf2[7] + block_index_shift_x +
    9952           0 :                           block_index_shift_y * buf2_stride[7];
    9953           0 :                 quarter_pel_refinemnet_block(
    9954             :                     context_ptr,
    9955           0 :                     &context_ptr->p_best_ssd32x16[nidx],
    9956             :                     src_block_index,
    9957             :                     buf1,
    9958             :                     buf1_stride,
    9959             :                     buf2,
    9960             :                     buf2_stride,
    9961             :                     32,
    9962             :                     16,
    9963             :                     x_search_area_origin,
    9964             :                     y_search_area_origin,
    9965             :                     testmv,
    9966           0 :                     &context_ptr->p_best_sad32x16[nidx],
    9967           0 :                     &context_ptr->p_best_mv32x16[nidx],
    9968             :                     it);
    9969             :             }
    9970             :         }
    9971             :     }
    9972             :     // 16x8
    9973           0 :     for (pu_index = 0; pu_index < 32; ++pu_index) {
    9974           0 :         nidx = tab16x8[pu_index];
    9975           0 :         block_index_shift_x = (pu_index & 0x03) << 4;
    9976           0 :         block_index_shift_y = (pu_index >> 2) << 3;
    9977           0 :         x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv16x8[nidx]);
    9978           0 :         y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv16x8[nidx]);
    9979           0 :         best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
    9980           0 :         best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
    9981           0 :         dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
    9982           0 :         dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
    9983           0 :         skip_qp_pel = 0;
    9984           0 :         if ((dis_x) > Q_PEL_SEARCH_WIND)
    9985           0 :             skip_qp_pel = 1;
    9986           0 :         if ((dis_y) > Q_PEL_SEARCH_WIND)
    9987           0 :             skip_qp_pel = 1;
    9988           0 :         if (!skip_qp_pel) {
    9989           0 :             for (it = 0; it < num_qp_it; it++) {
    9990           0 :                 x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
    9991           0 :                 y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
    9992           0 :                 testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
    9993           0 :                 SetQuarterPelRefinementInputsOnTheFly(
    9994             :                     pos_full,
    9995             :                     full_stride,
    9996             :                     pos_b,
    9997             :                     pos_h,
    9998             :                     pos_j,
    9999             :                     context_ptr->interpolated_stride,
   10000             :                     x_mv,
   10001             :                     y_mv,
   10002             :                     buf1,
   10003             :                     buf1_stride,
   10004             :                     buf2,
   10005             :                     buf2_stride);
   10006           0 :                 src_block_index =
   10007           0 :                     block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
   10008           0 :                 buf1[0] = buf1[0] + block_index_shift_x +
   10009           0 :                           block_index_shift_y * buf1_stride[0];
   10010           0 :                 buf2[0] = buf2[0] + block_index_shift_x +
   10011           0 :                           block_index_shift_y * buf2_stride[0];
   10012           0 :                 buf1[1] = buf1[1] + block_index_shift_x +
   10013           0 :                           block_index_shift_y * buf1_stride[1];
   10014           0 :                 buf2[1] = buf2[1] + block_index_shift_x +
   10015           0 :                           block_index_shift_y * buf2_stride[1];
   10016           0 :                 buf1[2] = buf1[2] + block_index_shift_x +
   10017           0 :                           block_index_shift_y * buf1_stride[2];
   10018           0 :                 buf2[2] = buf2[2] + block_index_shift_x +
   10019           0 :                           block_index_shift_y * buf2_stride[2];
   10020           0 :                 buf1[3] = buf1[3] + block_index_shift_x +
   10021           0 :                           block_index_shift_y * buf1_stride[3];
   10022           0 :                 buf2[3] = buf2[3] + block_index_shift_x +
   10023           0 :                           block_index_shift_y * buf2_stride[3];
   10024           0 :                 buf1[4] = buf1[4] + block_index_shift_x +
   10025           0 :                           block_index_shift_y * buf1_stride[4];
   10026           0 :                 buf2[4] = buf2[4] + block_index_shift_x +
   10027           0 :                           block_index_shift_y * buf2_stride[4];
   10028           0 :                 buf1[5] = buf1[5] + block_index_shift_x +
   10029           0 :                           block_index_shift_y * buf1_stride[5];
   10030           0 :                 buf2[5] = buf2[5] + block_index_shift_x +
   10031           0 :                           block_index_shift_y * buf2_stride[5];
   10032           0 :                 buf1[6] = buf1[6] + block_index_shift_x +
   10033           0 :                           block_index_shift_y * buf1_stride[6];
   10034           0 :                 buf2[6] = buf2[6] + block_index_shift_x +
   10035           0 :                           block_index_shift_y * buf2_stride[6];
   10036           0 :                 buf1[7] = buf1[7] + block_index_shift_x +
   10037           0 :                           block_index_shift_y * buf1_stride[7];
   10038           0 :                 buf2[7] = buf2[7] + block_index_shift_x +
   10039           0 :                           block_index_shift_y * buf2_stride[7];
   10040           0 :                 quarter_pel_refinemnet_block(context_ptr,
   10041           0 :                                              &context_ptr->p_best_ssd16x8[nidx],
   10042             :                                              src_block_index,
   10043             :                                              buf1,
   10044             :                                              buf1_stride,
   10045             :                                              buf2,
   10046             :                                              buf2_stride,
   10047             :                                              16,
   10048             :                                              8,
   10049             :                                              x_search_area_origin,
   10050             :                                              y_search_area_origin,
   10051             :                                              testmv,
   10052           0 :                                              &context_ptr->p_best_sad16x8[nidx],
   10053           0 :                                              &context_ptr->p_best_mv16x8[nidx],
   10054             :                                              it);
   10055             :             }
   10056             :         }
   10057             :     }
   10058             :     // 32x64
   10059           0 :     for (pu_index = 0; pu_index < 2; ++pu_index) {
   10060           0 :         block_index_shift_x = pu_index << 5;
   10061           0 :         block_index_shift_y = 0;
   10062           0 :         x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv32x64[pu_index]);
   10063           0 :         y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv32x64[pu_index]);
   10064           0 :         best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
   10065           0 :         best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
   10066           0 :         dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
   10067           0 :         dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
   10068           0 :         skip_qp_pel = 0;
   10069           0 :         if ((dis_x) > Q_PEL_SEARCH_WIND)
   10070           0 :             skip_qp_pel = 1;
   10071           0 :         if ((dis_y) > Q_PEL_SEARCH_WIND)
   10072           0 :             skip_qp_pel = 1;
   10073           0 :         if (!skip_qp_pel) {
   10074           0 :             for (it = 0; it < num_qp_it; it++) {
   10075           0 :                 x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
   10076           0 :                 y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
   10077           0 :                 testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
   10078           0 :                 SetQuarterPelRefinementInputsOnTheFly(
   10079             :                     pos_full,
   10080             :                     full_stride,
   10081             :                     pos_b,
   10082             :                     pos_h,
   10083             :                     pos_j,
   10084             :                     context_ptr->interpolated_stride,
   10085             :                     x_mv,
   10086             :                     y_mv,
   10087             :                     buf1,
   10088             :                     buf1_stride,
   10089             :                     buf2,
   10090             :                     buf2_stride);
   10091           0 :                 src_block_index =
   10092           0 :                     block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
   10093           0 :                 buf1[0] = buf1[0] + block_index_shift_x +
   10094           0 :                           block_index_shift_y * buf1_stride[0];
   10095           0 :                 buf2[0] = buf2[0] + block_index_shift_x +
   10096           0 :                           block_index_shift_y * buf2_stride[0];
   10097           0 :                 buf1[1] = buf1[1] + block_index_shift_x +
   10098           0 :                           block_index_shift_y * buf1_stride[1];
   10099           0 :                 buf2[1] = buf2[1] + block_index_shift_x +
   10100           0 :                           block_index_shift_y * buf2_stride[1];
   10101           0 :                 buf1[2] = buf1[2] + block_index_shift_x +
   10102           0 :                           block_index_shift_y * buf1_stride[2];
   10103           0 :                 buf2[2] = buf2[2] + block_index_shift_x +
   10104           0 :                           block_index_shift_y * buf2_stride[2];
   10105           0 :                 buf1[3] = buf1[3] + block_index_shift_x +
   10106           0 :                           block_index_shift_y * buf1_stride[3];
   10107           0 :                 buf2[3] = buf2[3] + block_index_shift_x +
   10108           0 :                           block_index_shift_y * buf2_stride[3];
   10109           0 :                 buf1[4] = buf1[4] + block_index_shift_x +
   10110           0 :                           block_index_shift_y * buf1_stride[4];
   10111           0 :                 buf2[4] = buf2[4] + block_index_shift_x +
   10112           0 :                           block_index_shift_y * buf2_stride[4];
   10113           0 :                 buf1[5] = buf1[5] + block_index_shift_x +
   10114           0 :                           block_index_shift_y * buf1_stride[5];
   10115           0 :                 buf2[5] = buf2[5] + block_index_shift_x +
   10116           0 :                           block_index_shift_y * buf2_stride[5];
   10117           0 :                 buf1[6] = buf1[6] + block_index_shift_x +
   10118           0 :                           block_index_shift_y * buf1_stride[6];
   10119           0 :                 buf2[6] = buf2[6] + block_index_shift_x +
   10120           0 :                           block_index_shift_y * buf2_stride[6];
   10121           0 :                 buf1[7] = buf1[7] + block_index_shift_x +
   10122           0 :                           block_index_shift_y * buf1_stride[7];
   10123           0 :                 buf2[7] = buf2[7] + block_index_shift_x +
   10124           0 :                           block_index_shift_y * buf2_stride[7];
   10125           0 :                 quarter_pel_refinemnet_block(
   10126             :                     context_ptr,
   10127           0 :                     &context_ptr->p_best_ssd32x64[pu_index],
   10128             :                     src_block_index,
   10129             :                     buf1,
   10130             :                     buf1_stride,
   10131             :                     buf2,
   10132             :                     buf2_stride,
   10133             :                     32,
   10134             :                     64,
   10135             :                     x_search_area_origin,
   10136             :                     y_search_area_origin,
   10137             :                     testmv,
   10138           0 :                     &context_ptr->p_best_sad32x64[pu_index],
   10139           0 :                     &context_ptr->p_best_mv32x64[pu_index],
   10140             :                     it);
   10141             :             }
   10142             :         }
   10143             :     }
   10144             :     // 16x32
   10145           0 :     for (pu_index = 0; pu_index < 8; ++pu_index) {
   10146           0 :         nidx = tab16x32[pu_index];
   10147           0 :         block_index_shift_x = (pu_index & 0x03) << 4;
   10148           0 :         block_index_shift_y = (pu_index >> 2) << 5;
   10149           0 :         x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv16x32[nidx]);
   10150           0 :         y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv16x32[nidx]);
   10151           0 :         best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
   10152           0 :         best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
   10153           0 :         dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
   10154           0 :         dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
   10155           0 :         skip_qp_pel = 0;
   10156           0 :         if ((dis_x) > Q_PEL_SEARCH_WIND)
   10157           0 :             skip_qp_pel = 1;
   10158           0 :         if ((dis_y) > Q_PEL_SEARCH_WIND)
   10159           0 :             skip_qp_pel = 1;
   10160           0 :         if (!skip_qp_pel) {
   10161           0 :             for (it = 0; it < num_qp_it; it++) {
   10162           0 :                 x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
   10163           0 :                 y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
   10164           0 :                 testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
   10165           0 :                 SetQuarterPelRefinementInputsOnTheFly(
   10166             :                     pos_full,
   10167             :                     full_stride,
   10168             :                     pos_b,
   10169             :                     pos_h,
   10170             :                     pos_j,
   10171             :                     context_ptr->interpolated_stride,
   10172             :                     x_mv,
   10173             :                     y_mv,
   10174             :                     buf1,
   10175             :                     buf1_stride,
   10176             :                     buf2,
   10177             :                     buf2_stride);
   10178           0 :                 src_block_index =
   10179           0 :                     block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
   10180           0 :                 buf1[0] = buf1[0] + block_index_shift_x +
   10181           0 :                           block_index_shift_y * buf1_stride[0];
   10182           0 :                 buf2[0] = buf2[0] + block_index_shift_x +
   10183           0 :                           block_index_shift_y * buf2_stride[0];
   10184           0 :                 buf1[1] = buf1[1] + block_index_shift_x +
   10185           0 :                           block_index_shift_y * buf1_stride[1];
   10186           0 :                 buf2[1] = buf2[1] + block_index_shift_x +
   10187           0 :                           block_index_shift_y * buf2_stride[1];
   10188           0 :                 buf1[2] = buf1[2] + block_index_shift_x +
   10189           0 :                           block_index_shift_y * buf1_stride[2];
   10190           0 :                 buf2[2] = buf2[2] + block_index_shift_x +
   10191           0 :                           block_index_shift_y * buf2_stride[2];
   10192           0 :                 buf1[3] = buf1[3] + block_index_shift_x +
   10193           0 :                           block_index_shift_y * buf1_stride[3];
   10194           0 :                 buf2[3] = buf2[3] + block_index_shift_x +
   10195           0 :                           block_index_shift_y * buf2_stride[3];
   10196           0 :                 buf1[4] = buf1[4] + block_index_shift_x +
   10197           0 :                           block_index_shift_y * buf1_stride[4];
   10198           0 :                 buf2[4] = buf2[4] + block_index_shift_x +
   10199           0 :                           block_index_shift_y * buf2_stride[4];
   10200           0 :                 buf1[5] = buf1[5] + block_index_shift_x +
   10201           0 :                           block_index_shift_y * buf1_stride[5];
   10202           0 :                 buf2[5] = buf2[5] + block_index_shift_x +
   10203           0 :                           block_index_shift_y * buf2_stride[5];
   10204           0 :                 buf1[6] = buf1[6] + block_index_shift_x +
   10205           0 :                           block_index_shift_y * buf1_stride[6];
   10206           0 :                 buf2[6] = buf2[6] + block_index_shift_x +
   10207           0 :                           block_index_shift_y * buf2_stride[6];
   10208           0 :                 buf1[7] = buf1[7] + block_index_shift_x +
   10209           0 :                           block_index_shift_y * buf1_stride[7];
   10210           0 :                 buf2[7] = buf2[7] + block_index_shift_x +
   10211           0 :                           block_index_shift_y * buf2_stride[7];
   10212           0 :                 quarter_pel_refinemnet_block(
   10213             :                     context_ptr,
   10214           0 :                     &context_ptr->p_best_ssd16x32[nidx],
   10215             :                     src_block_index,
   10216             :                     buf1,
   10217             :                     buf1_stride,
   10218             :                     buf2,
   10219             :                     buf2_stride,
   10220             :                     16,
   10221             :                     32,
   10222             :                     x_search_area_origin,
   10223             :                     y_search_area_origin,
   10224             :                     testmv,
   10225           0 :                     &context_ptr->p_best_sad16x32[nidx],
   10226           0 :                     &context_ptr->p_best_mv16x32[nidx],
   10227             :                     it);
   10228             :             }
   10229             :         }
   10230             :     }
   10231             :     // 8x16
   10232           0 :     for (pu_index = 0; pu_index < 32; ++pu_index) {
   10233           0 :         nidx = tab8x16[pu_index];
   10234           0 :         block_index_shift_x = (pu_index & 0x07) << 3;
   10235           0 :         block_index_shift_y = (pu_index >> 3) << 4;
   10236           0 :         x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv8x16[nidx]);
   10237           0 :         y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv8x16[nidx]);
   10238           0 :         best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
   10239           0 :         best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
   10240           0 :         dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
   10241           0 :         dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
   10242           0 :         skip_qp_pel = 0;
   10243           0 :         if ((dis_x) > Q_PEL_SEARCH_WIND)
   10244           0 :             skip_qp_pel = 1;
   10245           0 :         if ((dis_y) > Q_PEL_SEARCH_WIND)
   10246           0 :             skip_qp_pel = 1;
   10247           0 :         if (!skip_qp_pel) {
   10248           0 :             for (it = 0; it < num_qp_it; it++) {
   10249           0 :                 x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
   10250           0 :                 y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
   10251           0 :                 testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
   10252           0 :                 SetQuarterPelRefinementInputsOnTheFly(
   10253             :                     pos_full,
   10254             :                     full_stride,
   10255             :                     pos_b,
   10256             :                     pos_h,
   10257             :                     pos_j,
   10258             :                     context_ptr->interpolated_stride,
   10259             :                     x_mv,
   10260             :                     y_mv,
   10261             :                     buf1,
   10262             :                     buf1_stride,
   10263             :                     buf2,
   10264             :                     buf2_stride);
   10265           0 :                 src_block_index =
   10266           0 :                     block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
   10267           0 :                 buf1[0] = buf1[0] + block_index_shift_x +
   10268           0 :                           block_index_shift_y * buf1_stride[0];
   10269           0 :                 buf2[0] = buf2[0] + block_index_shift_x +
   10270           0 :                           block_index_shift_y * buf2_stride[0];
   10271           0 :                 buf1[1] = buf1[1] + block_index_shift_x +
   10272           0 :                           block_index_shift_y * buf1_stride[1];
   10273           0 :                 buf2[1] = buf2[1] + block_index_shift_x +
   10274           0 :                           block_index_shift_y * buf2_stride[1];
   10275           0 :                 buf1[2] = buf1[2] + block_index_shift_x +
   10276           0 :                           block_index_shift_y * buf1_stride[2];
   10277           0 :                 buf2[2] = buf2[2] + block_index_shift_x +
   10278           0 :                           block_index_shift_y * buf2_stride[2];
   10279           0 :                 buf1[3] = buf1[3] + block_index_shift_x +
   10280           0 :                           block_index_shift_y * buf1_stride[3];
   10281           0 :                 buf2[3] = buf2[3] + block_index_shift_x +
   10282           0 :                           block_index_shift_y * buf2_stride[3];
   10283           0 :                 buf1[4] = buf1[4] + block_index_shift_x +
   10284           0 :                           block_index_shift_y * buf1_stride[4];
   10285           0 :                 buf2[4] = buf2[4] + block_index_shift_x +
   10286           0 :                           block_index_shift_y * buf2_stride[4];
   10287           0 :                 buf1[5] = buf1[5] + block_index_shift_x +
   10288           0 :                           block_index_shift_y * buf1_stride[5];
   10289           0 :                 buf2[5] = buf2[5] + block_index_shift_x +
   10290           0 :                           block_index_shift_y * buf2_stride[5];
   10291           0 :                 buf1[6] = buf1[6] + block_index_shift_x +
   10292           0 :                           block_index_shift_y * buf1_stride[6];
   10293           0 :                 buf2[6] = buf2[6] + block_index_shift_x +
   10294           0 :                           block_index_shift_y * buf2_stride[6];
   10295           0 :                 buf1[7] = buf1[7] + block_index_shift_x +
   10296           0 :                           block_index_shift_y * buf1_stride[7];
   10297           0 :                 buf2[7] = buf2[7] + block_index_shift_x +
   10298           0 :                           block_index_shift_y * buf2_stride[7];
   10299           0 :                 quarter_pel_refinemnet_block(context_ptr,
   10300           0 :                                              &context_ptr->p_best_ssd8x16[nidx],
   10301             :                                              src_block_index,
   10302             :                                              buf1,
   10303             :                                              buf1_stride,
   10304             :                                              buf2,
   10305             :                                              buf2_stride,
   10306             :                                              8,
   10307             :                                              16,
   10308             :                                              x_search_area_origin,
   10309             :                                              y_search_area_origin,
   10310             :                                              testmv,
   10311           0 :                                              &context_ptr->p_best_sad8x16[nidx],
   10312           0 :                                              &context_ptr->p_best_mv8x16[nidx],
   10313             :                                              it);
   10314             :             }
   10315             :         }
   10316             :     }
   10317             :     // 32x8
   10318           0 :     for (pu_index = 0; pu_index < 16; ++pu_index) {
   10319           0 :         nidx = tab32x8[pu_index];
   10320           0 :         block_index_shift_x = (pu_index & 0x01) << 5;
   10321           0 :         block_index_shift_y = (pu_index >> 1) << 3;
   10322           0 :         x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv32x8[nidx]);
   10323           0 :         y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv32x8[nidx]);
   10324           0 :         best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
   10325           0 :         best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
   10326           0 :         dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
   10327           0 :         dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
   10328           0 :         skip_qp_pel = 0;
   10329           0 :         if ((dis_x) > Q_PEL_SEARCH_WIND)
   10330           0 :             skip_qp_pel = 1;
   10331           0 :         if ((dis_y) > Q_PEL_SEARCH_WIND)
   10332           0 :             skip_qp_pel = 1;
   10333           0 :         if (!skip_qp_pel) {
   10334           0 :             for (it = 0; it < num_qp_it; it++) {
   10335           0 :                 x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
   10336           0 :                 y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
   10337           0 :                 testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
   10338           0 :                 SetQuarterPelRefinementInputsOnTheFly(
   10339             :                     pos_full,
   10340             :                     full_stride,
   10341             :                     pos_b,
   10342             :                     pos_h,
   10343             :                     pos_j,
   10344             :                     context_ptr->interpolated_stride,
   10345             :                     x_mv,
   10346             :                     y_mv,
   10347             :                     buf1,
   10348             :                     buf1_stride,
   10349             :                     buf2,
   10350             :                     buf2_stride);
   10351           0 :                 src_block_index =
   10352           0 :                     block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
   10353           0 :                 buf1[0] = buf1[0] + block_index_shift_x +
   10354           0 :                           block_index_shift_y * buf1_stride[0];
   10355           0 :                 buf2[0] = buf2[0] + block_index_shift_x +
   10356           0 :                           block_index_shift_y * buf2_stride[0];
   10357           0 :                 buf1[1] = buf1[1] + block_index_shift_x +
   10358           0 :                           block_index_shift_y * buf1_stride[1];
   10359           0 :                 buf2[1] = buf2[1] + block_index_shift_x +
   10360           0 :                           block_index_shift_y * buf2_stride[1];
   10361           0 :                 buf1[2] = buf1[2] + block_index_shift_x +
   10362           0 :                           block_index_shift_y * buf1_stride[2];
   10363           0 :                 buf2[2] = buf2[2] + block_index_shift_x +
   10364           0 :                           block_index_shift_y * buf2_stride[2];
   10365           0 :                 buf1[3] = buf1[3] + block_index_shift_x +
   10366           0 :                           block_index_shift_y * buf1_stride[3];
   10367           0 :                 buf2[3] = buf2[3] + block_index_shift_x +
   10368           0 :                           block_index_shift_y * buf2_stride[3];
   10369           0 :                 buf1[4] = buf1[4] + block_index_shift_x +
   10370           0 :                           block_index_shift_y * buf1_stride[4];
   10371           0 :                 buf2[4] = buf2[4] + block_index_shift_x +
   10372           0 :                           block_index_shift_y * buf2_stride[4];
   10373           0 :                 buf1[5] = buf1[5] + block_index_shift_x +
   10374           0 :                           block_index_shift_y * buf1_stride[5];
   10375           0 :                 buf2[5] = buf2[5] + block_index_shift_x +
   10376           0 :                           block_index_shift_y * buf2_stride[5];
   10377           0 :                 buf1[6] = buf1[6] + block_index_shift_x +
   10378           0 :                           block_index_shift_y * buf1_stride[6];
   10379           0 :                 buf2[6] = buf2[6] + block_index_shift_x +
   10380           0 :                           block_index_shift_y * buf2_stride[6];
   10381           0 :                 buf1[7] = buf1[7] + block_index_shift_x +
   10382           0 :                           block_index_shift_y * buf1_stride[7];
   10383           0 :                 buf2[7] = buf2[7] + block_index_shift_x +
   10384           0 :                           block_index_shift_y * buf2_stride[7];
   10385           0 :                 quarter_pel_refinemnet_block(context_ptr,
   10386           0 :                                              &context_ptr->p_best_ssd32x8[nidx],
   10387             :                                              src_block_index,
   10388             :                                              buf1,
   10389             :                                              buf1_stride,
   10390             :                                              buf2,
   10391             :                                              buf2_stride,
   10392             :                                              32,
   10393             :                                              8,
   10394             :                                              x_search_area_origin,
   10395             :                                              y_search_area_origin,
   10396             :                                              testmv,
   10397           0 :                                              &context_ptr->p_best_sad32x8[nidx],
   10398           0 :                                              &context_ptr->p_best_mv32x8[nidx],
   10399             :                                              it);
   10400             :             }
   10401             :         }
   10402             :     }
   10403             : 
   10404             :     // 8x32
   10405           0 :     for (pu_index = 0; pu_index < 16; ++pu_index) {
   10406           0 :         nidx = tab8x32[pu_index];
   10407           0 :         block_index_shift_x = (pu_index & 0x07) << 3;
   10408           0 :         block_index_shift_y = (pu_index >> 3) << 5;
   10409           0 :         x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv8x32[nidx]);
   10410           0 :         y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv8x32[nidx]);
   10411           0 :         best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
   10412           0 :         best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
   10413           0 :         dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
   10414           0 :         dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
   10415           0 :         skip_qp_pel = 0;
   10416           0 :         if ((dis_x) > Q_PEL_SEARCH_WIND)
   10417           0 :             skip_qp_pel = 1;
   10418           0 :         if ((dis_y) > Q_PEL_SEARCH_WIND)
   10419           0 :             skip_qp_pel = 1;
   10420           0 :         if (!skip_qp_pel) {
   10421           0 :             for (it = 0; it < num_qp_it; it++) {
   10422           0 :                 x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
   10423           0 :                 y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
   10424           0 :                 testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
   10425           0 :                 SetQuarterPelRefinementInputsOnTheFly(
   10426             :                     pos_full,
   10427             :                     full_stride,
   10428             :                     pos_b,
   10429             :                     pos_h,
   10430             :                     pos_j,
   10431             :                     context_ptr->interpolated_stride,
   10432             :                     x_mv,
   10433             :                     y_mv,
   10434             :                     buf1,
   10435             :                     buf1_stride,
   10436             :                     buf2,
   10437             :                     buf2_stride);
   10438           0 :                 src_block_index =
   10439           0 :                     block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
   10440           0 :                 buf1[0] = buf1[0] + block_index_shift_x +
   10441           0 :                           block_index_shift_y * buf1_stride[0];
   10442           0 :                 buf2[0] = buf2[0] + block_index_shift_x +
   10443           0 :                           block_index_shift_y * buf2_stride[0];
   10444           0 :                 buf1[1] = buf1[1] + block_index_shift_x +
   10445           0 :                           block_index_shift_y * buf1_stride[1];
   10446           0 :                 buf2[1] = buf2[1] + block_index_shift_x +
   10447           0 :                           block_index_shift_y * buf2_stride[1];
   10448           0 :                 buf1[2] = buf1[2] + block_index_shift_x +
   10449           0 :                           block_index_shift_y * buf1_stride[2];
   10450           0 :                 buf2[2] = buf2[2] + block_index_shift_x +
   10451           0 :                           block_index_shift_y * buf2_stride[2];
   10452           0 :                 buf1[3] = buf1[3] + block_index_shift_x +
   10453           0 :                           block_index_shift_y * buf1_stride[3];
   10454           0 :                 buf2[3] = buf2[3] + block_index_shift_x +
   10455           0 :                           block_index_shift_y * buf2_stride[3];
   10456           0 :                 buf1[4] = buf1[4] + block_index_shift_x +
   10457           0 :                           block_index_shift_y * buf1_stride[4];
   10458           0 :                 buf2[4] = buf2[4] + block_index_shift_x +
   10459           0 :                           block_index_shift_y * buf2_stride[4];
   10460           0 :                 buf1[5] = buf1[5] + block_index_shift_x +
   10461           0 :                           block_index_shift_y * buf1_stride[5];
   10462           0 :                 buf2[5] = buf2[5] + block_index_shift_x +
   10463           0 :                           block_index_shift_y * buf2_stride[5];
   10464           0 :                 buf1[6] = buf1[6] + block_index_shift_x +
   10465           0 :                           block_index_shift_y * buf1_stride[6];
   10466           0 :                 buf2[6] = buf2[6] + block_index_shift_x +
   10467           0 :                           block_index_shift_y * buf2_stride[6];
   10468           0 :                 buf1[7] = buf1[7] + block_index_shift_x +
   10469           0 :                           block_index_shift_y * buf1_stride[7];
   10470           0 :                 buf2[7] = buf2[7] + block_index_shift_x +
   10471           0 :                           block_index_shift_y * buf2_stride[7];
   10472           0 :                 quarter_pel_refinemnet_block(context_ptr,
   10473           0 :                                              &context_ptr->p_best_ssd8x32[nidx],
   10474             :                                              src_block_index,
   10475             :                                              buf1,
   10476             :                                              buf1_stride,
   10477             :                                              buf2,
   10478             :                                              buf2_stride,
   10479             :                                              8,
   10480             :                                              32,
   10481             :                                              x_search_area_origin,
   10482             :                                              y_search_area_origin,
   10483             :                                              testmv,
   10484           0 :                                              &context_ptr->p_best_sad8x32[nidx],
   10485           0 :                                              &context_ptr->p_best_mv8x32[nidx],
   10486             :                                              it);
   10487             :             }
   10488             :         }
   10489             :     }
   10490             : 
   10491             :     // 64x16
   10492           0 :     for (pu_index = 0; pu_index < 4; ++pu_index) {
   10493           0 :         nidx = pu_index;
   10494           0 :         block_index_shift_x = 0;
   10495           0 :         block_index_shift_y = pu_index << 4;
   10496           0 :         x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv64x16[nidx]);
   10497           0 :         y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv64x16[nidx]);
   10498           0 :         best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
   10499           0 :         best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
   10500           0 :         dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
   10501           0 :         dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
   10502           0 :         skip_qp_pel = 0;
   10503           0 :         if ((dis_x) > Q_PEL_SEARCH_WIND)
   10504           0 :             skip_qp_pel = 1;
   10505           0 :         if ((dis_y) > Q_PEL_SEARCH_WIND)
   10506           0 :             skip_qp_pel = 1;
   10507           0 :         if (!skip_qp_pel) {
   10508           0 :             for (it = 0; it < num_qp_it; it++) {
   10509           0 :                 x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
   10510           0 :                 y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
   10511           0 :                 testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
   10512           0 :                 SetQuarterPelRefinementInputsOnTheFly(
   10513             :                     pos_full,
   10514             :                     full_stride,
   10515             :                     pos_b,
   10516             :                     pos_h,
   10517             :                     pos_j,
   10518             :                     context_ptr->interpolated_stride,
   10519             :                     x_mv,
   10520             :                     y_mv,
   10521             :                     buf1,
   10522             :                     buf1_stride,
   10523             :                     buf2,
   10524             :                     buf2_stride);
   10525           0 :                 src_block_index =
   10526           0 :                     block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
   10527           0 :                 buf1[0] = buf1[0] + block_index_shift_x +
   10528           0 :                           block_index_shift_y * buf1_stride[0];
   10529           0 :                 buf2[0] = buf2[0] + block_index_shift_x +
   10530           0 :                           block_index_shift_y * buf2_stride[0];
   10531           0 :                 buf1[1] = buf1[1] + block_index_shift_x +
   10532           0 :                           block_index_shift_y * buf1_stride[1];
   10533           0 :                 buf2[1] = buf2[1] + block_index_shift_x +
   10534           0 :                           block_index_shift_y * buf2_stride[1];
   10535           0 :                 buf1[2] = buf1[2] + block_index_shift_x +
   10536           0 :                           block_index_shift_y * buf1_stride[2];
   10537           0 :                 buf2[2] = buf2[2] + block_index_shift_x +
   10538           0 :                           block_index_shift_y * buf2_stride[2];
   10539           0 :                 buf1[3] = buf1[3] + block_index_shift_x +
   10540           0 :                           block_index_shift_y * buf1_stride[3];
   10541           0 :                 buf2[3] = buf2[3] + block_index_shift_x +
   10542           0 :                           block_index_shift_y * buf2_stride[3];
   10543           0 :                 buf1[4] = buf1[4] + block_index_shift_x +
   10544           0 :                           block_index_shift_y * buf1_stride[4];
   10545           0 :                 buf2[4] = buf2[4] + block_index_shift_x +
   10546           0 :                           block_index_shift_y * buf2_stride[4];
   10547           0 :                 buf1[5] = buf1[5] + block_index_shift_x +
   10548           0 :                           block_index_shift_y * buf1_stride[5];
   10549           0 :                 buf2[5] = buf2[5] + block_index_shift_x +
   10550           0 :                           block_index_shift_y * buf2_stride[5];
   10551           0 :                 buf1[6] = buf1[6] + block_index_shift_x +
   10552           0 :                           block_index_shift_y * buf1_stride[6];
   10553           0 :                 buf2[6] = buf2[6] + block_index_shift_x +
   10554           0 :                           block_index_shift_y * buf2_stride[6];
   10555           0 :                 buf1[7] = buf1[7] + block_index_shift_x +
   10556           0 :                           block_index_shift_y * buf1_stride[7];
   10557           0 :                 buf2[7] = buf2[7] + block_index_shift_x +
   10558           0 :                           block_index_shift_y * buf2_stride[7];
   10559           0 :                 quarter_pel_refinemnet_block(
   10560             :                     context_ptr,
   10561           0 :                     &context_ptr->p_best_ssd64x16[nidx],
   10562             :                     src_block_index,
   10563             :                     buf1,
   10564             :                     buf1_stride,
   10565             :                     buf2,
   10566             :                     buf2_stride,
   10567             :                     64,
   10568             :                     16,
   10569             :                     x_search_area_origin,
   10570             :                     y_search_area_origin,
   10571             :                     testmv,
   10572           0 :                     &context_ptr->p_best_sad64x16[nidx],
   10573           0 :                     &context_ptr->p_best_mv64x16[nidx],
   10574             :                     it);
   10575             :             }
   10576             :         }
   10577             :     }
   10578             :     // 16x64
   10579           0 :     for (pu_index = 0; pu_index < 4; ++pu_index) {
   10580           0 :         nidx = pu_index;
   10581           0 :         block_index_shift_x = pu_index << 4;
   10582           0 :         block_index_shift_y = 0;
   10583           0 :         x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv16x64[nidx]);
   10584           0 :         y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv16x64[nidx]);
   10585           0 :         best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
   10586           0 :         best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
   10587           0 :         dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
   10588           0 :         dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
   10589           0 :         skip_qp_pel = 0;
   10590           0 :         if ((dis_x) > Q_PEL_SEARCH_WIND)
   10591           0 :             skip_qp_pel = 1;
   10592           0 :         if ((dis_y) > Q_PEL_SEARCH_WIND)
   10593           0 :             skip_qp_pel = 1;
   10594           0 :         if (!skip_qp_pel) {
   10595           0 :             for (it = 0; it < num_qp_it; it++) {
   10596           0 :                 x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
   10597           0 :                 y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
   10598           0 :                 testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
   10599           0 :                 SetQuarterPelRefinementInputsOnTheFly(
   10600             :                     pos_full,
   10601             :                     full_stride,
   10602             :                     pos_b,
   10603             :                     pos_h,
   10604             :                     pos_j,
   10605             :                     context_ptr->interpolated_stride,
   10606             :                     x_mv,
   10607             :                     y_mv,
   10608             :                     buf1,
   10609             :                     buf1_stride,
   10610             :                     buf2,
   10611             :                     buf2_stride);
   10612           0 :                 src_block_index =
   10613           0 :                     block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
   10614           0 :                 buf1[0] = buf1[0] + block_index_shift_x +
   10615           0 :                           block_index_shift_y * buf1_stride[0];
   10616           0 :                 buf2[0] = buf2[0] + block_index_shift_x +
   10617           0 :                           block_index_shift_y * buf2_stride[0];
   10618           0 :                 buf1[1] = buf1[1] + block_index_shift_x +
   10619           0 :                           block_index_shift_y * buf1_stride[1];
   10620           0 :                 buf2[1] = buf2[1] + block_index_shift_x +
   10621           0 :                           block_index_shift_y * buf2_stride[1];
   10622           0 :                 buf1[2] = buf1[2] + block_index_shift_x +
   10623           0 :                           block_index_shift_y * buf1_stride[2];
   10624           0 :                 buf2[2] = buf2[2] + block_index_shift_x +
   10625           0 :                           block_index_shift_y * buf2_stride[2];
   10626           0 :                 buf1[3] = buf1[3] + block_index_shift_x +
   10627           0 :                           block_index_shift_y * buf1_stride[3];
   10628           0 :                 buf2[3] = buf2[3] + block_index_shift_x +
   10629           0 :                           block_index_shift_y * buf2_stride[3];
   10630           0 :                 buf1[4] = buf1[4] + block_index_shift_x +
   10631           0 :                           block_index_shift_y * buf1_stride[4];
   10632           0 :                 buf2[4] = buf2[4] + block_index_shift_x +
   10633           0 :                           block_index_shift_y * buf2_stride[4];
   10634           0 :                 buf1[5] = buf1[5] + block_index_shift_x +
   10635           0 :                           block_index_shift_y * buf1_stride[5];
   10636           0 :                 buf2[5] = buf2[5] + block_index_shift_x +
   10637           0 :                           block_index_shift_y * buf2_stride[5];
   10638           0 :                 buf1[6] = buf1[6] + block_index_shift_x +
   10639           0 :                           block_index_shift_y * buf1_stride[6];
   10640           0 :                 buf2[6] = buf2[6] + block_index_shift_x +
   10641           0 :                           block_index_shift_y * buf2_stride[6];
   10642           0 :                 buf1[7] = buf1[7] + block_index_shift_x +
   10643           0 :                           block_index_shift_y * buf1_stride[7];
   10644           0 :                 buf2[7] = buf2[7] + block_index_shift_x +
   10645           0 :                           block_index_shift_y * buf2_stride[7];
   10646           0 :                 quarter_pel_refinemnet_block(
   10647             :                     context_ptr,
   10648           0 :                     &context_ptr->p_best_ssd16x64[nidx],
   10649             :                     src_block_index,
   10650             :                     buf1,
   10651             :                     buf1_stride,
   10652             :                     buf2,
   10653             :                     buf2_stride,
   10654             :                     16,
   10655             :                     64,
   10656             :                     x_search_area_origin,
   10657             :                     y_search_area_origin,
   10658             :                     testmv,
   10659           0 :                     &context_ptr->p_best_sad16x64[nidx],
   10660           0 :                     &context_ptr->p_best_mv16x64[nidx],
   10661             :                     it);
   10662             :             }
   10663             :         }
   10664             :     }
   10665           0 :     return;
   10666             : }
   10667           0 : void HmeOneQuadrantLevel0(  // Level-0 HME: one SAD search window on the sixteenth-resolution reference picture
   10668             :     PictureParentControlSet *picture_control_set_ptr,  // not used by this function (cast to void below)
   10669             :     MeContext *context_ptr,  // input/output parameter, ME context Ptr, used to
   10670             :                              // get/update ME results
   10671             :     int16_t origin_x,        // input parameter, SB position in the horizontal
   10672             :                              // direction- sixteenth resolution
   10673             :     int16_t origin_y,        // input parameter, SB position in the vertical
   10674             :                              // direction- sixteenth resolution
   10675             :     uint32_t sb_width,   // input parameter, SB width - sixteenth resolution
   10676             :     uint32_t sb_height,  // input parameter, SB height - sixteenth resolution
   10677             :     int16_t xHmeSearchCenter,  // input parameter, HME search center in the
   10678             :                                // horizontal direction
   10679             :     int16_t yHmeSearchCenter,  // input parameter, HME search center in the
   10680             :                                // vertical direction
   10681             :     EbPictureBufferDesc *
   10682             :         sixteenthRefPicPtr,  // input parameter, sixteenth reference Picture Ptr
   10683             :     uint64_t *level0BestSad,       // output parameter, Level0 SAD at
   10684             :                                    // (searchRegionNumberInWidth,
   10685             :                                    // searchRegionNumberInHeight)
   10686             :     int16_t *xLevel0SearchCenter,  // output parameter, Level0 xMV at
   10687             :                                    // (searchRegionNumberInWidth,
   10688             :                                    // searchRegionNumberInHeight)
   10689             :     int16_t *yLevel0SearchCenter,  // output parameter, Level0 yMV at
   10690             :                                    // (searchRegionNumberInWidth,
   10691             :                                    // searchRegionNumberInHeight)
   10692             :     uint32_t searchAreaMultiplierX,  // input parameter, percentage applied to the level-0 search area width (divided by 100 below)
   10693             :     uint32_t searchAreaMultiplierY)  // input parameter, percentage applied to the level-0 search area height (divided by 100 below)
   10694             : {
   10695             :     int16_t xTopLeftSearchRegion;
   10696             :     int16_t yTopLeftSearchRegion;
   10697             :     uint32_t searchRegionIndex;
   10698             :     int16_t x_search_area_origin;
   10699             :     int16_t y_search_area_origin;
   10700             :     int16_t xSearchRegionDistance;
   10701             :     int16_t ySearchRegionDistance;
   10702             : 
   10703             :     int16_t padWidth;
   10704             :     int16_t padHeight;
   10705             : 
   10706             :     (void)picture_control_set_ptr;  // silences the unused-parameter warning
   10707             :     // Scale the level-0 search width by searchAreaMultiplierX percent, then round it up to a multiple of 16
   10708           0 :     int16_t search_area_width =
   10709           0 :         (int16_t)((((((context_ptr->hme_level0_total_search_area_width *
   10710           0 :                        searchAreaMultiplierX) /
   10711           0 :                       100))) +
   10712           0 :                    15) &
   10713             :                   ~0x0F);
   10714           0 :     int16_t search_area_height =  // scaled by searchAreaMultiplierY percent; no rounding is applied to the height
   10715           0 :         (int16_t)(((context_ptr->hme_level0_total_search_area_height *
   10716           0 :                     searchAreaMultiplierY) /
   10717             :                    100));
   10718           0 :     xSearchRegionDistance = xHmeSearchCenter;
   10719           0 :     ySearchRegionDistance = yHmeSearchCenter;
   10720           0 :     padWidth = (int16_t)(sixteenthRefPicPtr->origin_x) - 1;  // left/top limits used below; presumably origin_x/origin_y are the reference padding sizes - confirm
   10721           0 :     padHeight = (int16_t)(sixteenthRefPicPtr->origin_y) - 1;
   10722             :     // Center the search window on the HME search center
   10723           0 :     x_search_area_origin =
   10724           0 :         -(int16_t)(search_area_width >> 1) + xSearchRegionDistance;
   10725           0 :     y_search_area_origin =
   10726           0 :         -(int16_t)(search_area_height >> 1) + ySearchRegionDistance;
   10727             : 
   10728             :     // Correct the left edge of the Search Area if it is not on the reference
   10729             :     // Picture
   10730           0 :     x_search_area_origin = ((origin_x + x_search_area_origin) < -padWidth)
   10731           0 :                                ? -padWidth - origin_x
   10732             :                                : x_search_area_origin;
   10733             :     // NOTE(review): the origin was clamped just above, so this same condition looks like it can no longer hold and the width stays unchanged - confirm intent
   10734           0 :     search_area_width =
   10735           0 :         ((origin_x + x_search_area_origin) < -padWidth)
   10736           0 :             ? search_area_width -
   10737           0 :                   (-padWidth - (origin_x + x_search_area_origin))
   10738             :             : search_area_width;
   10739             : 
   10740             :     // Correct the right edge of the Search Area if it is not on the reference
   10741             :     // Picture
   10742           0 :     x_search_area_origin =
   10743           0 :         ((origin_x + x_search_area_origin) >
   10744           0 :          (int16_t)sixteenthRefPicPtr->width - 1)
   10745           0 :             ? x_search_area_origin - ((origin_x + x_search_area_origin) -
   10746           0 :                                       ((int16_t)sixteenthRefPicPtr->width - 1))
   10747             :             : x_search_area_origin;
   10748             :     // Crop the width so the window ends at the picture width, keeping at least 1 column
   10749           0 :     search_area_width =
   10750           0 :         ((origin_x + x_search_area_origin + search_area_width) >
   10751           0 :          (int16_t)sixteenthRefPicPtr->width)
   10752           0 :             ? MAX(1,
   10753             :                   search_area_width -
   10754             :                       ((origin_x + x_search_area_origin + search_area_width) -
   10755             :                        (int16_t)sixteenthRefPicPtr->width))
   10756             :             : search_area_width;
   10757             : 
   10758             :     // Round down x_HME to be a multiple of 16 as cropping already performed (widths below 16 are kept as they are)
   10759           0 :     search_area_width = (search_area_width < 16) ? search_area_width
   10760             :                                                  : search_area_width & ~0x0F;
   10761             :     // Correct the top edge of the Search Area if it is not on the reference
   10762             :     // Picture
   10763           0 :     y_search_area_origin = ((origin_y + y_search_area_origin) < -padHeight)
   10764           0 :                                ? -padHeight - origin_y
   10765             :                                : y_search_area_origin;
   10766             :     // NOTE(review): as for the width, the origin was clamped just above, so this condition looks like it can no longer hold - confirm intent
   10767           0 :     search_area_height =
   10768           0 :         ((origin_y + y_search_area_origin) < -padHeight)
   10769           0 :             ? search_area_height -
   10770           0 :                   (-padHeight - (origin_y + y_search_area_origin))
   10771             :             : search_area_height;
   10772             : 
   10773             :     // Correct the bottom edge of the Search Area if it is not on the reference
   10774             :     // Picture
   10775           0 :     y_search_area_origin =
   10776           0 :         ((origin_y + y_search_area_origin) >
   10777           0 :          (int16_t)sixteenthRefPicPtr->height - 1)
   10778           0 :             ? y_search_area_origin - ((origin_y + y_search_area_origin) -
   10779           0 :                                       ((int16_t)sixteenthRefPicPtr->height - 1))
   10780             :             : y_search_area_origin;
   10781             :     // Crop the height so the window ends at the picture height, keeping at least 1 row
   10782           0 :     search_area_height =
   10783           0 :         (origin_y + y_search_area_origin + search_area_height >
   10784           0 :          (int16_t)sixteenthRefPicPtr->height)
   10785           0 :             ? MAX(1,
   10786             :                   search_area_height -
   10787             :                       ((origin_y + y_search_area_origin + search_area_height) -
   10788             :                        (int16_t)sixteenthRefPicPtr->height))
   10789             :             : search_area_height;
   10790             :     // Offset of the window's top-left sample inside buffer_y (includes the reference origin_x/origin_y)
   10791           0 :     xTopLeftSearchRegion = ((int16_t)sixteenthRefPicPtr->origin_x + origin_x) +
   10792             :                            x_search_area_origin;
   10793           0 :     yTopLeftSearchRegion = ((int16_t)sixteenthRefPicPtr->origin_y + origin_y) +
   10794             :                            y_search_area_origin;
   10795           0 :     searchRegionIndex = xTopLeftSearchRegion +
   10796           0 :                         yTopLeftSearchRegion * sixteenthRefPicPtr->stride_y;
   10797             :     // The three kernel calls below take identical arguments; unless the method is FULL_SAD_SEARCH the reference stride is doubled and the height halved
   10798           0 :     if (context_ptr->hme_search_type == HME_SPARSE) {
   10799           0 :         sad_loop_kernel_sparse(
   10800             :             &context_ptr->sixteenth_sb_buffer[0],
   10801             :             context_ptr->sixteenth_sb_buffer_stride,
   10802           0 :             &sixteenthRefPicPtr->buffer_y[searchRegionIndex],
   10803           0 :             (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   10804           0 :                 ? sixteenthRefPicPtr->stride_y
   10805           0 :                 : sixteenthRefPicPtr->stride_y * 2,
   10806           0 :             (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   10807             :                 ? sb_height
   10808             :                 : sb_height >> 1,
   10809             :             sb_width,
   10810             :             /* results */
   10811             :             level0BestSad,
   10812             :             xLevel0SearchCenter,
   10813             :             yLevel0SearchCenter,
   10814             :             /* range */
   10815           0 :             sixteenthRefPicPtr->stride_y,
   10816             :             search_area_width,
   10817             :             search_area_height);
   10818             :     } else {
   10819           0 :         if ((search_area_width & 15) == 0) {  // search width is a multiple of 16
   10820             :             // Only width equals 16 (LCU equals 64) is updated
   10821             :             // other width sizes work with the old code as the one
   10822             :             // in"sad_loop_kernel_sse4_1_intrin"
   10823           0 :             sad_loop_kernel_hme_l0(
   10824             :                 &context_ptr->sixteenth_sb_buffer[0],
   10825             :                 context_ptr->sixteenth_sb_buffer_stride,
   10826           0 :                 &sixteenthRefPicPtr->buffer_y[searchRegionIndex],
   10827           0 :                 (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   10828           0 :                     ? sixteenthRefPicPtr->stride_y
   10829           0 :                     : sixteenthRefPicPtr->stride_y * 2,
   10830           0 :                 (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   10831             :                     ? sb_height
   10832             :                     : sb_height >> 1,
   10833             :                 sb_width,
   10834             :                 /* results */
   10835             :                 level0BestSad,
   10836             :                 xLevel0SearchCenter,
   10837             :                 yLevel0SearchCenter,
   10838             :                 /* range */
   10839           0 :                 sixteenthRefPicPtr->stride_y,
   10840             :                 search_area_width,
   10841             :                 search_area_height);
   10842             :         } else {
   10843             :             // Put the first search location into level0 results
   10844           0 :             sad_loop_kernel(
   10845             :                 &context_ptr->sixteenth_sb_buffer[0],
   10846             :                 context_ptr->sixteenth_sb_buffer_stride,
   10847           0 :                 &sixteenthRefPicPtr->buffer_y[searchRegionIndex],
   10848           0 :                 (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   10849           0 :                     ? sixteenthRefPicPtr->stride_y
   10850           0 :                     : sixteenthRefPicPtr->stride_y * 2,
   10851           0 :                 (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   10852             :                     ? sb_height
   10853             :                     : sb_height >> 1,
   10854             :                 sb_width,
   10855             :                 /* results */
   10856             :                 level0BestSad,
   10857             :                 xLevel0SearchCenter,
   10858             :                 yLevel0SearchCenter,
   10859             :                 /* range */
   10860           0 :                 sixteenthRefPicPtr->stride_y,
   10861             :                 search_area_width,
   10862             :                 search_area_height);
   10863             :         }
   10864             :     }
   10865             :     // Post-process the kernel outputs: rescale the SAD, then add the window origin to each search center and multiply by 4
   10866           0 :     *level0BestSad =
   10867           0 :         (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   10868             :             ? *level0BestSad
   10869           0 :             : *level0BestSad *
   10870             :                   2;  // Multiply by 2 because considered only every other line
   10871           0 :     *xLevel0SearchCenter += x_search_area_origin;
   10872           0 :     *xLevel0SearchCenter *=
   10873             :         4;  // Multiply by 4 because operating on 1/4 resolution
   10874           0 :     *yLevel0SearchCenter += y_search_area_origin;
   10875           0 :     *yLevel0SearchCenter *=
   10876             :         4;  // Multiply by 4 because operating on 1/4 resolution
   10877             : 
   10878           0 :     return;
   10879             : }
   10880             : 
   10881           0 : void HmeLevel0(
                           // HME level 0 search for one search region.
                           // Builds a search window on the sixteenth-resolution reference picture,
                           // clips it against the reference picture bounds, calls one of the
                           // sad_loop_kernel variants over that window, and writes back the best
                           // SAD plus the best position (window origin added, then scaled by 4).
   10882             :     PictureParentControlSet *picture_control_set_ptr,  // unused (cast to void in the body)
   10883             :     MeContext *context_ptr,  // input/output parameter, ME context Ptr, used to
   10884             :                              // get/update ME results
   10885             :     int16_t origin_x,        // input parameter, SB position in the horizontal
   10886             :                              // direction- sixteenth resolution
   10887             :     int16_t origin_y,        // input parameter, SB position in the vertical
   10888             :                              // direction- sixteenth resolution
   10889             :     uint32_t sb_width,   // input parameter, SB width - sixteenth resolution
   10890             :     uint32_t sb_height,  // input parameter, SB height - sixteenth resolution
   10891             :     int16_t xHmeSearchCenter,  // input parameter, HME search center in the
   10892             :                                // horizontal direction
   10893             :     int16_t yHmeSearchCenter,  // input parameter, HME search center in the
   10894             :                                // vertical direction
   10895             :     EbPictureBufferDesc *
   10896             :         sixteenthRefPicPtr,  // input parameter, sixteenth reference Picture Ptr
   10897             :     uint32_t searchRegionNumberInWidth,   // input parameter, search region
   10898             :                                           // number in the horizontal direction
   10899             :     uint32_t searchRegionNumberInHeight,  // input parameter, search region
   10900             :                                           // number in the vertical direction
   10901             :     uint64_t *level0BestSad,              // output parameter, Level0 SAD at
   10902             :                                           // (searchRegionNumberInWidth,
   10903             :                                           // searchRegionNumberInHeight)
   10904             :     int16_t *xLevel0SearchCenter,         // output parameter, Level0 xMV at
   10905             :                                           // (searchRegionNumberInWidth,
   10906             :                                           // searchRegionNumberInHeight)
   10907             :     int16_t *yLevel0SearchCenter,         // output parameter, Level0 yMV at
   10908             :                                           // (searchRegionNumberInWidth,
   10909             :                                           // searchRegionNumberInHeight)
   10910             :     uint32_t searchAreaMultiplierX,  // input parameter, scale applied to the
                                                           // horizontal search sizes (value / 100)
   10911             :     uint32_t searchAreaMultiplierY)  // input parameter, scale applied to the
                                                           // vertical search sizes (value / 100)
   10912             : {
   10913             :     int16_t xTopLeftSearchRegion;
   10914             :     int16_t yTopLeftSearchRegion;
   10915             :     uint32_t searchRegionIndex;
   10916             :     int16_t x_search_area_origin;
   10917             :     int16_t y_search_area_origin;
   10918             :     int16_t xSearchRegionDistance;
   10919             :     int16_t ySearchRegionDistance;
   10920             :
   10921             :     int16_t padWidth;
   10922             :     int16_t padHeight;
   10923             :
   10924             :     // Adjust SR size based on the searchAreaShift
                           // NOTE(review): the comment above looks stale; the statement below only
                           // marks picture_control_set_ptr as unused.
   10925             :     (void)picture_control_set_ptr;
   10926             :     // Round up x_HME_L0 to be a multiple of 16
                           // Region width = configured width * searchAreaMultiplierX / 100, then
                           // rounded up to a multiple of 16.
   10927           0 :     int16_t search_area_width =
   10928             :         (int16_t)((((((context_ptr->hme_level0_search_area_in_width_array
   10929           0 :                            [searchRegionNumberInWidth] *
   10930           0 :                        searchAreaMultiplierX) /
   10931           0 :                       100))) +
   10932           0 :                    15) &
   10933             :                   ~0x0F);
                           // Region height = configured height * searchAreaMultiplierY / 100 (no
                           // rounding is applied to the height).
   10934           0 :     int16_t search_area_height =
   10935             :         (int16_t)(((context_ptr->hme_level0_search_area_in_height_array
   10936           0 :                         [searchRegionNumberInHeight] *
   10937           0 :                     searchAreaMultiplierY) /
   10938             :                    100));
   10939             :
                           // Start from the HME search center; the loops below add the scaled size
                           // of every lower-indexed search region.
   10940           0 :     xSearchRegionDistance = xHmeSearchCenter;
   10941           0 :     ySearchRegionDistance = yHmeSearchCenter;
                           // Limits used by the left/top clamps: reference picture origin minus one.
   10942           0 :     padWidth = (int16_t)(sixteenthRefPicPtr->origin_x) - 1;
   10943           0 :     padHeight = (int16_t)(sixteenthRefPicPtr->origin_y) - 1;
   10944             :
   10945           0 :     while (searchRegionNumberInWidth) {
   10946           0 :         searchRegionNumberInWidth--;
   10947           0 :         xSearchRegionDistance +=
   10948           0 :             (int16_t)(((context_ptr->hme_level0_search_area_in_width_array
   10949           0 :                             [searchRegionNumberInWidth] *
   10950           0 :                         searchAreaMultiplierX) /
   10951             :                        100));
   10952             :     }
   10953             :
   10954           0 :     while (searchRegionNumberInHeight) {
   10955           0 :         searchRegionNumberInHeight--;
   10956           0 :         ySearchRegionDistance +=
   10957           0 :             (int16_t)(((context_ptr->hme_level0_search_area_in_height_array
   10958           0 :                             [searchRegionNumberInHeight] *
   10959           0 :                         searchAreaMultiplierY) /
   10960             :                        100));
   10961             :     }
                           // Window origin relative to the SB position: accumulated distance minus
                           // half of the scaled total search area.
   10962           0 :     x_search_area_origin =
   10963           0 :         -(int16_t)((((context_ptr->hme_level0_total_search_area_width *
   10964           0 :                       searchAreaMultiplierX) /
   10965           0 :                      100)) >>
   10966           0 :                    1) +
   10967             :         xSearchRegionDistance;
   10968           0 :     y_search_area_origin =
   10969           0 :         -(int16_t)((((context_ptr->hme_level0_total_search_area_height *
   10970           0 :                       searchAreaMultiplierY) /
   10971           0 :                      100)) >>
   10972           0 :                    1) +
   10973             :         ySearchRegionDistance;
   10974             :
   10975             :     // Correct the left edge of the Search Area if it is not on the reference
   10976             :     // Picture
   10977           0 :     x_search_area_origin = ((origin_x + x_search_area_origin) < -padWidth)
   10978           0 :                                ? -padWidth - origin_x
   10979             :                                : x_search_area_origin;
   10980             :
                           // NOTE(review): x_search_area_origin was already clamped by the statement
                           // above, so this condition re-tests the clamped value and the width
                           // appears never to be reduced here - confirm whether that is intended.
   10981           0 :     search_area_width =
   10982           0 :         ((origin_x + x_search_area_origin) < -padWidth)
   10983           0 :             ? search_area_width -
   10984           0 :                   (-padWidth - (origin_x + x_search_area_origin))
   10985             :             : search_area_width;
   10986             :
   10987             :     // Correct the right edge of the Search Area if its not on the reference
   10988             :     // Picture
   10989           0 :     x_search_area_origin =
   10990           0 :         ((origin_x + x_search_area_origin) >
   10991           0 :          (int16_t)sixteenthRefPicPtr->width - 1)
   10992           0 :             ? x_search_area_origin - ((origin_x + x_search_area_origin) -
   10993           0 :                                       ((int16_t)sixteenthRefPicPtr->width - 1))
   10994             :             : x_search_area_origin;
   10995             :
                           // Shrink the width by the amount it overshoots the picture width, but
                           // never below 1.
   10996           0 :     search_area_width =
   10997           0 :         ((origin_x + x_search_area_origin + search_area_width) >
   10998           0 :          (int16_t)sixteenthRefPicPtr->width)
   10999           0 :             ? MAX(1,
   11000             :                   search_area_width -
   11001             :                       ((origin_x + x_search_area_origin + search_area_width) -
   11002             :                        (int16_t)sixteenthRefPicPtr->width))
   11003             :             : search_area_width;
   11004             :
   11005             :     // Round down x_HME to be a multiple of 16 as cropping already performed
                           // (widths below 16 are left unchanged)
   11006           0 :     search_area_width = (search_area_width < 16) ? search_area_width
   11007             :                                                  : search_area_width & ~0x0F;
   11008             :
   11009             :     // Correct the top edge of the Search Area if it is not on the reference
   11010             :     // Picture
   11011           0 :     y_search_area_origin = ((origin_y + y_search_area_origin) < -padHeight)
   11012           0 :                                ? -padHeight - origin_y
   11013             :                                : y_search_area_origin;
   11014             :
                           // NOTE(review): same pattern as the left edge - the origin was already
                           // clamped above, so this reduction appears never to trigger; confirm.
   11015           0 :     search_area_height =
   11016           0 :         ((origin_y + y_search_area_origin) < -padHeight)
   11017           0 :             ? search_area_height -
   11018           0 :                   (-padHeight - (origin_y + y_search_area_origin))
   11019             :             : search_area_height;
   11020             :
   11021             :     // Correct the bottom edge of the Search Area if its not on the reference
   11022             :     // Picture
   11023           0 :     y_search_area_origin =
   11024           0 :         ((origin_y + y_search_area_origin) >
   11025           0 :          (int16_t)sixteenthRefPicPtr->height - 1)
   11026           0 :             ? y_search_area_origin - ((origin_y + y_search_area_origin) -
   11027           0 :                                       ((int16_t)sixteenthRefPicPtr->height - 1))
   11028             :             : y_search_area_origin;
   11029             :
   11030           0 :     search_area_height =
   11031           0 :         (origin_y + y_search_area_origin + search_area_height >
   11032           0 :          (int16_t)sixteenthRefPicPtr->height)
   11033           0 :             ? MAX(1,
   11034             :                   search_area_height -
   11035             :                       ((origin_y + y_search_area_origin + search_area_height) -
   11036             :                        (int16_t)sixteenthRefPicPtr->height))
   11037             :             : search_area_height;
   11038             :
                           // Offset of the window's top-left sample inside the reference luma
                           // buffer (reference origin + SB position + window origin).
   11039           0 :     xTopLeftSearchRegion = ((int16_t)sixteenthRefPicPtr->origin_x + origin_x) +
   11040             :                            x_search_area_origin;
   11041           0 :     yTopLeftSearchRegion = ((int16_t)sixteenthRefPicPtr->origin_y + origin_y) +
   11042             :                            y_search_area_origin;
   11043           0 :     searchRegionIndex = xTopLeftSearchRegion +
   11044           0 :                         yTopLeftSearchRegion * sixteenthRefPicPtr->stride_y;
   11045             :
                           // Kernel selection:
                           //  - sb_width is a multiple of 8 or equals 4, search width is a multiple
                           //    of 16: sad_loop_kernel_hme_l0
                           //  - sb_width is a multiple of 8 or equals 4, any other search width:
                           //    sad_loop_kernel
                           //  - any other sb_width: sad_loop_kernel_c
                           // When hme_search_method is not FULL_SAD_SEARCH, the reference stride is
                           // doubled and the SB height halved, i.e. every other reference row is
                           // used.
                           // NOTE(review): unlike HmeLevel1, the source stride is not doubled here;
                           // presumably sixteenth_sb_buffer already holds only every other row in
                           // that mode - confirm against the code that fills it.
   11046           0 :     if (((sb_width & 7) == 0) || (sb_width == 4)) {
   11047           0 :         if ((search_area_width & 15) == 0) {
   11048             :             // Only width equals 16 (LCU equals 64) is updated
   11049             :             // other width sizes work with the old code as the one
   11050             :             // in"sad_loop_kernel_sse4_1_intrin"
   11051           0 :             sad_loop_kernel_hme_l0(
   11052             :                 &context_ptr->sixteenth_sb_buffer[0],
   11053             :                 context_ptr->sixteenth_sb_buffer_stride,
   11054           0 :                 &sixteenthRefPicPtr->buffer_y[searchRegionIndex],
   11055           0 :                 (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11056           0 :                     ? sixteenthRefPicPtr->stride_y
   11057           0 :                     : sixteenthRefPicPtr->stride_y * 2,
   11058           0 :                 (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11059             :                     ? sb_height
   11060             :                     : sb_height >> 1,
   11061             :                 sb_width,
   11062             :                 /* results */
   11063             :                 level0BestSad,
   11064             :                 xLevel0SearchCenter,
   11065             :                 yLevel0SearchCenter,
   11066             :                 /* range */
   11067           0 :                 sixteenthRefPicPtr->stride_y,
   11068             :                 search_area_width,
   11069             :                 search_area_height);
   11070             :         } else {
   11071             :             // Put the first search location into level0 results
   11072           0 :             sad_loop_kernel(
   11073             :                 &context_ptr->sixteenth_sb_buffer[0],
   11074             :                 context_ptr->sixteenth_sb_buffer_stride,
   11075           0 :                 &sixteenthRefPicPtr->buffer_y[searchRegionIndex],
   11076           0 :                 (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11077           0 :                     ? sixteenthRefPicPtr->stride_y
   11078           0 :                     : sixteenthRefPicPtr->stride_y * 2,
   11079           0 :                 (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11080             :                     ? sb_height
   11081             :                     : sb_height >> 1,
   11082             :                 sb_width,
   11083             :                 /* results */
   11084             :                 level0BestSad,
   11085             :                 xLevel0SearchCenter,
   11086             :                 yLevel0SearchCenter,
   11087             :                 /* range */
   11088           0 :                 sixteenthRefPicPtr->stride_y,
   11089             :                 search_area_width,
   11090             :                 search_area_height);
   11091             :         }
   11092             :     } else {
   11093           0 :         sad_loop_kernel_c(&context_ptr->sixteenth_sb_buffer[0],
   11094             :                         context_ptr->sixteenth_sb_buffer_stride,
   11095           0 :                         &sixteenthRefPicPtr->buffer_y[searchRegionIndex],
   11096           0 :                         (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11097           0 :                             ? sixteenthRefPicPtr->stride_y
   11098           0 :                             : sixteenthRefPicPtr->stride_y * 2,
   11099           0 :                         (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11100             :                             ? sb_height
   11101             :                             : sb_height >> 1,
   11102             :                         sb_width,
   11103             :                         /* results */
   11104             :                         level0BestSad,
   11105             :                         xLevel0SearchCenter,
   11106             :                         yLevel0SearchCenter,
   11107             :                         /* range */
   11108           0 :                         sixteenthRefPicPtr->stride_y,
   11109             :                         search_area_width,
   11110             :                         search_area_height);
   11111             :     }
   11112             :
   11113           0 :     *level0BestSad =
   11114           0 :         (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11115             :             ? *level0BestSad
   11116           0 :             : *level0BestSad *
   11117             :                   2;  // Multiply by 2 because only every other line was considered
                           // Convert the kernel result from window-relative to SB-relative
                           // coordinates before scaling.
   11118           0 :     *xLevel0SearchCenter += x_search_area_origin;
   11119           0 :     *xLevel0SearchCenter *=
   11120             :         4;  // Multiply by 4 because operating on 1/4 resolution
   11121           0 :     *yLevel0SearchCenter += y_search_area_origin;
   11122           0 :     *yLevel0SearchCenter *=
   11123             :         4;  // Multiply by 4 because operating on 1/4 resolution
   11124             :
   11125           0 :     return;
   11126             : }
   11127             : 
   11128           0 : void HmeLevel1(
                           // HME level 1 search.
                           // Builds a search window on the quarter-resolution reference picture
                           // around (xLevel0SearchCenter, yLevel0SearchCenter), clips it against
                           // the reference picture bounds, calls a sad_loop_kernel variant over
                           // that window, and writes back the best SAD plus the best position
                           // (window origin added, then scaled by 2).
   11129             :     MeContext *context_ptr,  // input/output parameter, ME context Ptr, used to
   11130             :                              // get/update ME results
   11131             :     int16_t origin_x,        // input parameter, SB position in the horizontal
   11132             :                              // direction - quarter resolution
   11133             :     int16_t origin_y,  // input parameter, SB position in the vertical direction
   11134             :                        // - quarter resolution
   11135             :     uint32_t sb_width,   // input parameter, SB width - quarter resolution
   11136             :     uint32_t sb_height,  // input parameter, SB height - quarter resolution
   11137             :     EbPictureBufferDesc
   11138             :         *quarterRefPicPtr,  // input parameter, quarter reference Picture Ptr
   11139             :     int16_t hmeLevel1SearchAreaInWidth,   // input parameter, hme level 1 search
   11140             :                                           // area in width
   11141             :     int16_t hmeLevel1SearchAreaInHeight,  // input parameter, hme level 1 search
   11142             :                                           // area in height
   11143             :     int16_t xLevel0SearchCenter,          // input parameter, best Level0 xMV at
   11144             :                                           // (searchRegionNumberInWidth,
   11145             :                                           // searchRegionNumberInHeight)
   11146             :     int16_t yLevel0SearchCenter,          // input parameter, best Level0 yMV at
   11147             :                                           // (searchRegionNumberInWidth,
   11148             :                                           // searchRegionNumberInHeight)
   11149             :     uint64_t *level1BestSad,              // output parameter, Level1 SAD at
   11150             :                                           // (searchRegionNumberInWidth,
   11151             :                                           // searchRegionNumberInHeight)
   11152             :     int16_t *xLevel1SearchCenter,         // output parameter, Level1 xMV at
   11153             :                                           // (searchRegionNumberInWidth,
   11154             :                                           // searchRegionNumberInHeight)
   11155             :     int16_t *yLevel1SearchCenter         // output parameter, Level1 yMV at
   11156             :                                           // (searchRegionNumberInWidth,
   11157             :                                           // searchRegionNumberInHeight)
   11158             :     ) {
   11159             :     int16_t xTopLeftSearchRegion;
   11160             :     int16_t yTopLeftSearchRegion;
   11161             :     uint32_t searchRegionIndex;
   11162             :     // Round up x_HME_L1 to be a multiple of 8
   11163           0 :     int16_t search_area_width =
   11164           0 :         (int16_t)((hmeLevel1SearchAreaInWidth + 7) & ~0x07);
   11165           0 :     int16_t search_area_height = hmeLevel1SearchAreaInHeight;
   11166             :
   11167             :     int16_t x_search_area_origin;
   11168             :     int16_t y_search_area_origin;
   11169             :
                           // Limits used by the left/top clamps: reference picture origin minus one.
   11170           0 :     int16_t padWidth = (int16_t)(quarterRefPicPtr->origin_x) - 1;
   11171           0 :     int16_t padHeight = (int16_t)(quarterRefPicPtr->origin_y) - 1;
   11172             :
                           // Window origin relative to the SB position: level 0 result minus half
                           // of the window size.
   11173           0 :     x_search_area_origin = -(search_area_width >> 1) + xLevel0SearchCenter;
   11174           0 :     y_search_area_origin = -(search_area_height >> 1) + yLevel0SearchCenter;
   11175             :
   11176             :     // Correct the left edge of the Search Area if it is not on the reference
   11177             :     // Picture
   11178           0 :     x_search_area_origin = ((origin_x + x_search_area_origin) < -padWidth)
   11179           0 :                                ? -padWidth - origin_x
   11180             :                                : x_search_area_origin;
   11181             :
                           // NOTE(review): x_search_area_origin was already clamped by the statement
                           // above, so this condition re-tests the clamped value and the width
                           // appears never to be reduced here - confirm whether that is intended.
   11182           0 :     search_area_width =
   11183           0 :         ((origin_x + x_search_area_origin) < -padWidth)
   11184           0 :             ? search_area_width -
   11185           0 :                   (-padWidth - (origin_x + x_search_area_origin))
   11186             :             : search_area_width;
   11187             :     // Correct the right edge of the Search Area if its not on the reference
   11188             :     // Picture
   11189           0 :     x_search_area_origin =
   11190           0 :         ((origin_x + x_search_area_origin) >
   11191           0 :          (int16_t)quarterRefPicPtr->width - 1)
   11192           0 :             ? x_search_area_origin - ((origin_x + x_search_area_origin) -
   11193           0 :                                       ((int16_t)quarterRefPicPtr->width - 1))
   11194             :             : x_search_area_origin;
   11195             :
                           // Shrink the width by the amount it overshoots the picture width, but
                           // never below 1.
   11196           0 :     search_area_width =
   11197           0 :         ((origin_x + x_search_area_origin + search_area_width) >
   11198           0 :          (int16_t)quarterRefPicPtr->width)
   11199           0 :             ? MAX(1,
   11200             :                   search_area_width -
   11201             :                       ((origin_x + x_search_area_origin + search_area_width) -
   11202             :                        (int16_t)quarterRefPicPtr->width))
   11203             :             : search_area_width;
   11204             :
   11205             :     // Constrain x_HME_L1 to be a multiple of 8 (round down as cropping already
   11206             :     // performed); widths below 8 are left unchanged
   11207           0 :     search_area_width =
   11208             :         (search_area_width < 8) ? search_area_width : search_area_width & ~0x07;
   11209             :     // Correct the top edge of the Search Area if it is not on the reference
   11210             :     // Picture
   11211           0 :     y_search_area_origin = ((origin_y + y_search_area_origin) < -padHeight)
   11212           0 :                                ? -padHeight - origin_y
   11213             :                                : y_search_area_origin;
   11214             :
                           // NOTE(review): same pattern as the left edge - the origin was already
                           // clamped above, so this reduction appears never to trigger; confirm.
   11215           0 :     search_area_height =
   11216           0 :         ((origin_y + y_search_area_origin) < -padHeight)
   11217           0 :             ? search_area_height -
   11218           0 :                   (-padHeight - (origin_y + y_search_area_origin))
   11219             :             : search_area_height;
   11220             :
   11221             :     // Correct the bottom edge of the Search Area if its not on the reference
   11222             :     // Picture
   11223           0 :     y_search_area_origin =
   11224           0 :         ((origin_y + y_search_area_origin) >
   11225           0 :          (int16_t)quarterRefPicPtr->height - 1)
   11226           0 :             ? y_search_area_origin - ((origin_y + y_search_area_origin) -
   11227           0 :                                       ((int16_t)quarterRefPicPtr->height - 1))
   11228             :             : y_search_area_origin;
   11229             :
   11230           0 :     search_area_height =
   11231           0 :         (origin_y + y_search_area_origin + search_area_height >
   11232           0 :          (int16_t)quarterRefPicPtr->height)
   11233           0 :             ? MAX(1,
   11234             :                   search_area_height -
   11235             :                       ((origin_y + y_search_area_origin + search_area_height) -
   11236             :                        (int16_t)quarterRefPicPtr->height))
   11237             :             : search_area_height;
   11238             :
   11239             :     // Move to the top left of the search region
   11240           0 :     xTopLeftSearchRegion =
   11241           0 :         ((int16_t)quarterRefPicPtr->origin_x + origin_x) + x_search_area_origin;
   11242           0 :     yTopLeftSearchRegion =
   11243           0 :         ((int16_t)quarterRefPicPtr->origin_y + origin_y) + y_search_area_origin;
   11244           0 :     searchRegionIndex = xTopLeftSearchRegion +
   11245           0 :                         yTopLeftSearchRegion * quarterRefPicPtr->stride_y;
   11246             :
                           // Kernel selection: sad_loop_kernel when sb_width is a multiple of 8 or
                           // equals 4, sad_loop_kernel_c otherwise. When hme_search_method is not
                           // FULL_SAD_SEARCH, both the source and the reference strides are doubled
                           // and the SB height is halved, i.e. every other row is used.
   11247           0 :     if (((sb_width & 7) == 0) || (sb_width == 4)) {
   11248             :         // Put the first search location into level0 results
   11249           0 :         sad_loop_kernel(
   11250             :             &context_ptr->quarter_sb_buffer[0],
   11251           0 :             (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11252             :                 ? context_ptr->quarter_sb_buffer_stride
   11253           0 :                 : context_ptr->quarter_sb_buffer_stride * 2,
   11254           0 :             &quarterRefPicPtr->buffer_y[searchRegionIndex],
   11255           0 :             (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11256           0 :                 ? quarterRefPicPtr->stride_y
   11257           0 :                 : quarterRefPicPtr->stride_y * 2,
   11258           0 :             (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11259             :                 ? sb_height
   11260             :                 : sb_height >> 1,
   11261             :             sb_width,
   11262             :             /* results */
   11263             :             level1BestSad,
   11264             :             xLevel1SearchCenter,
   11265             :             yLevel1SearchCenter,
   11266             :             /* range */
   11267           0 :             quarterRefPicPtr->stride_y,
   11268             :             search_area_width,
   11269             :             search_area_height);
   11270             :     } else {
   11271           0 :         sad_loop_kernel_c(&context_ptr->quarter_sb_buffer[0],
   11272           0 :                         (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11273             :                             ? context_ptr->quarter_sb_buffer_stride
   11274           0 :                             : context_ptr->quarter_sb_buffer_stride * 2,
   11275           0 :                         &quarterRefPicPtr->buffer_y[searchRegionIndex],
   11276           0 :                         (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11277           0 :                             ? quarterRefPicPtr->stride_y
   11278           0 :                             : quarterRefPicPtr->stride_y * 2,
   11279           0 :                         (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11280             :                             ? sb_height
   11281             :                             : sb_height >> 1,
   11282             :                         sb_width,
   11283             :                         /* results */
   11284             :                         level1BestSad,
   11285             :                         xLevel1SearchCenter,
   11286             :                         yLevel1SearchCenter,
   11287             :                         /* range */
   11288           0 :                         quarterRefPicPtr->stride_y,
   11289             :                         search_area_width,
   11290             :                         search_area_height);
   11291             :     }
   11292             :
   11293           0 :     *level1BestSad =
   11294           0 :         (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11295             :             ? *level1BestSad
   11296           0 :             : *level1BestSad *
   11297             :                   2;  // Multiply by 2 because only every other line was considered
                           // Convert the kernel result from window-relative to SB-relative
                           // coordinates before scaling.
   11298           0 :     *xLevel1SearchCenter += x_search_area_origin;
   11299           0 :     *xLevel1SearchCenter *=
   11300             :         2;  // Multiply by 2 because operating on 1/2 resolution
   11301           0 :     *yLevel1SearchCenter += y_search_area_origin;
   11302           0 :     *yLevel1SearchCenter *=
   11303             :         2;  // Multiply by 2 because operating on 1/2 resolution
   11304             :
   11305           0 :     return;
   11306             : }
   11307             : // HME level 2: SAD search in a window centred on the level-1 result and clipped to the padded reference picture.
   11308           0 : void HmeLevel2(
   11309             :     PictureParentControlSet
   11310             :         *picture_control_set_ptr,  // input parameter, Picture control set Ptr (unused)
   11311             :     MeContext *context_ptr,  // input parameter, ME context Ptr, supplies the source
   11312             :                              // block, search method and level-2 search-area sizes
   11313             :     int16_t
   11314             :         origin_x,  // input parameter, SB position in the horizontal direction
   11315             :     int16_t origin_y,  // input parameter, SB position in the vertical direction
   11316             :     uint32_t sb_width,   // input parameter, SB width - full resolution
   11317             :     uint32_t sb_height,  // input parameter, SB height - full resolution
   11318             :     EbPictureBufferDesc *refPicPtr,  // input parameter, reference Picture Ptr
   11319             :     uint32_t searchRegionNumberInWidth,   // input parameter, search region
   11320             :                                           // number in the horizontal direction
   11321             :     uint32_t searchRegionNumberInHeight,  // input parameter, search region
   11322             :                                           // number in the vertical direction
   11323             :     int16_t xLevel1SearchCenter,          // input parameter, best Level1 xMV
   11324             :                                           // at(searchRegionNumberInWidth,
   11325             :                                           // searchRegionNumberInHeight)
   11326             :     int16_t yLevel1SearchCenter,          // input parameter, best Level1 yMV
   11327             :                                           // at(searchRegionNumberInWidth,
   11328             :                                           // searchRegionNumberInHeight)
   11329             :     uint64_t *level2BestSad,              // output parameter, Level2 SAD at
   11330             :                                           // (searchRegionNumberInWidth,
   11331             :                                           // searchRegionNumberInHeight)
   11332             :     int16_t *xLevel2SearchCenter,         // output parameter, Level2 xMV at
   11333             :                                           // (searchRegionNumberInWidth,
   11334             :                                           // searchRegionNumberInHeight)
   11335             :     int16_t *yLevel2SearchCenter         // output parameter, Level2 yMV at
   11336             :                                           // (searchRegionNumberInWidth,
   11337             :                                           // searchRegionNumberInHeight)
   11338             :     ) {
   11339             :     int16_t xTopLeftSearchRegion;
   11340             :     int16_t yTopLeftSearchRegion;
   11341             :     uint32_t searchRegionIndex;
   11342             : 
   11343             :     // Round the search region width up to a multiple of 8 when it is below 8 or
   11344             :     // not a multiple of 8; SAD calculation performance is the same for any
   11345             :     // search region width from 1 to 8
   11346             :     (void)picture_control_set_ptr;
   11347           0 :     int16_t hmeLevel2SearchAreaInWidth =
   11348             :         (int16_t)context_ptr
   11349           0 :             ->hme_level2_search_area_in_width_array[searchRegionNumberInWidth];
   11350             :     // Round the level-2 search width up to a multiple of 8
   11351           0 :     int16_t search_area_width =
   11352           0 :         (int16_t)((hmeLevel2SearchAreaInWidth + 7) & ~0x07);
   11353           0 :     int16_t search_area_height =
   11354             :         (int16_t)context_ptr->hme_level2_search_area_in_height_array
   11355           0 :             [searchRegionNumberInHeight];
   11356             :     int16_t x_search_area_origin;
   11357             :     int16_t y_search_area_origin;
   11358             :     // The window may start up to BLOCK_SIZE_64 - 1 samples left of / above the picture
   11359           0 :     int16_t padWidth = (int16_t)BLOCK_SIZE_64 - 1;
   11360           0 :     int16_t padHeight = (int16_t)BLOCK_SIZE_64 - 1;
   11361             :     // Centre the window on the level-1 search centre
   11362           0 :     x_search_area_origin = -(search_area_width >> 1) + xLevel1SearchCenter;
   11363           0 :     y_search_area_origin = -(search_area_height >> 1) + yLevel1SearchCenter;
   11364             : 
   11365             :     // Correct the left edge of the Search Area if it is not on the reference
   11366             :     // Picture
   11367           0 :     x_search_area_origin = ((origin_x + x_search_area_origin) < -padWidth)
   11368           0 :                                ? -padWidth - origin_x
   11369             :                                : x_search_area_origin;
   11370             :     // NOTE(review): the origin was just clamped, so the test below looks always false and the width is never trimmed
   11371           0 :     search_area_width =
   11372           0 :         ((origin_x + x_search_area_origin) < -padWidth)
   11373           0 :             ? search_area_width -
   11374           0 :                   (-padWidth - (origin_x + x_search_area_origin))
   11375             :             : search_area_width;
   11376             : 
   11377             :     // Correct the right edge of the Search Area if it is not on the reference
   11378             :     // Picture
   11379           0 :     x_search_area_origin =
   11380           0 :         ((origin_x + x_search_area_origin) > (int16_t)refPicPtr->width - 1)
   11381           0 :             ? x_search_area_origin - ((origin_x + x_search_area_origin) -
   11382           0 :                                       ((int16_t)refPicPtr->width - 1))
   11383             :             : x_search_area_origin;
   11384             : 
   11385           0 :     search_area_width =
   11386           0 :         ((origin_x + x_search_area_origin + search_area_width) >
   11387           0 :          (int16_t)refPicPtr->width)
   11388           0 :             ? MAX(1,
   11389             :                   search_area_width -
   11390             :                       ((origin_x + x_search_area_origin + search_area_width) -
   11391             :                        (int16_t)refPicPtr->width))
   11392             :             : search_area_width;
   11393             : 
   11394             :     // Constrain the search width to a multiple of 8 (round down as cropping
   11395             :     // already performed); widths below 8 are left unchanged
   11396           0 :     search_area_width =
   11397             :         (search_area_width < 8) ? search_area_width : search_area_width & ~0x07;
   11398             :     // Correct the top edge of the Search Area if it is not on the reference
   11399             :     // Picture
   11400           0 :     y_search_area_origin = ((origin_y + y_search_area_origin) < -padHeight)
   11401           0 :                                ? -padHeight - origin_y
   11402             :                                : y_search_area_origin;
   11403             :     // NOTE(review): same as for the width - after the clamp above this test looks always false
   11404           0 :     search_area_height =
   11405           0 :         ((origin_y + y_search_area_origin) < -padHeight)
   11406           0 :             ? search_area_height -
   11407           0 :                   (-padHeight - (origin_y + y_search_area_origin))
   11408             :             : search_area_height;
   11409             : 
   11410             :     // Correct the bottom edge of the Search Area if it is not on the reference
   11411             :     // Picture
   11412           0 :     y_search_area_origin =
   11413           0 :         ((origin_y + y_search_area_origin) > (int16_t)refPicPtr->height - 1)
   11414           0 :             ? y_search_area_origin - ((origin_y + y_search_area_origin) -
   11415           0 :                                       ((int16_t)refPicPtr->height - 1))
   11416             :             : y_search_area_origin;
   11417             : 
   11418           0 :     search_area_height =
   11419           0 :         (origin_y + y_search_area_origin + search_area_height >
   11420           0 :          (int16_t)refPicPtr->height)
   11421           0 :             ? MAX(1,
   11422             :                   search_area_height -
   11423             :                       ((origin_y + y_search_area_origin + search_area_height) -
   11424             :                        (int16_t)refPicPtr->height))
   11425             :             : search_area_height;
   11426             : 
   11427             :     // Move to the top left of the search region
   11428           0 :     xTopLeftSearchRegion =
   11429           0 :         ((int16_t)refPicPtr->origin_x + origin_x) + x_search_area_origin;
   11430           0 :     yTopLeftSearchRegion =
   11431           0 :         ((int16_t)refPicPtr->origin_y + origin_y) + y_search_area_origin;
   11432           0 :     searchRegionIndex =
   11433           0 :         xTopLeftSearchRegion + yTopLeftSearchRegion * refPicPtr->stride_y;
   11434           0 :     if ((((sb_width & 7) == 0) && (sb_width != 40) && (sb_width != 56))) {
   11435             :         // Multiple-of-8 widths other than 40 and 56: sad_loop_kernel fills the level2 results
   11436           0 :         sad_loop_kernel(
   11437             :             context_ptr->sb_src_ptr,
   11438           0 :             (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11439             :                 ? context_ptr->sb_src_stride
   11440           0 :                 : context_ptr->sb_src_stride * 2,
   11441           0 :             &refPicPtr->buffer_y[searchRegionIndex],
   11442           0 :             (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11443           0 :                 ? refPicPtr->stride_y
   11444           0 :                 : refPicPtr->stride_y * 2,
   11445           0 :             (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11446             :                 ? sb_height
   11447             :                 : sb_height >> 1,
   11448             :             sb_width,
   11449             :             /* results */
   11450             :             level2BestSad,
   11451             :             xLevel2SearchCenter,
   11452             :             yLevel2SearchCenter,
   11453             :             /* range */
   11454           0 :             refPicPtr->stride_y,
   11455             :             search_area_width,
   11456             :             search_area_height);
   11457             :     } else {
   11458             :         // All remaining widths: sad_loop_kernel_c fills the level2 results
   11459           0 :         sad_loop_kernel_c(context_ptr->sb_src_ptr,
   11460           0 :                         (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11461             :                             ? context_ptr->sb_src_stride
   11462           0 :                             : context_ptr->sb_src_stride * 2,
   11463           0 :                         &refPicPtr->buffer_y[searchRegionIndex],
   11464           0 :                         (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11465           0 :                             ? refPicPtr->stride_y
   11466           0 :                             : refPicPtr->stride_y * 2,
   11467           0 :                         (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11468             :                             ? sb_height
   11469             :                             : sb_height >> 1,
   11470             :                         sb_width,
   11471             :                         /* results */
   11472             :                         level2BestSad,
   11473             :                         xLevel2SearchCenter,
   11474             :                         yLevel2SearchCenter,
   11475             :                         /* range */
   11476           0 :                         refPicPtr->stride_y,
   11477             :                         search_area_width,
   11478             :                         search_area_height);
   11479             :     }
   11480             :     // Outside FULL_SAD_SEARCH the strides were doubled and the height halved above
   11481           0 :     *level2BestSad =
   11482           0 :         (context_ptr->hme_search_method == FULL_SAD_SEARCH)
   11483             :             ? *level2BestSad
   11484           0 :             : *level2BestSad *
   11485             :                   2;  // Multiply by 2 because only every other line was considered
   11486           0 :     *xLevel2SearchCenter += x_search_area_origin;  // rebase by the window origin (kernel result presumably window-relative)
   11487           0 :     *yLevel2SearchCenter += y_search_area_origin;
   11488             : 
   11489           0 :     return;
   11490             : }
   11491             : // Return a pointer to the PU inside the single stored buffer that matches fracPosition, plus that buffer's stride.
   11492           0 : static void SelectBuffer(
   11493             :     uint32_t pu_index,       //[IN] row index into pu_search_index_map
   11494             :     uint8_t fracPosition,    //[IN] 0 (integer), 2 (b), 8 (h) or 10 (j)
   11495             :     uint32_t pu_width,       //[IN] unused
   11496             :     uint32_t pu_height,      //[IN] unused
   11497             :     uint8_t *pos_Full,       //[IN] integer-position buffer
   11498             :     uint8_t *pos_b,          //[IN] position-b buffer
   11499             :     uint8_t *pos_h,          //[IN] position-h buffer
   11500             :     uint8_t *pos_j,          //[IN] position-j buffer
   11501             :     uint32_t refHalfStride,  //[IN] stride used with pos_b, pos_h and pos_j
   11502             :     uint32_t refBufferFullStride,  //[IN] stride used with pos_Full
   11503             :     uint8_t **dst_ptr,       //[OUT] selected buffer, advanced to the PU
   11504             :     uint32_t *DstPtrStride)  //[OUT] stride of the selected buffer
   11505             : {
   11506             :     (void)pu_width;
   11507             :     (void)pu_height;
   11508             :     // Entry [0] is applied as the in-row offset and entry [1] as the row offset of the PU
   11509           0 :     uint32_t puShiftXIndex = pu_search_index_map[pu_index][0];
   11510           0 :     uint32_t puShiftYIndex = pu_search_index_map[pu_index][1];
   11511           0 :     uint32_t ref_stride = refHalfStride;
   11512             : 
   11513             :     // Only positions 0, 2, 8 and 10 map onto a stored buffer; a single buffer is
   11514             :     // selected here and no averaging is done
   11515           0 :     uint8_t *buf1 = pos_Full;
   11516             : 
   11517           0 :     switch (fracPosition) {
   11518           0 :     case 0:  // integer
   11519           0 :         buf1 = pos_Full;
   11520           0 :         ref_stride = refBufferFullStride;
   11521           0 :         break;
   11522           0 :     case 2:  // b
   11523           0 :         buf1 = pos_b;
   11524           0 :         break;
   11525           0 :     case 8:  // h
   11526           0 :         buf1 = pos_h;
   11527           0 :         break;
   11528           0 :     case 10:  // j
   11529           0 :         buf1 = pos_j;
   11530           0 :         break;
   11531           0 :     default: break;  // any other position keeps pos_Full with refHalfStride
   11532             :     }
   11533             : 
   11534           0 :     buf1 = buf1 + puShiftXIndex + puShiftYIndex * ref_stride;
   11535             : 
   11536           0 :     *dst_ptr = buf1;
   11537           0 :     *DstPtrStride = ref_stride;
   11538             : 
   11539           0 :     return;
   11540             : }
   11541             : // Pick the two stored buffers that bracket fracPosition and pass them to picture_average_kernel with Dst as output.
   11542           0 : static void QuarterPelCompensation(
   11543             :     uint32_t pu_index,       //[IN] row index into pu_search_index_map
   11544             :     uint8_t fracPosition,    //[IN] position code handled by the switch below
   11545             :     uint32_t pu_width,       //[IN] block width passed to picture_average_kernel
   11546             :     uint32_t pu_height,      //[IN] block height passed to picture_average_kernel
   11547             :     uint8_t *pos_Full,       //[IN] integer-position buffer
   11548             :     uint8_t *pos_b,          //[IN] position-b buffer
   11549             :     uint8_t *pos_h,          //[IN] position-h buffer
   11550             :     uint8_t *pos_j,          //[IN] position-j buffer
   11551             :     uint32_t refHalfStride,  //[IN] stride used with pos_b, pos_h and pos_j
   11552             :     uint32_t refBufferFullStride,  //[IN] stride used with pos_Full
   11553             :     uint8_t *Dst,        //[IN] buffer handed to picture_average_kernel (presumably receives the average)
   11554             :     uint32_t DstStride) { //[IN] stride passed along with Dst
   11555             : 
   11556           0 :     uint32_t puShiftXIndex = pu_search_index_map[pu_index][0];
   11557           0 :     uint32_t puShiftYIndex = pu_search_index_map[pu_index][1];
   11558           0 :     uint32_t refStride1 = refHalfStride;
   11559           0 :     uint32_t refStride2 = refHalfStride;
   11560             : 
   11561             :     // For each quarter-pel position handled below, determine the 2 buffers to
   11562             :     // average; a stride is switched to the full stride whenever pos_Full is used
   11563           0 :     uint8_t *buf1 = pos_Full;
   11564           0 :     uint8_t *buf2 = pos_Full;
   11565             : 
   11566           0 :     switch (fracPosition) {
   11567           0 :     case 1:  // a
   11568           0 :         buf1 = pos_Full;
   11569           0 :         buf2 = pos_b;
   11570           0 :         refStride1 = refBufferFullStride;
   11571           0 :         break;
   11572             : 
   11573           0 :     case 3:  // c
   11574           0 :         buf1 = pos_b;
   11575           0 :         buf2 = pos_Full + 1;  // next sample in the row
   11576           0 :         refStride2 = refBufferFullStride;
   11577           0 :         break;
   11578             : 
   11579           0 :     case 4:  // d
   11580           0 :         buf1 = pos_Full;
   11581           0 :         buf2 = pos_h;
   11582           0 :         refStride1 = refBufferFullStride;
   11583           0 :         break;
   11584             : 
   11585           0 :     case 5:  // e
   11586           0 :         buf1 = pos_b;
   11587           0 :         buf2 = pos_h;
   11588           0 :         break;
   11589             : 
   11590           0 :     case 6:  // f
   11591           0 :         buf1 = pos_b;
   11592           0 :         buf2 = pos_j;
   11593           0 :         break;
   11594             : 
   11595           0 :     case 7:  // g
   11596           0 :         buf1 = pos_b;
   11597           0 :         buf2 = pos_h + 1;
   11598           0 :         break;
   11599             : 
   11600           0 :     case 9:  // i
   11601           0 :         buf1 = pos_h;
   11602           0 :         buf2 = pos_j;
   11603           0 :         break;
   11604             : 
   11605           0 :     case 11:  // k
   11606           0 :         buf1 = pos_j;
   11607           0 :         buf2 = pos_h + 1;
   11608           0 :         break;
   11609             : 
   11610           0 :     case 12:  // L
   11611           0 :         buf1 = pos_h;
   11612           0 :         buf2 = pos_Full + refBufferFullStride;  // next row of the full buffer
   11613           0 :         refStride2 = refBufferFullStride;
   11614           0 :         break;
   11615             : 
   11616           0 :     case 13:  // m
   11617           0 :         buf1 = pos_h;
   11618           0 :         buf2 = pos_b + refHalfStride;  // next row of the b buffer
   11619           0 :         break;
   11620             : 
   11621           0 :     case 14:  // n
   11622           0 :         buf1 = pos_j;
   11623           0 :         buf2 = pos_b + refHalfStride;
   11624           0 :         break;
   11625           0 :     case 15:  // o
   11626           0 :         buf1 = pos_h + 1;
   11627           0 :         buf2 = pos_b + refHalfStride;
   11628           0 :         break;
   11629           0 :     default: break;  // other positions average pos_Full with itself using refHalfStride
   11630             :     }
   11631             : 
   11632           0 :     buf1 = buf1 + puShiftXIndex + puShiftYIndex * refStride1;
   11633           0 :     buf2 = buf2 + puShiftXIndex + puShiftYIndex * refStride2;
   11634             : 
   11635           0 :     picture_average_kernel(buf1,
   11636             :                             refStride1,
   11637             :                             buf2,
   11638             :                             refStride2,
   11639             :                             Dst,
   11640             :                             DstStride,
   11641             :                             pu_width,
   11642             :                             pu_height);
   11643             : 
   11644           0 :     return;
   11645             : }
   11646             : 
   11647             : // Variant of SelectBuffer with a chroma flag: the PU offsets are halved when chroma is EB_TRUE.
   11648             : // TODO: Alt-refs - change SelectBuffer and QuarterPelCompensation to be applicable for both chroma and luma
   11649           0 : static void select_buffer(
   11650             :     uint32_t pu_index,  //[IN] row index into pu_search_index_map
   11651             :     EbBool chroma,      //[IN] EB_TRUE halves both PU offsets
   11652             :     uint8_t fracPosition,    //[IN] 0 (integer), 2 (b), 8 (h) or 10 (j)
   11653             :     uint32_t pu_width,       //[IN] unused
   11654             :     uint32_t pu_height,      //[IN] unused
   11655             :     uint8_t *pos_Full,       //[IN] integer-position buffer
   11656             :     uint8_t *pos_b,          //[IN] position-b buffer
   11657             :     uint8_t *pos_h,          //[IN] position-h buffer
   11658             :     uint8_t *pos_j,          //[IN] position-j buffer
   11659             :     uint32_t refHalfStride,  //[IN] stride used with pos_b, pos_h and pos_j
   11660             :     uint32_t refBufferFullStride,  //[IN] stride used with pos_Full
   11661             :     uint8_t **dst_ptr,       //[OUT] selected buffer, advanced to the PU
   11662             :     uint32_t *DstPtrStride)  //[OUT] stride of the selected buffer
   11663             : {
   11664             :     (void)pu_width;
   11665             :     (void)pu_height;
   11666             : 
   11667             :     uint32_t puShiftXIndex;
   11668             :     uint32_t puShiftYIndex;
   11669             :     // Entry [0] is applied as the in-row offset and entry [1] as the row offset of the PU
   11670           0 :     if (chroma == EB_TRUE) {
   11671           0 :         puShiftXIndex = (pu_search_index_map[pu_index][0]) >> 1;
   11672           0 :         puShiftYIndex = (pu_search_index_map[pu_index][1]) >> 1;
   11673             :     } else {
   11674           0 :         puShiftXIndex = pu_search_index_map[pu_index][0];
   11675           0 :         puShiftYIndex = pu_search_index_map[pu_index][1];
   11676             :     }
   11677             : 
   11678           0 :     uint32_t ref_stride = refHalfStride;
   11679             : 
   11680             :     // Only positions 0, 2, 8 and 10 map onto a stored buffer; a single buffer is
   11681             :     // selected here and no averaging is done
   11682           0 :     uint8_t *buf1 = pos_Full;
   11683             : 
   11684           0 :     switch (fracPosition) {
   11685           0 :     case 0:  // integer
   11686           0 :         buf1 = pos_Full;
   11687           0 :         ref_stride = refBufferFullStride;
   11688           0 :         break;
   11689           0 :     case 2:  // b
   11690           0 :         buf1 = pos_b;
   11691           0 :         break;
   11692           0 :     case 8:  // h
   11693           0 :         buf1 = pos_h;
   11694           0 :         break;
   11695           0 :     case 10:  // j
   11696           0 :         buf1 = pos_j;
   11697           0 :         break;
   11698           0 :     default: break;  // any other position keeps pos_Full with refHalfStride
   11699             :     }
   11700             : 
   11701           0 :     buf1 = buf1 + puShiftXIndex + puShiftYIndex * ref_stride;
   11702             : 
   11703           0 :     *dst_ptr = buf1;
   11704           0 :     *DstPtrStride = ref_stride;
   11705             : 
   11706           0 :     return;
   11707             : }
   11708             : 
   11709             : // Variant of QuarterPelCompensation with a chroma flag: the PU offsets are halved when chroma is EB_TRUE.
   11710             : // TODO: Alt-refs - change SelectBuffer and QuarterPelCompensation to be applicable for both chroma and luma
   11711           0 : static void quarter_pel_compensation(
   11712             :     uint32_t pu_index,  //[IN] row index into pu_search_index_map
   11713             :     EbBool chroma,      //[IN] EB_TRUE halves both PU offsets
   11714             :     uint8_t fracPosition,    //[IN] position code handled by the switch below
   11715             :     uint32_t pu_width,       //[IN] block width passed to picture_average_kernel
   11716             :     uint32_t pu_height,      //[IN] block height passed to picture_average_kernel
   11717             :     uint8_t *pos_Full,       //[IN] integer-position buffer
   11718             :     uint8_t *pos_b,          //[IN] position-b buffer
   11719             :     uint8_t *pos_h,          //[IN] position-h buffer
   11720             :     uint8_t *pos_j,          //[IN] position-j buffer
   11721             :     uint32_t refHalfStride,  //[IN] stride used with pos_b, pos_h and pos_j
   11722             :     uint32_t refBufferFullStride,  //[IN] stride used with pos_Full
   11723             :     uint8_t *Dst,        //[IN] buffer handed to picture_average_kernel (presumably receives the average)
   11724             :     uint32_t DstStride) {  //[IN] stride passed along with Dst
   11725             :     uint32_t puShiftXIndex;
   11726             :     uint32_t puShiftYIndex;
   11727             : 
   11728           0 :     if (chroma == EB_TRUE) {
   11729           0 :         puShiftXIndex = (pu_search_index_map[pu_index][0]) >> 1;
   11730           0 :         puShiftYIndex = (pu_search_index_map[pu_index][1]) >> 1;
   11731             :     } else {
   11732           0 :         puShiftXIndex = pu_search_index_map[pu_index][0];
   11733           0 :         puShiftYIndex = pu_search_index_map[pu_index][1];
   11734             :     }
   11735             : 
   11736           0 :     uint32_t refStride1 = refHalfStride;
   11737           0 :     uint32_t refStride2 = refHalfStride;
   11738             : 
   11739             :     // For each quarter-pel position handled below, determine the 2 buffers to
   11740             :     // average; a stride is switched to the full stride whenever pos_Full is used
   11741           0 :     uint8_t *buf1 = pos_Full;
   11742           0 :     uint8_t *buf2 = pos_Full;
   11743             : 
   11744           0 :     switch (fracPosition) {
   11745           0 :     case 1:  // a
   11746           0 :         buf1 = pos_Full;
   11747           0 :         buf2 = pos_b;
   11748           0 :         refStride1 = refBufferFullStride;
   11749           0 :         break;
   11750             : 
   11751           0 :     case 3:  // c
   11752           0 :         buf1 = pos_b;
   11753           0 :         buf2 = pos_Full + 1;  // next sample in the row
   11754           0 :         refStride2 = refBufferFullStride;
   11755           0 :         break;
   11756             : 
   11757           0 :     case 4:  // d
   11758           0 :         buf1 = pos_Full;
   11759           0 :         buf2 = pos_h;
   11760           0 :         refStride1 = refBufferFullStride;
   11761           0 :         break;
   11762             : 
   11763           0 :     case 5:  // e
   11764           0 :         buf1 = pos_b;
   11765           0 :         buf2 = pos_h;
   11766           0 :         break;
   11767             : 
   11768           0 :     case 6:  // f
   11769           0 :         buf1 = pos_b;
   11770           0 :         buf2 = pos_j;
   11771           0 :         break;
   11772             : 
   11773           0 :     case 7:  // g
   11774           0 :         buf1 = pos_b;
   11775           0 :         buf2 = pos_h + 1;
   11776           0 :         break;
   11777             : 
   11778           0 :     case 9:  // i
   11779           0 :         buf1 = pos_h;
   11780           0 :         buf2 = pos_j;
   11781           0 :         break;
   11782             : 
   11783           0 :     case 11:  // k
   11784           0 :         buf1 = pos_j;
   11785           0 :         buf2 = pos_h + 1;
   11786           0 :         break;
   11787             : 
   11788           0 :     case 12:  // L
   11789           0 :         buf1 = pos_h;
   11790           0 :         buf2 = pos_Full + refBufferFullStride;  // next row of the full buffer
   11791           0 :         refStride2 = refBufferFullStride;
   11792           0 :         break;
   11793             : 
   11794           0 :     case 13:  // m
   11795           0 :         buf1 = pos_h;
   11796           0 :         buf2 = pos_b + refHalfStride;  // next row of the b buffer
   11797           0 :         break;
   11798             : 
   11799           0 :     case 14:  // n
   11800           0 :         buf1 = pos_j;
   11801           0 :         buf2 = pos_b + refHalfStride;
   11802           0 :         break;
   11803           0 :     case 15:  // o
   11804           0 :         buf1 = pos_h + 1;
   11805           0 :         buf2 = pos_b + refHalfStride;
   11806           0 :         break;
   11807           0 :     default: break;  // other positions average pos_Full with itself using refHalfStride
   11808             :     }
   11809             : 
   11810           0 :     buf1 = buf1 + puShiftXIndex + puShiftYIndex * refStride1;
   11811           0 :     buf2 = buf2 + puShiftXIndex + puShiftYIndex * refStride2;
   11812             : 
   11813           0 :     picture_average_kernel(buf1,
   11814             :                             refStride1,
   11815             :                             buf2,
   11816             :                             refStride2,
   11817             :                             Dst,
   11818             :                             DstStride,
   11819             :                             pu_width,
   11820             :                             pu_height);
   11821             : 
   11822           0 :     return;
   11823             : }
   11824             : 
   11825             : /*******************************************************************************
   11826             :  * Sets *comp_blk_ptr / *comp_blk_ptr_stride to a stored buffer, or to an averaged block built in firstRefTempDst.
   11827             :  * Requirement: pu_width      = 8, 16, 24, 32, 48 or 64
   11828             :  * Requirement: pu_height % 2 = 0
   11829             :  * Requirement: skip         = 0 or 1
   11830             :  * Requirement (x86 only): temp_buf % 16 = 0
   11831             :  * Requirement (x86 only): (dst->buffer_y + dstLumaIndex) % 16 = 0 when pu_width % 16 = 0
   11832             :  * Requirement (x86 only): (dst->bufferCb + dstChromaIndex) % 16 = 0 when pu_width % 32 = 0
   11833             :  * Requirement (x86 only): (dst->bufferCr + dstChromaIndex) % 16 = 0 when pu_width % 32 = 0
   11834             :  * Requirement (x86 only): dst->stride_y % 16 = 0 when pu_width % 16 = 0
   11835             :  * Requirement (x86 only): dst->chromaStride % 16 = 0 when pu_width % 32 = 0
   11836             :  *******************************************************************************/
   11837           0 : void uni_pred_averaging(uint32_t pu_index, EbBool chroma, uint8_t firstFracPos,
   11838             :                         uint32_t pu_width, uint32_t pu_height,
   11839             :                         uint8_t *firstRefInteger, uint8_t *firstRefPosB,
   11840             :                         uint8_t *firstRefPosH, uint8_t *firstRefPosJ,
   11841             :                         uint32_t refBufferStride,
   11842             :                         uint32_t refBufferFullList0Stride,
   11843             :                         uint8_t *firstRefTempDst, uint8_t **comp_blk_ptr,
   11844             :                         uint32_t *comp_blk_ptr_stride)
   11845             : {
   11846             :     // Buffer selection, or quarter-pel compensation on the fly (sub_position_type 2 presumably marks quarter-pel)
   11847           0 :     if (sub_position_type[firstFracPos] != 2) {
   11848           0 :         select_buffer(pu_index,
   11849             :                       chroma,
   11850             :                       firstFracPos,
   11851             :                       pu_width,
   11852             :                       pu_height,
   11853             :                       firstRefInteger,
   11854             :                       firstRefPosB,
   11855             :                       firstRefPosH,
   11856             :                       firstRefPosJ,
   11857             :                       refBufferStride,
   11858             :                       refBufferFullList0Stride,
   11859             :                       comp_blk_ptr,
   11860             :                       comp_blk_ptr_stride);
   11861             :     } else {
   11862           0 :         quarter_pel_compensation(pu_index,
   11863             :                                  chroma,
   11864             :                                  firstFracPos,
   11865             :                                  pu_width,
   11866             :                                  pu_height,
   11867             :                                  firstRefInteger,
   11868             :                                  firstRefPosB,
   11869             :                                  firstRefPosH,
   11870             :                                  firstRefPosJ,
   11871             :                                  refBufferStride,
   11872             :                                  refBufferFullList0Stride,
   11873             :                                  firstRefTempDst,
   11874             :                                  BLOCK_SIZE_64);
   11875             :         // Report the temp buffer and the BLOCK_SIZE_64 stride that were passed to the call above
   11876           0 :         *comp_blk_ptr = firstRefTempDst;
   11877           0 :         *comp_blk_ptr_stride = BLOCK_SIZE_64;
   11878             :     }
   11879           0 : }
   11880             : 
   11881             : /*******************************************************************************
   11882             :  * Requirement: pu_width      = 8, 16, 24, 32, 48 or 64
   11883             :  * Requirement: pu_height % 2 = 0
   11884             :  * Requirement: skip         = 0 or 1
   11885             :  * Requirement (x86 only): temp_buf % 16 = 0
   11886             :  * Requirement (x86 only): (dst->buffer_y + dst_luma_index) % 16 = 0 when
   11887             :  *   pu_width % 16 = 0
   11888             :  * Requirement (x86 only): (dst->buffer_cb + dst_chroma_index) % 16 = 0 when pu_width % 32 = 0
   11889             :  * Requirement (x86 only): (dst->buffer_cr + dst_chroma_index) % 16 = 0 when pu_width % 32 = 0
   11890             :  * Requirement (x86 only): dst->stride_y % 16 = 0 when pu_width % 16 = 0
   11891             :  * Requirement (x86 only): dst->chromaStride % 16 = 0 when pu_width % 32 = 0
   11892             :  *******************************************************************************/
   11893           0 : uint32_t BiPredAverging(
   11894             :     MeContext *context_ptr, MePredUnit *me_candidate, uint32_t pu_index,
   11895             :     uint8_t *sourcePic, uint32_t lumaStride, uint8_t firstFracPos,
   11896             :     uint8_t secondFracPos, uint32_t pu_width, uint32_t pu_height,
   11897             :     uint8_t *firstRefInteger, uint8_t *firstRefPosB, uint8_t *firstRefPosH,
   11898             :     uint8_t *firstRefPosJ, uint8_t *secondRefInteger, uint8_t *secondRefPosB,
   11899             :     uint8_t *secondRefPosH, uint8_t *secondRefPosJ, uint32_t refBufferStride,
   11900             :     uint32_t refBufferFullList0Stride, uint32_t refBufferFullList1Stride,
   11901             :     uint8_t *firstRefTempDst, uint8_t *secondRefTempDst)
   11902             : {
   11903             :     uint8_t *ptrList0, *ptrList1;
   11904             :     uint32_t ptrList0Stride, ptrList1Stride;
   11905             : 
   11906             :     // Buffer Selection and quater-pel compensation on the fly
   11907           0 :     if (sub_position_type[firstFracPos] != 2) {
   11908           0 :         SelectBuffer(pu_index,
   11909             :                      firstFracPos,
   11910             :                      pu_width,
   11911             :                      pu_height,
   11912             :                      firstRefInteger,
   11913             :                      firstRefPosB,
   11914             :                      firstRefPosH,
   11915             :                      firstRefPosJ,
   11916             :                      refBufferStride,
   11917             :                      refBufferFullList0Stride,
   11918             :                      &ptrList0,
   11919             :                      &ptrList0Stride);
   11920             :     } else {
   11921           0 :         QuarterPelCompensation(pu_index,
   11922             :                                firstFracPos,
   11923             :                                pu_width,
   11924             :                                pu_height,
   11925             :                                firstRefInteger,
   11926             :                                firstRefPosB,
   11927             :                                firstRefPosH,
   11928             :                                firstRefPosJ,
   11929             :                                refBufferStride,
   11930             :                                refBufferFullList0Stride,
   11931             :                                firstRefTempDst,
   11932             :                                BLOCK_SIZE_64);
   11933             : 
   11934           0 :         ptrList0 = firstRefTempDst;
   11935           0 :         ptrList0Stride = BLOCK_SIZE_64;
   11936             :     }
   11937             : 
   11938           0 :     if (sub_position_type[secondFracPos] != 2) {
   11939           0 :         SelectBuffer(pu_index,
   11940             :                      secondFracPos,
   11941             :                      pu_width,
   11942             :                      pu_height,
   11943             :                      secondRefInteger,
   11944             :                      secondRefPosB,
   11945             :                      secondRefPosH,
   11946             :                      secondRefPosJ,
   11947             :                      refBufferStride,
   11948             :                      refBufferFullList1Stride,
   11949             :                      &ptrList1,
   11950             :                      &ptrList1Stride);
   11951             :     } else {
   11952             :         // uni-prediction List1 luma
   11953             :         // doing the luma interpolation
   11954           0 :         QuarterPelCompensation(pu_index,
   11955             :                                secondFracPos,
   11956             :                                pu_width,
   11957             :                                pu_height,
   11958             :                                secondRefInteger,
   11959             :                                secondRefPosB,
   11960             :                                secondRefPosH,
   11961             :                                secondRefPosJ,
   11962             :                                refBufferStride,
   11963             :                                refBufferFullList1Stride,
   11964             :                                secondRefTempDst,
   11965             :                                BLOCK_SIZE_64);
   11966             : 
   11967           0 :         ptrList1 = secondRefTempDst;
   11968           0 :         ptrList1Stride = BLOCK_SIZE_64;
   11969             :     }
   11970             : 
   11971             :     // bi-pred luma
   11972           0 :     me_candidate->distortion =
   11973           0 :         (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
   11974           0 :             ? nxm_sad_avg_kernel(
   11975             :                   sourcePic,
   11976             :                   lumaStride << 1,
   11977             :                   ptrList0,
   11978             :                   ptrList0Stride << 1,
   11979             :                   ptrList1,
   11980             :                   ptrList1Stride << 1,
   11981             :                   pu_height >> 1,
   11982             :                   pu_width)
   11983             :                   << 1
   11984           0 :             : nxm_sad_avg_kernel(
   11985             :                   sourcePic,
   11986             :                   lumaStride,
   11987             :                   ptrList0,
   11988             :                   ptrList0Stride,
   11989             :                   ptrList1,
   11990             :                   ptrList1Stride,
   11991             :                   pu_height,
   11992             :                   pu_width);
   11993             : 
   11994           0 :     return me_candidate->distortion;
   11995             : }
   11996             : 
   11997             : /*******************************************
   11998             :  * BiPredictionCompensation
   11999             :  *   performs compensation for the first and
   12000             :  *   second reference candidates and then computes
   12001             :  *   the averaged distortion via BiPredAverging(); fills me_candidate and p_sb_bipred_sad[]
   12002             :  *******************************************/
   12003           0 : EbErrorType BiPredictionCompensation(MeContext *context_ptr, uint32_t pu_index,
   12004             :                                      MePredUnit *me_candidate,
   12005             :                                      uint32_t firstList,
   12006             :                                      uint8_t first_list_ref_pic_idx,
   12007             :                                      uint32_t firstRefMv, uint32_t secondList,
   12008             :                                      uint8_t second_list_ref_pic_idx,
   12009             :                                      uint32_t secondRefMv)
   12010             : {
   12011           0 :     EbErrorType return_error = EB_ErrorNone;
   12012             :     // First reference: MV components, their integer/fractional parts and the derived buffer offsets
   12013             :     int16_t firstRefPosX;
   12014             :     int16_t firstRefPosY;
   12015             :     int16_t firstRefIntegPosx;
   12016             :     int16_t firstRefIntegPosy;
   12017             :     uint8_t firstRefFracPosx;
   12018             :     uint8_t firstRefFracPosy;
   12019             :     uint8_t firstRefFracPos;
   12020             :     int32_t xfirstSearchIndex;
   12021             :     int32_t yfirstSearchIndex;
   12022             :     int32_t firstSearchRegionIndexPosInteg;
   12023             :     int32_t firstSearchRegionIndexPosb;
   12024             :     int32_t firstSearchRegionIndexPosh;
   12025             :     int32_t firstSearchRegionIndexPosj;
   12026             :     // Second reference: same set of values
   12027             :     int16_t secondRefPosX;
   12028             :     int16_t secondRefPosY;
   12029             :     int16_t secondRefIntegPosx;
   12030             :     int16_t secondRefIntegPosy;
   12031             :     uint8_t secondRefFracPosx;
   12032             :     uint8_t secondRefFracPosy;
   12033             :     uint8_t secondRefFracPos;
   12034             :     int32_t xsecondSearchIndex;
   12035             :     int32_t ysecondSearchIndex;
   12036             :     int32_t secondSearchRegionIndexPosInteg;
   12037             :     int32_t secondSearchRegionIndexPosb;
   12038             :     int32_t secondSearchRegionIndexPosh;
   12039             :     int32_t secondSearchRegionIndexPosj;
   12040             :     // pu_search_index_map[pu_index] supplies the PU's x ([0]) and y ([1]) shift inside the source block
   12041           0 :     uint32_t puShiftXIndex = pu_search_index_map[pu_index][0];
   12042           0 :     uint32_t puShiftYIndex = pu_search_index_map[pu_index][1];
   12043             : 
   12044           0 :     const uint32_t puLcuBufferIndex =
   12045           0 :         puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
   12046             : 
   12047           0 :     me_candidate->prediction_direction = BI_PRED;
   12048             : 
   12049             :     // First reference
   12050             :     // Set Candidate information
   12051           0 :     firstRefPosX = _MVXT(firstRefMv);
   12052           0 :     firstRefPosY = _MVYT(firstRefMv);
   12053           0 :     me_candidate->ref_index[0] = (uint8_t)first_list_ref_pic_idx;
   12054           0 :     me_candidate->ref0_list = (uint8_t)firstList;
   12055             :     // Split each MV component: >> 2 gives the integer part, the two low bits the fractional part
   12056           0 :     firstRefIntegPosx = (firstRefPosX >> 2);
   12057           0 :     firstRefIntegPosy = (firstRefPosY >> 2);
   12058           0 :     firstRefFracPosx = (uint8_t)firstRefPosX & 0x03;
   12059           0 :     firstRefFracPosy = (uint8_t)firstRefPosY & 0x03;
   12060             :     // Fractions are packed as x + 4 * y; integer positions become offsets from the search-area origin, shifted by ME_FILTER_TAP >> 1
   12061           0 :     firstRefFracPos = (uint8_t)(firstRefFracPosx + (firstRefFracPosy << 2));
   12062           0 :     xfirstSearchIndex =
   12063           0 :         (int32_t)firstRefIntegPosx -
   12064           0 :         context_ptr->x_search_area_origin[firstList][first_list_ref_pic_idx];
   12065           0 :     yfirstSearchIndex =
   12066           0 :         (int32_t)firstRefIntegPosy -
   12067           0 :         context_ptr->y_search_area_origin[firstList][first_list_ref_pic_idx];
   12068           0 :     firstSearchRegionIndexPosInteg =
   12069           0 :         (int32_t)(xfirstSearchIndex + (ME_FILTER_TAP >> 1)) +
   12070           0 :         (int32_t)context_ptr
   12071           0 :                 ->interpolated_full_stride[firstList][first_list_ref_pic_idx] *
   12072           0 :             (int32_t)(yfirstSearchIndex + (ME_FILTER_TAP >> 1));
   12073             :     // The b/h/j offsets all use interpolated_stride: b is one column left; h and j are one column left and one row up (h and j are computed identically)
   12074           0 :     firstSearchRegionIndexPosb =
   12075           0 :         (int32_t)(xfirstSearchIndex + (ME_FILTER_TAP >> 1) - 1) +
   12076           0 :         (int32_t)context_ptr->interpolated_stride *
   12077           0 :             (int32_t)(yfirstSearchIndex + (ME_FILTER_TAP >> 1));
   12078           0 :     firstSearchRegionIndexPosh =
   12079           0 :         (int32_t)(xfirstSearchIndex + (ME_FILTER_TAP >> 1) - 1) +
   12080           0 :         (int32_t)context_ptr->interpolated_stride *
   12081           0 :             (int32_t)(yfirstSearchIndex + (ME_FILTER_TAP >> 1) - 1);
   12082           0 :     firstSearchRegionIndexPosj =
   12083           0 :         (int32_t)(xfirstSearchIndex + (ME_FILTER_TAP >> 1) - 1) +
   12084           0 :         (int32_t)context_ptr->interpolated_stride *
   12085           0 :             (int32_t)(yfirstSearchIndex + (ME_FILTER_TAP >> 1) - 1);
   12086             : 
   12087             :     // Second reference
   12088             :     // (same derivation as for the first reference, using secondList / second_list_ref_pic_idx)
   12089             :     // Set Candidate information
   12090           0 :     secondRefPosX = _MVXT(secondRefMv);
   12091           0 :     secondRefPosY = _MVYT(secondRefMv);
   12092           0 :     me_candidate->ref_index[1] = (uint8_t)second_list_ref_pic_idx;
   12093           0 :     me_candidate->ref1_list = (uint8_t)secondList;
   12094           0 :     secondRefIntegPosx = (secondRefPosX >> 2);
   12095           0 :     secondRefIntegPosy = (secondRefPosY >> 2);
   12096           0 :     secondRefFracPosx = (uint8_t)secondRefPosX & 0x03;
   12097           0 :     secondRefFracPosy = (uint8_t)secondRefPosY & 0x03;
   12098             : 
   12099           0 :     secondRefFracPos = (uint8_t)(secondRefFracPosx + (secondRefFracPosy << 2));
   12100           0 :     xsecondSearchIndex =
   12101           0 :         secondRefIntegPosx -
   12102           0 :         context_ptr->x_search_area_origin[secondList][second_list_ref_pic_idx];
   12103           0 :     ysecondSearchIndex =
   12104           0 :         secondRefIntegPosy -
   12105           0 :         context_ptr->y_search_area_origin[secondList][second_list_ref_pic_idx];
   12106           0 :     secondSearchRegionIndexPosInteg =
   12107           0 :         (int32_t)(xsecondSearchIndex + (ME_FILTER_TAP >> 1)) +
   12108           0 :         (int32_t)
   12109             :                 context_ptr->interpolated_full_stride[secondList]
   12110           0 :                                                      [second_list_ref_pic_idx] *
   12111           0 :             (int32_t)(ysecondSearchIndex + (ME_FILTER_TAP >> 1));
   12112           0 :     secondSearchRegionIndexPosb =
   12113           0 :         (int32_t)(xsecondSearchIndex + (ME_FILTER_TAP >> 1) - 1) +
   12114           0 :         (int32_t)context_ptr->interpolated_stride *
   12115           0 :             (int32_t)(ysecondSearchIndex + (ME_FILTER_TAP >> 1));
   12116           0 :     secondSearchRegionIndexPosh =
   12117           0 :         (int32_t)(xsecondSearchIndex + (ME_FILTER_TAP >> 1) - 1) +
   12118           0 :         (int32_t)context_ptr->interpolated_stride *
   12119           0 :             (int32_t)(ysecondSearchIndex + (ME_FILTER_TAP >> 1) - 1);
   12120           0 :     secondSearchRegionIndexPosj =
   12121           0 :         (int32_t)(xsecondSearchIndex + (ME_FILTER_TAP >> 1) - 1) +
   12122           0 :         (int32_t)context_ptr->interpolated_stride *
   12123           0 :             (int32_t)(ysecondSearchIndex + (ME_FILTER_TAP >> 1) - 1);
   12124             :     // nIndex is the p_sb_bipred_sad[] slot for this PU: pu_index values inside the ranges below are remapped through the matching tab* table, all others are used unchanged
   12125             :     uint32_t nIndex;
   12126             : 
   12127           0 :     if (pu_index > 200)
   12128           0 :         nIndex = pu_index;
   12129           0 :     else if (pu_index > 184)
   12130           0 :         nIndex = tab8x32[pu_index - 185] + 185;
   12131           0 :     else if (pu_index > 168)
   12132           0 :         nIndex = tab32x8[pu_index - 169] + 169;
   12133           0 :     else if (pu_index > 136)
   12134           0 :         nIndex = tab8x16[pu_index - 137] + 137;
   12135           0 :     else if (pu_index > 128)
   12136           0 :         nIndex = tab16x32[pu_index - 129] + 129;
   12137           0 :     else if (pu_index > 126)
   12138           0 :         nIndex = pu_index;
   12139           0 :     else if (pu_index > 94)
   12140           0 :         nIndex = tab16x8[pu_index - 95] + 95;
   12141           0 :     else if (pu_index > 86)
   12142           0 :         nIndex = tab32x16[pu_index - 87] + 87;
   12143           0 :     else if (pu_index > 84)
   12144           0 :         nIndex = pu_index;
   12145           0 :     else if (pu_index > 20)
   12146           0 :         nIndex = tab8x8[pu_index - 21] + 21;
   12147           0 :     else if (pu_index > 4)
   12148           0 :         nIndex = tab16x16[pu_index - 5] + 5;
   12149             :     else
   12150           0 :         nIndex = pu_index;
   12151           0 :     context_ptr->p_sb_bipred_sad[nIndex] =
   12152             :         // BiPredAverging() also stores the same value in me_candidate->distortion
   12153           0 :         BiPredAverging(
   12154             :             context_ptr,
   12155             :             me_candidate,
   12156             :             pu_index,
   12157           0 :             &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
   12158             :             context_ptr->sb_src_stride,
   12159             :             firstRefFracPos,
   12160             :             secondRefFracPos,
   12161             :             partition_width[pu_index],
   12162             :             partition_height[pu_index],
   12163           0 :             &(context_ptr->integer_buffer_ptr[firstList][first_list_ref_pic_idx]
   12164             :                                              [firstSearchRegionIndexPosInteg]),
   12165           0 :             &(context_ptr->pos_b_buffer[firstList][first_list_ref_pic_idx]
   12166             :                                        [firstSearchRegionIndexPosb]),
   12167           0 :             &(context_ptr->pos_h_buffer[firstList][first_list_ref_pic_idx]
   12168             :                                        [firstSearchRegionIndexPosh]),
   12169           0 :             &(context_ptr->pos_j_buffer[firstList][first_list_ref_pic_idx]
   12170             :                                        [firstSearchRegionIndexPosj]),
   12171             :             &(context_ptr
   12172           0 :                   ->integer_buffer_ptr[secondList][second_list_ref_pic_idx]
   12173             :                                       [secondSearchRegionIndexPosInteg]),
   12174           0 :             &(context_ptr->pos_b_buffer[secondList][second_list_ref_pic_idx]
   12175             :                                        [secondSearchRegionIndexPosb]),
   12176           0 :             &(context_ptr->pos_h_buffer[secondList][second_list_ref_pic_idx]
   12177             :                                        [secondSearchRegionIndexPosh]),
   12178           0 :             &(context_ptr->pos_j_buffer[secondList][second_list_ref_pic_idx]
   12179             :                                        [secondSearchRegionIndexPosj]),
   12180             :             context_ptr->interpolated_stride,
   12181             :             context_ptr
   12182             :                 ->interpolated_full_stride[firstList][first_list_ref_pic_idx],
   12183             :             context_ptr
   12184             :                 ->interpolated_full_stride[secondList][second_list_ref_pic_idx],
   12185             :             &(context_ptr->one_d_intermediate_results_buf0[0]),
   12186             :             &(context_ptr->one_d_intermediate_results_buf1[0]));
   12187             :     // return_error is never modified above, so this always returns EB_ErrorNone
   12188           0 :     return return_error;
   12189             : }
   12190             : 
   12191           0 : uint8_t skip_bi_pred(
   12192             :     PictureParentControlSet *picture_control_set_ptr,
   12193             :     uint8_t ref_type,
   12194             :     uint8_t ref_type_table[7]) {
   12195             :     // Despite the name, a non-zero return means "allow": 1 when prune_unipred_at_me is off, otherwise 1 only if ref_type is among the first PRUNE_REF_ME_TH entries of ref_type_table
   12196           0 :     if (!picture_control_set_ptr->prune_unipred_at_me)
   12197           0 :         return 1;
   12198             :     // NOTE(review): assumes PRUNE_REF_ME_TH does not exceed the table length (declared as 7) - confirm
   12199           0 :     uint8_t allow_cand = 0;
   12200             :     uint8_t ref_idx;
   12201           0 :     for (ref_idx = 0; ref_idx < PRUNE_REF_ME_TH; ref_idx++) {
   12202           0 :         if (ref_type == ref_type_table[ref_idx])
   12203           0 :             allow_cand = 1;
   12204             :     }
   12205           0 :     return allow_cand;
   12206             : }
   12207             : 
   12208             : /*******************************************
   12209             :  * BiPredictionSearch
   12210             :  *   performs Bi-Prediction Search (LCU): calls BiPredictionCompensation() for each allowed reference pair of one PU, filling me_candidate[] from candidateIndex onward, and writes the final candidate index to *total_me_candidate_index. Always returns EB_ErrorNone.
   12211             :  *******************************************/
   12212             : // This function enables all 16 Bipred candidates when MRP is ON
   12213           0 : EbErrorType BiPredictionSearch(
   12214             :     SequenceControlSet *sequence_control_set_ptr,
   12215             :     MeContext *context_ptr, uint32_t pu_index, uint8_t candidateIndex,
   12216             :     uint32_t activeRefPicFirstLisNum, uint32_t activeRefPicSecondLisNum,
   12217             :     uint8_t *total_me_candidate_index,
   12218             :     uint8_t ref_type_table[7],
   12219             :     PictureParentControlSet *picture_control_set_ptr) {
   12220           0 :     EbErrorType return_error = EB_ErrorNone;
   12221             : 
   12222             :     uint32_t firstListRefPictdx;
   12223             :     uint32_t secondListRefPictdx;
   12224             : 
   12225             :     (void)picture_control_set_ptr;
   12226             :     // nIndex selects the p_sb_best_mv[][][] entry for this PU: pu_index values inside the ranges below are remapped through the matching tab* table, all others are used unchanged
   12227             :     uint32_t nIndex;
   12228             : 
   12229           0 :     if (pu_index > 200)
   12230           0 :         nIndex = pu_index;
   12231           0 :     else if (pu_index > 184)
   12232           0 :         nIndex = tab8x32[pu_index - 185] + 185;
   12233           0 :     else if (pu_index > 168)
   12234           0 :         nIndex = tab32x8[pu_index - 169] + 169;
   12235           0 :     else if (pu_index > 136)
   12236           0 :         nIndex = tab8x16[pu_index - 137] + 137;
   12237           0 :     else if (pu_index > 128)
   12238           0 :         nIndex = tab16x32[pu_index - 129] + 129;
   12239           0 :     else if (pu_index > 126)
   12240           0 :         nIndex = pu_index;
   12241           0 :     else if (pu_index > 94)
   12242           0 :         nIndex = tab16x8[pu_index - 95] + 95;
   12243           0 :     else if (pu_index > 86)
   12244           0 :         nIndex = tab32x16[pu_index - 87] + 87;
   12245           0 :     else if (pu_index > 84)
   12246           0 :         nIndex = pu_index;
   12247           0 :     else if (pu_index > 20)
   12248           0 :         nIndex = tab8x8[pu_index - 21] + 21;
   12249           0 :     else if (pu_index > 4)
   12250           0 :         nIndex = tab16x16[pu_index - 5] + 5;
   12251             :     else
   12252           0 :         nIndex = pu_index;
   12253             :     // NM: Inter list bipred.
   12254             :     //(LAST,BWD) , (LAST,ALT)  and (LAST,ALT2)
   12255             :     //(LAST2,BWD), (LAST2,ALT) and (LAST2,ALT2)
   12256             :     //(LAST3,BWD), (LAST3,ALT) and (LAST3,ALT2)
   12257             :     //(GOLD,BWD) , (GOLD,ALT)  and (GOLD,ALT2)
   12258           0 :     for (firstListRefPictdx = 0; firstListRefPictdx < activeRefPicFirstLisNum;
   12259           0 :          firstListRefPictdx++) {
   12260           0 :         for (secondListRefPictdx = 0;
   12261             :              secondListRefPictdx < activeRefPicSecondLisNum;
   12262           0 :              secondListRefPictdx++) {
   12263             :             {
   12264           0 :                      uint8_t to_inject_ref_type_0 = svt_get_ref_frame_type(REF_LIST_0, firstListRefPictdx);
   12265           0 :                      uint8_t to_inject_ref_type_1 = svt_get_ref_frame_type(REF_LIST_1, secondListRefPictdx);
   12266           0 :                      uint8_t add_bi = skip_bi_pred(
   12267             :                          picture_control_set_ptr,
   12268             :                          to_inject_ref_type_0,
   12269             :                          ref_type_table);
   12270           0 :                      add_bi += skip_bi_pred(
   12271             :                          picture_control_set_ptr,
   12272             :                          to_inject_ref_type_1,
   12273             :                          ref_type_table);
   12274             :                      // add_bi is non-zero when at least one of the two references passes skip_bi_pred()
   12275           0 :                      if (add_bi) {
   12276           0 :                 BiPredictionCompensation(
   12277             :                     context_ptr,
   12278             :                     pu_index,
   12279           0 :                     &(context_ptr->me_candidate[candidateIndex].pu[pu_index]),
   12280             :                     REFERENCE_PIC_LIST_0,
   12281             :                     firstListRefPictdx,
   12282             :                     context_ptr->p_sb_best_mv[REFERENCE_PIC_LIST_0]
   12283             :                                              [firstListRefPictdx][nIndex],
   12284             :                     REFERENCE_PIC_LIST_1,
   12285             :                     secondListRefPictdx,
   12286             :                     context_ptr->p_sb_best_mv[REFERENCE_PIC_LIST_1]
   12287             :                                              [secondListRefPictdx][nIndex]);
   12288             : 
   12289           0 :                 candidateIndex++;
   12290             :                      }
   12291             :             }
   12292             :         }
   12293             :     }
   12294             :     // The within-list pairs below are only attempted when mrp_mode == 0
   12295           0 :     if (sequence_control_set_ptr->mrp_mode == 0)
   12296             :     {
   12297             :         // NM: Within list 0    bipred: (LAST,LAST2)    (LAST,LAST3) (LAST,GOLD)
   12298           0 :         for (firstListRefPictdx = 1;
   12299             :              firstListRefPictdx < activeRefPicFirstLisNum;
   12300           0 :              firstListRefPictdx++) {
   12301           0 :             uint8_t to_inject_ref_type_0 = svt_get_ref_frame_type(REF_LIST_0, firstListRefPictdx);
   12302           0 :             uint8_t add_bi = skip_bi_pred(
   12303             :                 picture_control_set_ptr,
   12304             :                 to_inject_ref_type_0,
   12305             :                 ref_type_table);
   12306           0 :             if (add_bi) {
   12307           0 :             BiPredictionCompensation(
   12308             :                 context_ptr,
   12309             :                 pu_index,
   12310           0 :                 &(context_ptr->me_candidate[candidateIndex].pu[pu_index]),
   12311             :                 REFERENCE_PIC_LIST_0,
   12312             :                 0,
   12313             :                 context_ptr->p_sb_best_mv[REFERENCE_PIC_LIST_0][0][nIndex],
   12314             :                 REFERENCE_PIC_LIST_0,
   12315             :                 firstListRefPictdx,
   12316             :                 context_ptr->p_sb_best_mv[REFERENCE_PIC_LIST_0]
   12317             :                                          [firstListRefPictdx][nIndex]);
   12318             : 
   12319           0 :             candidateIndex++;
   12320             :             }
   12321             :         }
   12322             :         // NM: Within list 1    bipred: (BWD, ALT)
   12323           0 :         for (secondListRefPictdx = 1; // NOTE(review): starts at 1 while the bound below is at most 1,
   12324           0 :              secondListRefPictdx < MIN(activeRefPicSecondLisNum, 1); // so this body never runs; confirm the intended bound before changing it
   12325           0 :              secondListRefPictdx++) {
   12326           0 :             uint8_t to_inject_ref_type_1 = svt_get_ref_frame_type(REF_LIST_1, secondListRefPictdx); // prune on the list-1 reference this pair adds (was REF_LIST_0 with the stale firstListRefPictdx)
   12327           0 :             uint8_t add_bi = skip_bi_pred(
   12328             :                 picture_control_set_ptr,
   12329             :                 to_inject_ref_type_1,
   12330             :                 ref_type_table);
   12331           0 :             if (add_bi) {
   12332           0 :             BiPredictionCompensation(
   12333             :                 context_ptr,
   12334             :                 pu_index,
   12335           0 :                 &(context_ptr->me_candidate[candidateIndex].pu[pu_index]),
   12336             :                 REFERENCE_PIC_LIST_1,
   12337             :                 0,
   12338             :                 context_ptr->p_sb_best_mv[REFERENCE_PIC_LIST_1][0][nIndex],
   12339             :                 REFERENCE_PIC_LIST_1,
   12340             :                 secondListRefPictdx,
   12341             :                 context_ptr->p_sb_best_mv[REFERENCE_PIC_LIST_1]
   12342             :                                          [secondListRefPictdx][nIndex]);
   12343             : 
   12344           0 :             candidateIndex++;
   12345             :             }
   12346             :         }
   12347             :     }
   12348           0 :     *total_me_candidate_index = candidateIndex;
   12349             :     // return_error is never modified (the BiPredictionCompensation() results are ignored), so this is always EB_ErrorNone
   12350           0 :     return return_error;
   12351             : }
   12352             : 
   12353             : // Nader - to be replaced by look-up table
   12354             : /*******************************************
   12355             :  * get_me_info_index
   12356             :  *   linearly searches block_index in [0, max_me_block) for the motion
   12357             :  *   info entry whose size (partition_width/height) and position (pu_search_index_map, after subtracting geom_offset_x/y from the block origin)
   12358             :  *   match the input md candidate; returns that index, or 0xFFFFFFF when none matches
   12359             :  *******************************************/
   12360           0 : uint32_t get_me_info_index(uint32_t max_me_block, const BlockGeom *blk_geom,
   12361             :                            uint32_t geom_offset_x, uint32_t geom_offset_y) {
   12362             :     // search for motion info
   12363             :     uint32_t block_index;
   12364           0 :     uint32_t me_info_index = 0xFFFFFFF; // "not found" value; NOTE(review): seven F digits (0x0FFFFFFF), not UINT32_MAX - confirm what callers compare against
   12365             :     // the first matching entry wins
   12366           0 :     for (block_index = 0; block_index < max_me_block; block_index++) {
   12367           0 :         if ((blk_geom->bwidth == partition_width[block_index]) &&
   12368           0 :             (blk_geom->bheight == partition_height[block_index]) &&
   12369           0 :             ((blk_geom->origin_x - geom_offset_x) ==
   12370           0 :              pu_search_index_map[block_index][0]) &&
   12371           0 :             ((blk_geom->origin_y - geom_offset_y) ==
   12372           0 :              pu_search_index_map[block_index][1])) {
   12373           0 :             me_info_index = block_index;
   12374           0 :             break;
   12375             :         }
   12376             :     }
   12377           0 :     return me_info_index;
   12378             : }
   12379             : 
   12380             : // Nader - to be replaced by look-up table
   12381             : /*******************************************
   12382             :  * get_in_loop_me_info_index
   12383             :  *   linearly searches block_index in [0, max_me_block) for the in-loop motion
   12384             :  *   info entry whose width, height and origin equal those of blk_geom, using the *_128_sb tables when is_128_sb is set;
   12385             :  *   returns that index, or 0xFFFFFFF when none matches
   12386             :  *******************************************/
   12387           0 : uint32_t get_in_loop_me_info_index(uint32_t max_me_block, uint8_t is_128_sb,
   12388             :                                    const BlockGeom *blk_geom) {
   12389             :     // search for motion info
   12390             :     uint32_t block_index;
   12391           0 :     uint32_t me_info_index = 0xFFFFFFF; // "not found" value; NOTE(review): seven F digits (0x0FFFFFFF), not UINT32_MAX - confirm what callers compare against
   12392           0 :     if (is_128_sb) {
   12393           0 :         for (block_index = 0; block_index < max_me_block; block_index++) {
   12394           0 :             if (blk_geom->bwidth ==
   12395           0 :                     in_loop_me_block_width_128_sb[block_index] &&
   12396           0 :                 blk_geom->bheight ==
   12397           0 :                     in_loop_me_block_height_128_sb[block_index] &&
   12398           0 :                 blk_geom->origin_x ==
   12399           0 :                     in_loop_me_block_index_128_sb[block_index][0] &&
   12400           0 :                 blk_geom->origin_y ==
   12401           0 :                     in_loop_me_block_index_128_sb[block_index][1]) {
   12402           0 :                 me_info_index = block_index;
   12403           0 :                 break;
   12404             :             }
   12405             :         }
   12406             :     } else {
   12407           0 :         for (block_index = 0; block_index < max_me_block; block_index++) {
   12408           0 :             if (blk_geom->bwidth == in_loop_me_block_width[block_index] &&
   12409           0 :                 blk_geom->bheight == in_loop_me_block_height[block_index] &&
   12410           0 :                 blk_geom->origin_x == in_loop_me_block_index[block_index][0] &&
   12411           0 :                 blk_geom->origin_y == in_loop_me_block_index[block_index][1]) {
   12412           0 :                 me_info_index = block_index;
   12413           0 :                 break;
   12414             :             }
   12415             :         }
   12416             :     }
   12417             :     // still 0xFFFFFFF here if neither table produced a match
   12418           0 :     return me_info_index;
   12419             : }
   12420             : 
   12421             : #define NSET_CAND(mePuResult, num, dist, dir)                      \
   12422             :     (mePuResult)->distortion_direction[(num)].distortion = (dist); \
   12423             :     (mePuResult)->distortion_direction[(num)].direction = (dir);
   12424             : 
   12425           0 : int8_t sort_3_elements(uint32_t a, uint32_t b, uint32_t c) {
   12426           0 :     uint8_t sortCode = 0;
   12427           0 :     if (a <= b && a <= c) {
   12428           0 :         if (b <= c)
   12429           0 :             sortCode = a_b_c;
   12430             :         else
   12431           0 :             sortCode = a_c_b;
   12432           0 :     } else if (b <= a && b <= c) {
   12433           0 :         if (a <= c)
   12434           0 :             sortCode = b_a_c;
   12435             :         else
   12436           0 :             sortCode = b_c_a;
   12437           0 :     } else if (a <= b)
   12438           0 :         sortCode = c_a_b;
   12439             :     else
   12440           0 :         sortCode = c_b_a;
   12441           0 :     return sortCode;
   12442             : }
   12443             : 
   12444           0 : EbErrorType CheckZeroZeroCenter(EbPictureBufferDesc *refPicPtr,
   12445             :                                 MeContext *context_ptr, uint32_t sb_origin_x,
   12446             :                                 uint32_t sb_origin_y, uint32_t sb_width,
   12447             :                                 uint32_t sb_height, int16_t *x_search_center,
   12448             :                                 int16_t *y_search_center)
   12449             : 
   12450             : {
   12451           0 :     EbErrorType return_error = EB_ErrorNone;
   12452             :     uint32_t searchRegionIndex, zeroMvSad, hmeMvSad, hmeMvdRate;
   12453             :     uint64_t hmeMvCost, zeroMvCost, searchCenterCost;
   12454           0 :     int16_t origin_x = (int16_t)sb_origin_x;
   12455           0 :     int16_t origin_y = (int16_t)sb_origin_y;
   12456           0 :     uint32_t subsampleSad = 1;
   12457           0 :     int16_t pad_width = (int16_t)BLOCK_SIZE_64 - 1;
   12458           0 :     int16_t pad_height = (int16_t)BLOCK_SIZE_64 - 1;
   12459             : 
   12460           0 :     searchRegionIndex =
   12461           0 :         (int16_t)refPicPtr->origin_x + origin_x +
   12462           0 :         ((int16_t)refPicPtr->origin_y + origin_y) * refPicPtr->stride_y;
   12463             : 
   12464           0 :     zeroMvSad = nxm_sad_kernel(
   12465           0 :         context_ptr->sb_src_ptr,
   12466           0 :         context_ptr->sb_src_stride << subsampleSad,
   12467           0 :         &(refPicPtr->buffer_y[searchRegionIndex]),
   12468           0 :         refPicPtr->stride_y << subsampleSad,
   12469             :         sb_height >> subsampleSad,
   12470             :         sb_width);
   12471             : 
   12472           0 :     zeroMvSad = zeroMvSad << subsampleSad;
   12473             : 
   12474             :     // FIX
   12475             :     // Correct the left edge of the Search Area if it is not on the reference
   12476             :     // Picture
   12477           0 :     *x_search_center = ((origin_x + *x_search_center) < -pad_width)
   12478           0 :                            ? -pad_width - origin_x
   12479             :                            : *x_search_center;
   12480             :     // Correct the right edge of the Search Area if its not on the reference
   12481             :     // Picture
   12482           0 :     *x_search_center =
   12483           0 :         ((origin_x + *x_search_center) > (int16_t)refPicPtr->width - 1)
   12484           0 :             ? *x_search_center - ((origin_x + *x_search_center) -
   12485           0 :                                   ((int16_t)refPicPtr->width - 1))
   12486             :             : *x_search_center;
   12487             :     // Correct the top edge of the Search Area if it is not on the reference
   12488             :     // Picture
   12489           0 :     *y_search_center = ((origin_y + *y_search_center) < -pad_height)
   12490           0 :                            ? -pad_height - origin_y
   12491             :                            : *y_search_center;
   12492             :     // Correct the bottom edge of the Search Area if its not on the reference
   12493             :     // Picture
   12494           0 :     *y_search_center =
   12495           0 :         ((origin_y + *y_search_center) > (int16_t)refPicPtr->height - 1)
   12496           0 :             ? *y_search_center - ((origin_y + *y_search_center) -
   12497           0 :                                   ((int16_t)refPicPtr->height - 1))
   12498             :             : *y_search_center;
   12499             :     ///
   12500             : 
   12501           0 :     zeroMvCost = zeroMvSad << COST_PRECISION;
   12502           0 :     searchRegionIndex =
   12503           0 :         (int16_t)(refPicPtr->origin_x + origin_x) + *x_search_center +
   12504           0 :         ((int16_t)(refPicPtr->origin_y + origin_y) + *y_search_center) *
   12505           0 :             refPicPtr->stride_y;
   12506             : 
   12507           0 :     hmeMvSad = nxm_sad_kernel(
   12508           0 :         context_ptr->sb_src_ptr,
   12509           0 :         context_ptr->sb_src_stride << subsampleSad,
   12510           0 :         &(refPicPtr->buffer_y[searchRegionIndex]),
   12511           0 :         refPicPtr->stride_y << subsampleSad,
   12512             :         sb_height >> subsampleSad,
   12513             :         sb_width);
   12514             : 
   12515           0 :     hmeMvSad = hmeMvSad << subsampleSad;
   12516             : 
   12517           0 :     hmeMvdRate = 0;
   12518             :     // AMIR use AV1 rate estimation functions
   12519             :     // MeGetMvdFractionBits(
   12520             :     //    ABS(*x_search_center << 2),
   12521             :     //    ABS(*y_search_center << 2),
   12522             :     //    context_ptr->mvd_bits_array,
   12523             :     //    &hmeMvdRate);
   12524             : 
   12525           0 :     hmeMvCost = (hmeMvSad << COST_PRECISION) +
   12526           0 :                 (((context_ptr->lambda * hmeMvdRate) + MD_OFFSET) >> MD_SHIFT);
   12527           0 :     searchCenterCost = MIN(zeroMvCost, hmeMvCost);
   12528             : 
   12529           0 :     *x_search_center = (searchCenterCost == zeroMvCost) ? 0 : *x_search_center;
   12530           0 :     *y_search_center = (searchCenterCost == zeroMvCost) ? 0 : *y_search_center;
   12531             : 
   12532           0 :     return return_error;
   12533             : }
   12534             : 
   12535           0 : EbErrorType suPelEnable(MeContext *context_ptr,
   12536             :                         PictureParentControlSet *picture_control_set_ptr,
   12537             :                         uint32_t listIndex, uint32_t refPicIndex,
   12538             :                         EbBool *enableHalfPel32x32, EbBool *enableHalfPel16x16,
   12539             :                         EbBool *enableHalfPel8x8) {
   12540           0 :     EbErrorType return_error = EB_ErrorNone;
   12541             : 
   12542           0 :     uint32_t mvMag32x32 = 0;
   12543           0 :     uint32_t mvMag16x16 = 0;
   12544           0 :     uint32_t mvMag8x8 = 0;
   12545           0 :     uint32_t avgSad32x32 = 0;
   12546           0 :     uint32_t avgSad16x16 = 0;
   12547           0 :     uint32_t avgSad8x8 = 0;
   12548           0 :     uint32_t avgMvx32x32 = 0;
   12549           0 :     uint32_t avgMvy32x32 = 0;
   12550           0 :     uint32_t avgMvx16x16 = 0;
   12551           0 :     uint32_t avgMvy16x16 = 0;
   12552           0 :     uint32_t avgMvx8x8 = 0;
   12553           0 :     uint32_t avgMvy8x8 = 0;
   12554             : 
   12555           0 :     avgMvx32x32 = (_MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12556           0 :                                                   [ME_TIER_ZERO_PU_32x32_0]) +
   12557           0 :                    _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12558           0 :                                                   [ME_TIER_ZERO_PU_32x32_1]) +
   12559           0 :                    _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12560           0 :                                                   [ME_TIER_ZERO_PU_32x32_2]) +
   12561           0 :                    _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12562           0 :                                                   [ME_TIER_ZERO_PU_32x32_3])) >>
   12563             :                   2;
   12564           0 :     avgMvy32x32 = (_MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12565           0 :                                                   [ME_TIER_ZERO_PU_32x32_0]) +
   12566           0 :                    _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12567           0 :                                                   [ME_TIER_ZERO_PU_32x32_1]) +
   12568           0 :                    _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12569           0 :                                                   [ME_TIER_ZERO_PU_32x32_2]) +
   12570           0 :                    _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12571           0 :                                                   [ME_TIER_ZERO_PU_32x32_3])) >>
   12572             :                   2;
   12573           0 :     mvMag32x32 = SQR(avgMvx32x32) + SQR(avgMvy32x32);
   12574             : 
   12575           0 :     avgMvx16x16 =
   12576           0 :         (_MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12577           0 :                                         [ME_TIER_ZERO_PU_16x16_0]) +
   12578           0 :          _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12579           0 :                                         [ME_TIER_ZERO_PU_16x16_1]) +
   12580           0 :          _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12581           0 :                                         [ME_TIER_ZERO_PU_16x16_2]) +
   12582           0 :          _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12583           0 :                                         [ME_TIER_ZERO_PU_16x16_3]) +
   12584           0 :          _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12585           0 :                                         [ME_TIER_ZERO_PU_16x16_4]) +
   12586           0 :          _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12587           0 :                                         [ME_TIER_ZERO_PU_16x16_5]) +
   12588           0 :          _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12589           0 :                                         [ME_TIER_ZERO_PU_16x16_6]) +
   12590           0 :          _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12591           0 :                                         [ME_TIER_ZERO_PU_16x16_7]) +
   12592           0 :          _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12593           0 :                                         [ME_TIER_ZERO_PU_16x16_8]) +
   12594           0 :          _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12595           0 :                                         [ME_TIER_ZERO_PU_16x16_9]) +
   12596           0 :          _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12597           0 :                                         [ME_TIER_ZERO_PU_16x16_10]) +
   12598           0 :          _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12599           0 :                                         [ME_TIER_ZERO_PU_16x16_11]) +
   12600           0 :          _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12601           0 :                                         [ME_TIER_ZERO_PU_16x16_12]) +
   12602           0 :          _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12603           0 :                                         [ME_TIER_ZERO_PU_16x16_13]) +
   12604           0 :          _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12605           0 :                                         [ME_TIER_ZERO_PU_16x16_14]) +
   12606           0 :          _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12607           0 :                                         [ME_TIER_ZERO_PU_16x16_15])) >>
   12608             :         4;
   12609           0 :     avgMvy16x16 =
   12610           0 :         (_MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12611           0 :                                         [ME_TIER_ZERO_PU_16x16_0]) +
   12612           0 :          _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12613           0 :                                         [ME_TIER_ZERO_PU_16x16_1]) +
   12614           0 :          _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12615           0 :                                         [ME_TIER_ZERO_PU_16x16_2]) +
   12616           0 :          _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12617           0 :                                         [ME_TIER_ZERO_PU_16x16_3]) +
   12618           0 :          _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12619           0 :                                         [ME_TIER_ZERO_PU_16x16_4]) +
   12620           0 :          _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12621           0 :                                         [ME_TIER_ZERO_PU_16x16_5]) +
   12622           0 :          _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12623           0 :                                         [ME_TIER_ZERO_PU_16x16_6]) +
   12624           0 :          _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12625           0 :                                         [ME_TIER_ZERO_PU_16x16_7]) +
   12626           0 :          _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12627           0 :                                         [ME_TIER_ZERO_PU_16x16_8]) +
   12628           0 :          _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12629           0 :                                         [ME_TIER_ZERO_PU_16x16_9]) +
   12630           0 :          _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12631           0 :                                         [ME_TIER_ZERO_PU_16x16_10]) +
   12632           0 :          _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12633           0 :                                         [ME_TIER_ZERO_PU_16x16_11]) +
   12634           0 :          _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12635           0 :                                         [ME_TIER_ZERO_PU_16x16_12]) +
   12636           0 :          _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12637           0 :                                         [ME_TIER_ZERO_PU_16x16_13]) +
   12638           0 :          _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12639           0 :                                         [ME_TIER_ZERO_PU_16x16_14]) +
   12640           0 :          _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12641           0 :                                         [ME_TIER_ZERO_PU_16x16_15])) >>
   12642             :         4;
   12643           0 :     mvMag16x16 = SQR(avgMvx16x16) + SQR(avgMvy16x16);
   12644             : 
   12645           0 :     avgMvx8x8 = (_MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12646           0 :                                                 [ME_TIER_ZERO_PU_8x8_0]) +
   12647           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12648           0 :                                                 [ME_TIER_ZERO_PU_8x8_1]) +
   12649           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12650           0 :                                                 [ME_TIER_ZERO_PU_8x8_2]) +
   12651           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12652           0 :                                                 [ME_TIER_ZERO_PU_8x8_3]) +
   12653           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12654           0 :                                                 [ME_TIER_ZERO_PU_8x8_4]) +
   12655           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12656           0 :                                                 [ME_TIER_ZERO_PU_8x8_5]) +
   12657           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12658           0 :                                                 [ME_TIER_ZERO_PU_8x8_6]) +
   12659           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12660           0 :                                                 [ME_TIER_ZERO_PU_8x8_7]) +
   12661           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12662           0 :                                                 [ME_TIER_ZERO_PU_8x8_8]) +
   12663           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12664           0 :                                                 [ME_TIER_ZERO_PU_8x8_9]) +
   12665           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12666           0 :                                                 [ME_TIER_ZERO_PU_8x8_10]) +
   12667           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12668           0 :                                                 [ME_TIER_ZERO_PU_8x8_11]) +
   12669           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12670           0 :                                                 [ME_TIER_ZERO_PU_8x8_12]) +
   12671           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12672           0 :                                                 [ME_TIER_ZERO_PU_8x8_13]) +
   12673           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12674           0 :                                                 [ME_TIER_ZERO_PU_8x8_14]) +
   12675           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12676           0 :                                                 [ME_TIER_ZERO_PU_8x8_15]) +
   12677           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12678           0 :                                                 [ME_TIER_ZERO_PU_8x8_16]) +
   12679           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12680           0 :                                                 [ME_TIER_ZERO_PU_8x8_17]) +
   12681           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12682           0 :                                                 [ME_TIER_ZERO_PU_8x8_18]) +
   12683           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12684           0 :                                                 [ME_TIER_ZERO_PU_8x8_19]) +
   12685           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12686           0 :                                                 [ME_TIER_ZERO_PU_8x8_20]) +
   12687           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12688           0 :                                                 [ME_TIER_ZERO_PU_8x8_21]) +
   12689           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12690           0 :                                                 [ME_TIER_ZERO_PU_8x8_22]) +
   12691           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12692           0 :                                                 [ME_TIER_ZERO_PU_8x8_23]) +
   12693           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12694           0 :                                                 [ME_TIER_ZERO_PU_8x8_24]) +
   12695           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12696           0 :                                                 [ME_TIER_ZERO_PU_8x8_25]) +
   12697           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12698           0 :                                                 [ME_TIER_ZERO_PU_8x8_26]) +
   12699           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12700           0 :                                                 [ME_TIER_ZERO_PU_8x8_27]) +
   12701           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12702           0 :                                                 [ME_TIER_ZERO_PU_8x8_28]) +
   12703           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12704           0 :                                                 [ME_TIER_ZERO_PU_8x8_29]) +
   12705           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12706           0 :                                                 [ME_TIER_ZERO_PU_8x8_30]) +
   12707           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12708           0 :                                                 [ME_TIER_ZERO_PU_8x8_31]) +
   12709           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12710           0 :                                                 [ME_TIER_ZERO_PU_8x8_32]) +
   12711           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12712           0 :                                                 [ME_TIER_ZERO_PU_8x8_33]) +
   12713           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12714           0 :                                                 [ME_TIER_ZERO_PU_8x8_34]) +
   12715           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12716           0 :                                                 [ME_TIER_ZERO_PU_8x8_35]) +
   12717           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12718           0 :                                                 [ME_TIER_ZERO_PU_8x8_36]) +
   12719           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12720           0 :                                                 [ME_TIER_ZERO_PU_8x8_37]) +
   12721           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12722           0 :                                                 [ME_TIER_ZERO_PU_8x8_38]) +
   12723           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12724           0 :                                                 [ME_TIER_ZERO_PU_8x8_39]) +
   12725           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12726           0 :                                                 [ME_TIER_ZERO_PU_8x8_40]) +
   12727           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12728           0 :                                                 [ME_TIER_ZERO_PU_8x8_41]) +
   12729           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12730           0 :                                                 [ME_TIER_ZERO_PU_8x8_42]) +
   12731           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12732           0 :                                                 [ME_TIER_ZERO_PU_8x8_43]) +
   12733           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12734           0 :                                                 [ME_TIER_ZERO_PU_8x8_44]) +
   12735           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12736           0 :                                                 [ME_TIER_ZERO_PU_8x8_45]) +
   12737           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12738           0 :                                                 [ME_TIER_ZERO_PU_8x8_46]) +
   12739           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12740           0 :                                                 [ME_TIER_ZERO_PU_8x8_47]) +
   12741           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12742           0 :                                                 [ME_TIER_ZERO_PU_8x8_48]) +
   12743           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12744           0 :                                                 [ME_TIER_ZERO_PU_8x8_49]) +
   12745           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12746           0 :                                                 [ME_TIER_ZERO_PU_8x8_50]) +
   12747           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12748           0 :                                                 [ME_TIER_ZERO_PU_8x8_51]) +
   12749           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12750           0 :                                                 [ME_TIER_ZERO_PU_8x8_52]) +
   12751           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12752           0 :                                                 [ME_TIER_ZERO_PU_8x8_53]) +
   12753           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12754           0 :                                                 [ME_TIER_ZERO_PU_8x8_54]) +
   12755           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12756           0 :                                                 [ME_TIER_ZERO_PU_8x8_55]) +
   12757           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12758           0 :                                                 [ME_TIER_ZERO_PU_8x8_56]) +
   12759           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12760           0 :                                                 [ME_TIER_ZERO_PU_8x8_57]) +
   12761           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12762           0 :                                                 [ME_TIER_ZERO_PU_8x8_58]) +
   12763           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12764           0 :                                                 [ME_TIER_ZERO_PU_8x8_59]) +
   12765           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12766           0 :                                                 [ME_TIER_ZERO_PU_8x8_60]) +
   12767           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12768           0 :                                                 [ME_TIER_ZERO_PU_8x8_61]) +
   12769           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12770           0 :                                                 [ME_TIER_ZERO_PU_8x8_62]) +
   12771           0 :                  _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12772           0 :                                                 [ME_TIER_ZERO_PU_8x8_63])) >>
   12773             :                 6;
   12774           0 :     avgMvy8x8 = (_MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12775           0 :                                                 [ME_TIER_ZERO_PU_8x8_0]) +
   12776           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12777           0 :                                                 [ME_TIER_ZERO_PU_8x8_1]) +
   12778           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12779           0 :                                                 [ME_TIER_ZERO_PU_8x8_2]) +
   12780           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12781           0 :                                                 [ME_TIER_ZERO_PU_8x8_3]) +
   12782           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12783           0 :                                                 [ME_TIER_ZERO_PU_8x8_4]) +
   12784           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12785           0 :                                                 [ME_TIER_ZERO_PU_8x8_5]) +
   12786           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12787           0 :                                                 [ME_TIER_ZERO_PU_8x8_6]) +
   12788           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12789           0 :                                                 [ME_TIER_ZERO_PU_8x8_7]) +
   12790           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12791           0 :                                                 [ME_TIER_ZERO_PU_8x8_8]) +
   12792           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12793           0 :                                                 [ME_TIER_ZERO_PU_8x8_9]) +
   12794           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12795           0 :                                                 [ME_TIER_ZERO_PU_8x8_10]) +
   12796           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12797           0 :                                                 [ME_TIER_ZERO_PU_8x8_11]) +
   12798           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12799           0 :                                                 [ME_TIER_ZERO_PU_8x8_12]) +
   12800           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12801           0 :                                                 [ME_TIER_ZERO_PU_8x8_13]) +
   12802           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12803           0 :                                                 [ME_TIER_ZERO_PU_8x8_14]) +
   12804           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12805           0 :                                                 [ME_TIER_ZERO_PU_8x8_15]) +
   12806           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12807           0 :                                                 [ME_TIER_ZERO_PU_8x8_16]) +
   12808           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12809           0 :                                                 [ME_TIER_ZERO_PU_8x8_17]) +
   12810           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12811           0 :                                                 [ME_TIER_ZERO_PU_8x8_18]) +
   12812           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12813           0 :                                                 [ME_TIER_ZERO_PU_8x8_19]) +
   12814           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12815           0 :                                                 [ME_TIER_ZERO_PU_8x8_20]) +
   12816           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12817           0 :                                                 [ME_TIER_ZERO_PU_8x8_21]) +
   12818           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12819           0 :                                                 [ME_TIER_ZERO_PU_8x8_22]) +
   12820           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12821           0 :                                                 [ME_TIER_ZERO_PU_8x8_23]) +
   12822           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12823           0 :                                                 [ME_TIER_ZERO_PU_8x8_24]) +
   12824           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12825           0 :                                                 [ME_TIER_ZERO_PU_8x8_25]) +
   12826           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12827           0 :                                                 [ME_TIER_ZERO_PU_8x8_26]) +
   12828           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12829           0 :                                                 [ME_TIER_ZERO_PU_8x8_27]) +
   12830           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12831           0 :                                                 [ME_TIER_ZERO_PU_8x8_28]) +
   12832           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12833           0 :                                                 [ME_TIER_ZERO_PU_8x8_29]) +
   12834           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12835           0 :                                                 [ME_TIER_ZERO_PU_8x8_30]) +
   12836           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12837           0 :                                                 [ME_TIER_ZERO_PU_8x8_31]) +
   12838           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12839           0 :                                                 [ME_TIER_ZERO_PU_8x8_32]) +
   12840           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12841           0 :                                                 [ME_TIER_ZERO_PU_8x8_33]) +
   12842           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12843           0 :                                                 [ME_TIER_ZERO_PU_8x8_34]) +
   12844           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12845           0 :                                                 [ME_TIER_ZERO_PU_8x8_35]) +
   12846           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12847           0 :                                                 [ME_TIER_ZERO_PU_8x8_36]) +
   12848           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12849           0 :                                                 [ME_TIER_ZERO_PU_8x8_37]) +
   12850           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12851           0 :                                                 [ME_TIER_ZERO_PU_8x8_38]) +
   12852           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12853           0 :                                                 [ME_TIER_ZERO_PU_8x8_39]) +
   12854           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12855           0 :                                                 [ME_TIER_ZERO_PU_8x8_40]) +
   12856           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12857           0 :                                                 [ME_TIER_ZERO_PU_8x8_41]) +
   12858           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12859           0 :                                                 [ME_TIER_ZERO_PU_8x8_42]) +
   12860           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12861           0 :                                                 [ME_TIER_ZERO_PU_8x8_43]) +
   12862           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12863           0 :                                                 [ME_TIER_ZERO_PU_8x8_44]) +
   12864           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12865           0 :                                                 [ME_TIER_ZERO_PU_8x8_45]) +
   12866           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12867           0 :                                                 [ME_TIER_ZERO_PU_8x8_46]) +
   12868           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12869           0 :                                                 [ME_TIER_ZERO_PU_8x8_47]) +
   12870           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12871           0 :                                                 [ME_TIER_ZERO_PU_8x8_48]) +
   12872           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12873           0 :                                                 [ME_TIER_ZERO_PU_8x8_49]) +
   12874           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12875           0 :                                                 [ME_TIER_ZERO_PU_8x8_50]) +
   12876           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12877           0 :                                                 [ME_TIER_ZERO_PU_8x8_51]) +
   12878           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12879           0 :                                                 [ME_TIER_ZERO_PU_8x8_52]) +
   12880           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12881           0 :                                                 [ME_TIER_ZERO_PU_8x8_53]) +
   12882           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12883           0 :                                                 [ME_TIER_ZERO_PU_8x8_54]) +
   12884           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12885           0 :                                                 [ME_TIER_ZERO_PU_8x8_55]) +
   12886           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12887           0 :                                                 [ME_TIER_ZERO_PU_8x8_56]) +
   12888           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12889           0 :                                                 [ME_TIER_ZERO_PU_8x8_57]) +
   12890           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12891           0 :                                                 [ME_TIER_ZERO_PU_8x8_58]) +
   12892           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12893           0 :                                                 [ME_TIER_ZERO_PU_8x8_59]) +
   12894           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12895           0 :                                                 [ME_TIER_ZERO_PU_8x8_60]) +
   12896           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12897           0 :                                                 [ME_TIER_ZERO_PU_8x8_61]) +
   12898           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12899           0 :                                                 [ME_TIER_ZERO_PU_8x8_62]) +
   12900           0 :                  _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
   12901           0 :                                                 [ME_TIER_ZERO_PU_8x8_63])) >>
   12902             :                 6;
   12903           0 :     mvMag8x8 = SQR(avgMvx8x8) + SQR(avgMvy8x8);
   12904             : 
   12905           0 :     avgSad32x32 =
   12906             :         (context_ptr
   12907           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_32x32_0] +
   12908             :          context_ptr
   12909           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_32x32_1] +
   12910             :          context_ptr
   12911           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_32x32_2] +
   12912             :          context_ptr->p_sb_best_sad[listIndex][refPicIndex]
   12913           0 :                                    [ME_TIER_ZERO_PU_32x32_3]) >>
   12914             :         2;
   12915           0 :     avgSad16x16 =
   12916             :         (context_ptr
   12917           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_0] +
   12918             :          context_ptr
   12919           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_1] +
   12920             :          context_ptr
   12921           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_2] +
   12922             :          context_ptr
   12923           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_3] +
   12924             :          context_ptr
   12925           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_4] +
   12926             :          context_ptr
   12927           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_5] +
   12928             :          context_ptr
   12929           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_6] +
   12930             :          context_ptr
   12931           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_7] +
   12932             :          context_ptr
   12933           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_8] +
   12934             :          context_ptr
   12935           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_9] +
   12936             :          context_ptr
   12937           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_10] +
   12938             :          context_ptr
   12939           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_11] +
   12940             :          context_ptr
   12941           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_12] +
   12942             :          context_ptr
   12943           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_13] +
   12944             :          context_ptr
   12945           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_14] +
   12946             :          context_ptr->p_sb_best_sad[listIndex][refPicIndex]
   12947           0 :                                    [ME_TIER_ZERO_PU_16x16_15]) >>
   12948             :         4;
   12949           0 :     avgSad8x8 =
   12950             :         (context_ptr
   12951           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_0] +
   12952             :          context_ptr
   12953           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_1] +
   12954             :          context_ptr
   12955           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_2] +
   12956             :          context_ptr
   12957           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_3] +
   12958             :          context_ptr
   12959           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_4] +
   12960             :          context_ptr
   12961           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_5] +
   12962             :          context_ptr
   12963           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_6] +
   12964             :          context_ptr
   12965           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_7] +
   12966             :          context_ptr
   12967           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_8] +
   12968             :          context_ptr
   12969           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_9] +
   12970             :          context_ptr
   12971           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_10] +
   12972             :          context_ptr
   12973           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_11] +
   12974             :          context_ptr
   12975           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_12] +
   12976             :          context_ptr
   12977           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_13] +
   12978             :          context_ptr
   12979           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_14] +
   12980             :          context_ptr
   12981           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_15] +
   12982             :          context_ptr
   12983           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_16] +
   12984             :          context_ptr
   12985           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_17] +
   12986             :          context_ptr
   12987           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_18] +
   12988             :          context_ptr
   12989           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_19] +
   12990             :          context_ptr
   12991           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_20] +
   12992             :          context_ptr
   12993           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_21] +
   12994             :          context_ptr
   12995           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_22] +
   12996             :          context_ptr
   12997           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_23] +
   12998             :          context_ptr
   12999           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_24] +
   13000             :          context_ptr
   13001           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_25] +
   13002             :          context_ptr
   13003           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_26] +
   13004             :          context_ptr
   13005           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_27] +
   13006             :          context_ptr
   13007           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_28] +
   13008             :          context_ptr
   13009           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_29] +
   13010             :          context_ptr
   13011           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_30] +
   13012             :          context_ptr
   13013           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_31] +
   13014             :          context_ptr
   13015           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_32] +
   13016             :          context_ptr
   13017           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_33] +
   13018             :          context_ptr
   13019           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_34] +
   13020             :          context_ptr
   13021           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_35] +
   13022             :          context_ptr
   13023           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_36] +
   13024             :          context_ptr
   13025           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_37] +
   13026             :          context_ptr
   13027           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_38] +
   13028             :          context_ptr
   13029           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_39] +
   13030             :          context_ptr
   13031           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_40] +
   13032             :          context_ptr
   13033           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_41] +
   13034             :          context_ptr
   13035           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_42] +
   13036             :          context_ptr
   13037           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_43] +
   13038             :          context_ptr
   13039           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_44] +
   13040             :          context_ptr
   13041           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_45] +
   13042             :          context_ptr
   13043           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_46] +
   13044             :          context_ptr
   13045           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_47] +
   13046             :          context_ptr
   13047           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_48] +
   13048             :          context_ptr
   13049           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_49] +
   13050             :          context_ptr
   13051           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_50] +
   13052             :          context_ptr
   13053           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_51] +
   13054             :          context_ptr
   13055           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_52] +
   13056             :          context_ptr
   13057           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_53] +
   13058             :          context_ptr
   13059           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_54] +
   13060             :          context_ptr
   13061           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_55] +
   13062             :          context_ptr
   13063           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_56] +
   13064             :          context_ptr
   13065           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_57] +
   13066             :          context_ptr
   13067           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_58] +
   13068             :          context_ptr
   13069           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_59] +
   13070             :          context_ptr
   13071           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_60] +
   13072             :          context_ptr
   13073           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_61] +
   13074             :          context_ptr
   13075           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_62] +
   13076             :          context_ptr
   13077           0 :              ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_63]) >>
   13078             :         6;
   13079             : 
   13080           0 :     if (picture_control_set_ptr->temporal_layer_index == 0) {
   13081             :         // 32x32
   13082           0 :         if ((mvMag32x32 < SQR(48)) && (avgSad32x32 < 32 * 32 * 6))
   13083           0 :             *enableHalfPel32x32 = EB_TRUE;  // CLASS_0
   13084           0 :         else if ((mvMag32x32 < SQR(48)) && !(avgSad32x32 < 32 * 32 * 6))
   13085           0 :             *enableHalfPel32x32 = EB_FALSE;  // CLASS_1
   13086           0 :         else if (!(mvMag32x32 < SQR(48)) && (avgSad32x32 < 32 * 32 * 6))
   13087           0 :             *enableHalfPel32x32 = EB_TRUE;  // CLASS_2
   13088             :         else
   13089           0 :             *enableHalfPel32x32 = EB_FALSE;  // CLASS_3
   13090             :         // 16x16
   13091           0 :         if ((mvMag16x16 < SQR(48)) && (avgSad16x16 < 16 * 16 * 2))
   13092           0 :             *enableHalfPel16x16 = EB_FALSE;  // CLASS_0
   13093           0 :         else if ((mvMag16x16 < SQR(48)) && !(avgSad16x16 < 16 * 16 * 2))
   13094           0 :             *enableHalfPel16x16 = EB_TRUE;  // CLASS_1
   13095           0 :         else if (!(mvMag16x16 < SQR(48)) && (avgSad16x16 < 16 * 16 * 2))
   13096           0 :             *enableHalfPel16x16 = EB_FALSE;  // CLASS_2
   13097             :         else
   13098           0 :             *enableHalfPel16x16 = EB_TRUE;  // CLASS_3
   13099             :         // 8x8
   13100           0 :         if ((mvMag8x8 < SQR(48)) && (avgSad8x8 < 8 * 8 * 2))
   13101           0 :             *enableHalfPel8x8 = EB_FALSE;  // CLASS_0
   13102           0 :         else if ((mvMag8x8 < SQR(48)) && !(avgSad8x8 < 8 * 8 * 2))
   13103           0 :             *enableHalfPel8x8 = EB_TRUE;  // CLASS_1
   13104           0 :         else if (!(mvMag8x8 < SQR(48)) && (avgSad8x8 < 8 * 8 * 2))
   13105           0 :             *enableHalfPel8x8 = EB_FALSE;  // CLASS_2
   13106             :         else
   13107           0 :             *enableHalfPel8x8 = EB_TRUE;  // CLASS_3
   13108             :     }
   13109             : 
   13110           0 :     else if (picture_control_set_ptr->temporal_layer_index == 1) {
   13111             :         // 32x32
   13112           0 :         if ((mvMag32x32 < SQR(32)) && (avgSad32x32 < 32 * 32 * 6))
   13113           0 :             *enableHalfPel32x32 = EB_TRUE;  // CLASS_0
   13114           0 :         else if ((mvMag32x32 < SQR(32)) && !(avgSad32x32 < 32 * 32 * 6))
   13115           0 :             *enableHalfPel32x32 = EB_FALSE;  // CLASS_1
   13116           0 :         else if (!(mvMag32x32 < SQR(32)) && (avgSad32x32 < 32 * 32 * 6))
   13117           0 :             *enableHalfPel32x32 = EB_TRUE;  // CLASS_2
   13118             :         else
   13119           0 :             *enableHalfPel32x32 = EB_TRUE;  // CLASS_3
   13120             :         // 16x16
   13121           0 :         if ((mvMag16x16 < SQR(32)) && (avgSad16x16 < 16 * 16 * 2))
   13122           0 :             *enableHalfPel16x16 = EB_FALSE;  // CLASS_0
   13123           0 :         else if ((mvMag16x16 < SQR(32)) && !(avgSad16x16 < 16 * 16 * 2))
   13124           0 :             *enableHalfPel16x16 = EB_TRUE;  // CLASS_1
   13125           0 :         else if (!(mvMag16x16 < SQR(32)) && (avgSad16x16 < 16 * 16 * 2))
   13126           0 :             *enableHalfPel16x16 = EB_FALSE;  // CLASS_2
   13127             :         else
   13128           0 :             *enableHalfPel16x16 = EB_TRUE;  // CLASS_3
   13129             :         // 8x8
   13130           0 :         if ((mvMag8x8 < SQR(32)) && (avgSad8x8 < 8 * 8 * 2))
   13131           0 :             *enableHalfPel8x8 = EB_FALSE;  // CLASS_0
   13132           0 :         else if ((mvMag8x8 < SQR(32)) && !(avgSad8x8 < 8 * 8 * 2))
   13133           0 :             *enableHalfPel8x8 = EB_TRUE;  // CLASS_1
   13134           0 :         else if (!(mvMag8x8 < SQR(32)) && (avgSad8x8 < 8 * 8 * 2))
   13135           0 :             *enableHalfPel8x8 = EB_FALSE;  // CLASS_2
   13136             :         else
   13137           0 :             *enableHalfPel8x8 = EB_TRUE;  // CLASS_3
   13138           0 :     } else if (picture_control_set_ptr->temporal_layer_index == 2) {
   13139             :         // 32x32
   13140           0 :         if ((mvMag32x32 < SQR(80)) && (avgSad32x32 < 32 * 32 * 6))
   13141           0 :             *enableHalfPel32x32 = EB_TRUE;  // CLASS_0
   13142           0 :         else if ((mvMag32x32 < SQR(80)) && !(avgSad32x32 < 32 * 32 * 6))
   13143           0 :             *enableHalfPel32x32 = EB_FALSE;  // CLASS_1
   13144           0 :         else if (!(mvMag32x32 < SQR(80)) && (avgSad32x32 < 32 * 32 * 6))
   13145           0 :             *enableHalfPel32x32 = EB_TRUE;  // CLASS_2
   13146             :         else
   13147           0 :             *enableHalfPel32x32 = EB_FALSE;  // CLASS_3
   13148             :         // 16x16
   13149           0 :         if ((mvMag16x16 < SQR(80)) && (avgSad16x16 < 16 * 16 * 2))
   13150           0 :             *enableHalfPel16x16 = EB_FALSE;  // CLASS_0
   13151           0 :         else if ((mvMag16x16 < SQR(80)) && !(avgSad16x16 < 16 * 16 * 2))
   13152           0 :             *enableHalfPel16x16 = EB_TRUE;  // CLASS_1
   13153           0 :         else if (!(mvMag16x16 < SQR(80)) && (avgSad16x16 < 16 * 16 * 2))
   13154           0 :             *enableHalfPel16x16 = EB_FALSE;  // CLASS_2
   13155             :         else
   13156           0 :             *enableHalfPel16x16 = EB_TRUE;  // CLASS_3
   13157             :         // 8x8
   13158           0 :         if ((mvMag8x8 < SQR(80)) && (avgSad8x8 < 8 * 8 * 2))
   13159           0 :             *enableHalfPel8x8 = EB_FALSE;  // CLASS_0
   13160           0 :         else if ((mvMag8x8 < SQR(80)) && !(avgSad8x8 < 8 * 8 * 2))
   13161           0 :             *enableHalfPel8x8 = EB_TRUE;  // CLASS_1
   13162           0 :         else if (!(mvMag8x8 < SQR(80)) && (avgSad8x8 < 8 * 8 * 2))
   13163           0 :             *enableHalfPel8x8 = EB_FALSE;  // CLASS_2
   13164             :         else
   13165           0 :             *enableHalfPel8x8 = EB_TRUE;  // CLASS_3
   13166             :     } else {
   13167             :         // 32x32
   13168           0 :         if ((mvMag32x32 < SQR(48)) && (avgSad32x32 < 32 * 32 * 6))
   13169           0 :             *enableHalfPel32x32 = EB_TRUE;  // CLASS_0
   13170           0 :         else if ((mvMag32x32 < SQR(48)) && !(avgSad32x32 < 32 * 32 * 6))
   13171           0 :             *enableHalfPel32x32 = EB_TRUE;  // CLASS_1
   13172           0 :         else if (!(mvMag32x32 < SQR(48)) && (avgSad32x32 < 32 * 32 * 6))
   13173           0 :             *enableHalfPel32x32 = EB_TRUE;  // CLASS_2
   13174             :         else
   13175           0 :             *enableHalfPel32x32 = EB_FALSE;  // CLASS_3
   13176             :         // 16x16
   13177           0 :         if ((mvMag16x16 < SQR(48)) && (avgSad16x16 < 16 * 16 * 2))
   13178           0 :             *enableHalfPel16x16 = EB_FALSE;  // CLASS_0
   13179           0 :         else if ((mvMag16x16 < SQR(48)) && !(avgSad16x16 < 16 * 16 * 2))
   13180           0 :             *enableHalfPel16x16 = EB_TRUE;  // CLASS_1
   13181           0 :         else if (!(mvMag16x16 < SQR(48)) && (avgSad16x16 < 16 * 16 * 2))
   13182           0 :             *enableHalfPel16x16 = EB_FALSE;  // CLASS_2
   13183             :         else
   13184           0 :             *enableHalfPel16x16 = EB_TRUE;  // CLASS_3
   13185             :         // 8x8
   13186           0 :         if ((mvMag8x8 < SQR(48)) && (avgSad8x8 < 8 * 8 * 2))
   13187           0 :             *enableHalfPel8x8 = EB_FALSE;  // CLASS_0
   13188           0 :         else if ((mvMag8x8 < SQR(48)) && !(avgSad8x8 < 8 * 8 * 2))
   13189           0 :             *enableHalfPel8x8 = EB_TRUE;  // CLASS_1
   13190           0 :         else if (!(mvMag8x8 < SQR(48)) && (avgSad8x8 < 8 * 8 * 2))
   13191           0 :             *enableHalfPel8x8 = EB_FALSE;  // CLASS_2
   13192             :         else
   13193           0 :             *enableHalfPel8x8 = EB_FALSE;  // EB_TRUE; //CLASS_3
   13194             :     }
   13195             : 
   13196           0 :     return return_error;
   13197             : }
   13198             : 
   13199           0 : static void hme_mv_center_check(EbPictureBufferDesc *ref_pic_ptr,
   13200             :                                 MeContext *context_ptr, int16_t *xsc,
   13201             :                                 int16_t *ysc, uint32_t list_index,
   13202             :                                 int16_t origin_x, int16_t origin_y,
   13203             :                                 uint32_t sb_width, uint32_t sb_height)
   13204             : {
   13205             :     // Search for (-srx/2, 0),  (+srx/2, 0),  (0, -sry/2), (0, +sry/2),
   13206             :     /*
   13207             :     |------------C-------------|
   13208             :     |--------------------------|
   13209             :     |--------------------------|
   13210             :     A            0             B
   13211             :     |--------------------------|
   13212             :     |--------------------------|
   13213             :     |------------D-------------|
   13214             :     */
   13215             :     uint32_t search_region_index;
   13216           0 :     int16_t search_center_x = *xsc;
   13217           0 :     int16_t search_center_y = *ysc;
   13218             :     uint64_t best_cost;
   13219           0 :     uint64_t direct_mv_cost = 0xFFFFFFFFFFFFF;
   13220           0 :     uint8_t sparce_scale = 1;
   13221           0 :     int16_t pad_width = (int16_t)BLOCK_SIZE_64 - 1;
   13222           0 :     int16_t pad_height = (int16_t)BLOCK_SIZE_64 - 1;
   13223             :     // O pos
   13224             : 
   13225           0 :     search_region_index =
   13226           0 :         (int16_t)ref_pic_ptr->origin_x + origin_x +
   13227           0 :         ((int16_t)ref_pic_ptr->origin_y + origin_y) * ref_pic_ptr->stride_y;
   13228             : 
   13229           0 :     uint32_t sub_sampled_sad = 1;
   13230           0 :     uint64_t zero_mv_sad = nxm_sad_kernel(
   13231           0 :             context_ptr->sb_src_ptr,
   13232           0 :             context_ptr->sb_src_stride << sub_sampled_sad,
   13233           0 :             &(ref_pic_ptr->buffer_y[search_region_index]),
   13234           0 :             ref_pic_ptr->stride_y << sub_sampled_sad,
   13235             :             sb_height >> sub_sampled_sad,
   13236             :             sb_width);
   13237             : 
   13238           0 :     zero_mv_sad = zero_mv_sad << sub_sampled_sad;
   13239             : 
   13240           0 :     uint64_t zero_mv_cost = zero_mv_sad << COST_PRECISION;
   13241             : 
   13242             :     // A pos
   13243           0 :     search_center_x =
   13244           0 :         0 - (context_ptr->hme_level0_total_search_area_width * sparce_scale);
   13245           0 :     search_center_y = 0;
   13246             : 
   13247             :     // Correct the left edge of the Search Area if it is not on the reference
   13248             :     // Picture
   13249           0 :     search_center_x = ((origin_x + search_center_x) < -pad_width)
   13250           0 :                           ? -pad_width - origin_x
   13251             :                           : search_center_x;
   13252             :     // Correct the right edge of the Search Area if its not on the reference
   13253             :     // Picture
   13254           0 :     search_center_x =
   13255           0 :         ((origin_x + search_center_x) > (int16_t)ref_pic_ptr->width - 1)
   13256           0 :             ? search_center_x - ((origin_x + search_center_x) -
   13257           0 :                                  ((int16_t)ref_pic_ptr->width - 1))
   13258             :             : search_center_x;
   13259             :     // Correct the top edge of the Search Area if it is not on the reference
   13260             :     // Picture
   13261           0 :     search_center_y = ((origin_y + search_center_y) < -pad_height)
   13262           0 :                           ? -pad_height - origin_y
   13263             :                           : search_center_y;
   13264             : 
   13265             :     // Correct the bottom edge of the Search Area if its not on the reference
   13266             :     // Picture
   13267           0 :     search_center_y =
   13268           0 :         ((origin_y + search_center_y) > (int16_t)ref_pic_ptr->height - 1)
   13269           0 :             ? search_center_y - ((origin_y + search_center_y) -
   13270           0 :                                  ((int16_t)ref_pic_ptr->height - 1))
   13271             :             : search_center_y;
   13272             : 
   13273           0 :     uint64_t mv_a_sad = nxm_sad_kernel(
   13274           0 :         context_ptr->sb_src_ptr,
   13275           0 :         context_ptr->sb_src_stride << sub_sampled_sad,
   13276           0 :         &(ref_pic_ptr->buffer_y[search_region_index]),
   13277           0 :         ref_pic_ptr->stride_y << sub_sampled_sad,
   13278             :         sb_height >> sub_sampled_sad,
   13279             :         sb_width);
   13280             : 
   13281           0 :     mv_a_sad = mv_a_sad << sub_sampled_sad;
   13282             : 
   13283           0 :     uint64_t mv_a_cost = mv_a_sad << COST_PRECISION;
   13284             : 
   13285             :     // B pos
   13286           0 :     search_center_x =
   13287           0 :         (context_ptr->hme_level0_total_search_area_width * sparce_scale);
   13288           0 :     search_center_y = 0;
   13289             :     ///////////////// correct
   13290             :     // Correct the left edge of the Search Area if it is not on the reference
   13291             :     // Picture
   13292           0 :     search_center_x = ((origin_x + search_center_x) < -pad_width)
   13293           0 :                           ? -pad_width - origin_x
   13294             :                           : search_center_x;
   13295             :     // Correct the right edge of the Search Area if its not on the reference
   13296             :     // Picture
   13297           0 :     search_center_x =
   13298           0 :         ((origin_x + search_center_x) > (int16_t)ref_pic_ptr->width - 1)
   13299           0 :             ? search_center_x - ((origin_x + search_center_x) -
   13300           0 :                                  ((int16_t)ref_pic_ptr->width - 1))
   13301             :             : search_center_x;
   13302             :     // Correct the top edge of the Search Area if it is not on the reference
   13303             :     // Picture
   13304           0 :     search_center_y = ((origin_y + search_center_y) < -pad_height)
   13305           0 :                           ? -pad_height - origin_y
   13306             :                           : search_center_y;
   13307             :     // Correct the bottom edge of the Search Area if its not on the reference
   13308             :     // Picture
   13309           0 :     search_center_y =
   13310           0 :         ((origin_y + search_center_y) > (int16_t)ref_pic_ptr->height - 1)
   13311           0 :             ? search_center_y - ((origin_y + search_center_y) -
   13312           0 :                                  ((int16_t)ref_pic_ptr->height - 1))
   13313             :             : search_center_y;
   13314             : 
   13315           0 :     search_region_index =
   13316           0 :         (int16_t)(ref_pic_ptr->origin_x + origin_x) + search_center_x +
   13317           0 :         ((int16_t)(ref_pic_ptr->origin_y + origin_y) + search_center_y) *
   13318           0 :             ref_pic_ptr->stride_y;
   13319             : 
   13320           0 :     uint64_t mv_b_sad = nxm_sad_kernel(
   13321           0 :         context_ptr->sb_src_ptr,
   13322           0 :         context_ptr->sb_src_stride << sub_sampled_sad,
   13323           0 :         &(ref_pic_ptr->buffer_y[search_region_index]),
   13324           0 :         ref_pic_ptr->stride_y << sub_sampled_sad,
   13325             :         sb_height >> sub_sampled_sad,
   13326             :         sb_width);
   13327             : 
   13328           0 :     mv_b_sad = mv_b_sad << sub_sampled_sad;
   13329             : 
   13330           0 :     uint64_t mv_b_cost = mv_b_sad << COST_PRECISION;
   13331             :     // C pos
   13332           0 :     search_center_x = 0;
   13333           0 :     search_center_y =
   13334           0 :         0 - (context_ptr->hme_level0_total_search_area_height * sparce_scale);
   13335             :     ///////////////// correct
   13336             :     // Correct the left edge of the Search Area if it is not on the reference
   13337             :     // Picture
   13338           0 :     search_center_x = ((origin_x + search_center_x) < -pad_width)
   13339           0 :                           ? -pad_width - origin_x
   13340             :                           : search_center_x;
   13341             : 
   13342             :     // Correct the right edge of the Search Area if its not on the reference
   13343             :     // Picture
   13344           0 :     search_center_x =
   13345           0 :         ((origin_x + search_center_x) > (int16_t)ref_pic_ptr->width - 1)
   13346           0 :             ? search_center_x - ((origin_x + search_center_x) -
   13347           0 :                                  ((int16_t)ref_pic_ptr->width - 1))
   13348             :             : search_center_x;
   13349             : 
   13350             :     // Correct the top edge of the Search Area if it is not on the reference
   13351             :     // Picture
   13352           0 :     search_center_y = ((origin_y + search_center_y) < -pad_height)
   13353           0 :                           ? -pad_height - origin_y
   13354             :                           : search_center_y;
   13355             : 
   13356             :     // Correct the bottom edge of the Search Area if its not on the reference
   13357             :     // Picture
   13358           0 :     search_center_y =
   13359           0 :         ((origin_y + search_center_y) > (int16_t)ref_pic_ptr->height - 1)
   13360           0 :             ? search_center_y - ((origin_y + search_center_y) -
   13361           0 :                                  ((int16_t)ref_pic_ptr->height - 1))
   13362             :             : search_center_y;
   13363             : 
   13364           0 :     search_region_index =
   13365           0 :         (int16_t)(ref_pic_ptr->origin_x + origin_x) + search_center_x +
   13366           0 :         ((int16_t)(ref_pic_ptr->origin_y + origin_y) + search_center_y) *
   13367           0 :             ref_pic_ptr->stride_y;
   13368             : 
   13369           0 :     uint64_t mv_c_sad = nxm_sad_kernel(
   13370           0 :         context_ptr->sb_src_ptr,
   13371           0 :         context_ptr->sb_src_stride << sub_sampled_sad,
   13372           0 :         &(ref_pic_ptr->buffer_y[search_region_index]),
   13373           0 :         ref_pic_ptr->stride_y << sub_sampled_sad,
   13374             :         sb_height >> sub_sampled_sad,
   13375             :         sb_width);
   13376             : 
   13377           0 :     mv_c_sad = mv_c_sad << sub_sampled_sad;
   13378             : 
   13379           0 :     uint64_t mv_c_cost = mv_c_sad << COST_PRECISION;
   13380             : 
   13381             :     // D pos
   13382           0 :     search_center_x = 0;
   13383           0 :     search_center_y =
   13384           0 :         (context_ptr->hme_level0_total_search_area_height * sparce_scale);
   13385             :     // Correct the left edge of the Search Area if it is not on the reference
   13386             :     // Picture
   13387           0 :     search_center_x = ((origin_x + search_center_x) < -pad_width)
   13388           0 :                           ? -pad_width - origin_x
   13389             :                           : search_center_x;
   13390             :     // Correct the right edge of the Search Area if its not on the reference
   13391             :     // Picture
   13392           0 :     search_center_x =
   13393           0 :         ((origin_x + search_center_x) > (int16_t)ref_pic_ptr->width - 1)
   13394           0 :             ? search_center_x - ((origin_x + search_center_x) -
   13395           0 :                                  ((int16_t)ref_pic_ptr->width - 1))
   13396             :             : search_center_x;
   13397             :     // Correct the top edge of the Search Area if it is not on the reference
   13398             :     // Picture
   13399           0 :     search_center_y = ((origin_y + search_center_y) < -pad_height)
   13400           0 :                           ? -pad_height - origin_y
   13401             :                           : search_center_y;
   13402             :     // Correct the bottom edge of the Search Area if its not on the reference
   13403             :     // Picture
   13404           0 :     search_center_y =
   13405           0 :         ((origin_y + search_center_y) > (int16_t)ref_pic_ptr->height - 1)
   13406           0 :             ? search_center_y - ((origin_y + search_center_y) -
   13407           0 :                                  ((int16_t)ref_pic_ptr->height - 1))
   13408             :             : search_center_y;
   13409           0 :     search_region_index =
   13410           0 :         (int16_t)(ref_pic_ptr->origin_x + origin_x) + search_center_x +
   13411           0 :         ((int16_t)(ref_pic_ptr->origin_y + origin_y) + search_center_y) *
   13412           0 :             ref_pic_ptr->stride_y;
   13413           0 :     uint64_t mv_d_sad = nxm_sad_kernel(
   13414           0 :         context_ptr->sb_src_ptr,
   13415           0 :         context_ptr->sb_src_stride << sub_sampled_sad,
   13416           0 :         &(ref_pic_ptr->buffer_y[search_region_index]),
   13417           0 :         ref_pic_ptr->stride_y << sub_sampled_sad,
   13418             :         sb_height >> sub_sampled_sad,
   13419             :         sb_width);
   13420             : 
   13421           0 :     mv_d_sad = mv_d_sad << sub_sampled_sad;
   13422             : 
   13423           0 :     uint64_t mv_d_cost = mv_d_sad << COST_PRECISION;
   13424             : 
   13425           0 :     if (list_index == 1) {
   13426           0 :         search_center_x =
   13427           0 :             list_index ? 0 - (_MVXT(context_ptr->p_sb_best_mv[0][0][0]) >> 2)
   13428             :                        : 0;
   13429           0 :         search_center_y =
   13430           0 :             list_index ? 0 - (_MVYT(context_ptr->p_sb_best_mv[0][0][0]) >> 2)
   13431             :                        : 0;
   13432             :         ///////////////// correct
   13433             :         // Correct the left edge of the Search Area if it is not on the
   13434             :         // reference Picture
   13435           0 :         search_center_x = ((origin_x + search_center_x) < -pad_width)
   13436           0 :                               ? -pad_width - origin_x
   13437             :                               : search_center_x;
   13438             :         // Correct the right edge of the Search Area if its not on the reference
   13439             :         // Picture
   13440           0 :         search_center_x =
   13441           0 :             ((origin_x + search_center_x) > (int16_t)ref_pic_ptr->width - 1)
   13442           0 :                 ? search_center_x - ((origin_x + search_center_x) -
   13443           0 :                                      ((int16_t)ref_pic_ptr->width - 1))
   13444             :                 : search_center_x;
   13445             :         // Correct the top edge of the Search Area if it is not on the reference
   13446             :         // Picture
   13447           0 :         search_center_y = ((origin_y + search_center_y) < -pad_height)
   13448           0 :                               ? -pad_height - origin_y
   13449             :                               : search_center_y;
   13450             :         // Correct the bottom edge of the Search Area if its not on the
   13451             :         // reference Picture
   13452           0 :         search_center_y =
   13453           0 :             ((origin_y + search_center_y) > (int16_t)ref_pic_ptr->height - 1)
   13454           0 :                 ? search_center_y - ((origin_y + search_center_y) -
   13455           0 :                                      ((int16_t)ref_pic_ptr->height - 1))
   13456             :                 : search_center_y;
   13457             : 
   13458           0 :         search_region_index =
   13459           0 :             (int16_t)(ref_pic_ptr->origin_x + origin_x) + search_center_x +
   13460           0 :             ((int16_t)(ref_pic_ptr->origin_y + origin_y) + search_center_y) *
   13461           0 :                 ref_pic_ptr->stride_y;
   13462             : 
   13463           0 :         uint64_t direct_mv_sad = nxm_sad_kernel(
   13464           0 :                 context_ptr->sb_src_ptr,
   13465           0 :                 context_ptr->sb_src_stride << sub_sampled_sad,
   13466           0 :                 &(ref_pic_ptr->buffer_y[search_region_index]),
   13467           0 :                 ref_pic_ptr->stride_y << sub_sampled_sad,
   13468             :                 sb_height >> sub_sampled_sad,
   13469             :                 sb_width);
   13470             : 
   13471           0 :         direct_mv_sad = direct_mv_sad << sub_sampled_sad;
   13472             : 
   13473           0 :         direct_mv_cost = (direct_mv_sad << COST_PRECISION);
   13474             :     }
   13475             : 
   13476           0 :     best_cost = MIN(
   13477             :         zero_mv_cost,
   13478             :         MIN(mv_a_cost,
   13479             :             MIN(mv_b_cost, MIN(mv_c_cost, MIN(mv_d_cost, direct_mv_cost)))));
   13480             : 
   13481           0 :     if (best_cost == zero_mv_cost) {
   13482           0 :         search_center_x = 0;
   13483           0 :         search_center_y = 0;
   13484           0 :     } else if (best_cost == mv_a_cost) {
   13485           0 :         search_center_x = 0 - (context_ptr->hme_level0_total_search_area_width *
   13486             :                                sparce_scale);
   13487           0 :         search_center_y = 0;
   13488           0 :     } else if (best_cost == mv_b_cost) {
   13489           0 :         search_center_x =
   13490           0 :             (context_ptr->hme_level0_total_search_area_width * sparce_scale);
   13491           0 :         search_center_y = 0;
   13492           0 :     } else if (best_cost == mv_c_cost) {
   13493           0 :         search_center_x = 0;
   13494           0 :         search_center_y =
   13495             :             0 -
   13496           0 :             (context_ptr->hme_level0_total_search_area_height * sparce_scale);
   13497           0 :     } else if (best_cost == direct_mv_cost) {
   13498           0 :         search_center_x =
   13499           0 :             list_index ? 0 - (_MVXT(context_ptr->p_sb_best_mv[0][0][0]) >> 2)
   13500             :                        : 0;
   13501           0 :         search_center_y =
   13502           0 :             list_index ? 0 - (_MVYT(context_ptr->p_sb_best_mv[0][0][0]) >> 2)
   13503             :                        : 0;
   13504           0 :     } else if (best_cost == mv_d_cost) {
   13505           0 :         search_center_x = 0;
   13506           0 :         search_center_y =
   13507           0 :             (context_ptr->hme_level0_total_search_area_height * sparce_scale);
   13508             :     }
   13509             : 
   13510             :     else
   13511           0 :         SVT_LOG("error no center selected");
   13512           0 :     *xsc = search_center_x;
   13513           0 :     *ysc = search_center_y;
   13514           0 : }
   13515             : 
   13516           0 : void SwapMeCandidate(MePredUnit *a, MePredUnit *b) {
   13517             :     MePredUnit tempPtr;
   13518           0 :     tempPtr = *a;
   13519           0 :     *a = *b;
   13520           0 :     *b = tempPtr;
   13521           0 : }
   13522             : 
   13523             : /*******************************************
   13524             :  * motion_estimate_lcu
   13525             :  *   performs ME (LCU)
   13526             :  *******************************************/
   13527           0 : EbErrorType motion_estimate_lcu(
   13528             :         PictureParentControlSet   *picture_control_set_ptr,  // input parameter, Picture Control Set Ptr
   13529             :         uint32_t                   sb_index,              // input parameter, SB Index
   13530             :         uint32_t                   sb_origin_x,            // input parameter, SB Origin X
   13531             :         uint32_t                   sb_origin_y,            // input parameter, SB Origin Y
   13532             :         MeContext                 *context_ptr,                        // input parameter, ME Context Ptr, used to store decimated/interpolated LCU/SR
   13533             :         EbPictureBufferDesc       *input_ptr)              // input parameter, source Picture Ptr
   13534             : 
   13535             : {
   13536           0 :     EbErrorType return_error = EB_ErrorNone;
   13537             : 
   13538           0 :     SequenceControlSet *sequence_control_set_ptr =
   13539             :         (SequenceControlSet *)picture_control_set_ptr
   13540           0 :             ->sequence_control_set_wrapper_ptr->object_ptr;
   13541             : 
   13542             :     int16_t xTopLeftSearchRegion;
   13543             :     int16_t yTopLeftSearchRegion;
   13544             :     uint32_t searchRegionIndex;
   13545             : 
   13546           0 :     int16_t picture_width =
   13547             :         (int16_t)((SequenceControlSet *)picture_control_set_ptr
   13548           0 :                       ->sequence_control_set_wrapper_ptr->object_ptr)
   13549           0 :             ->seq_header.max_frame_width;
   13550           0 :     int16_t picture_height =
   13551             :         (int16_t)((SequenceControlSet *)picture_control_set_ptr
   13552           0 :                       ->sequence_control_set_wrapper_ptr->object_ptr)
   13553           0 :             ->seq_header.max_frame_height;
   13554           0 :     uint32_t sb_width = (input_ptr->width - sb_origin_x) < BLOCK_SIZE_64
   13555             :                             ? input_ptr->width - sb_origin_x
   13556             :                             : BLOCK_SIZE_64;
   13557           0 :     uint32_t sb_height = (input_ptr->height - sb_origin_y) < BLOCK_SIZE_64
   13558             :                              ? input_ptr->height - sb_origin_y
   13559             :                              : BLOCK_SIZE_64;
   13560             : 
   13561           0 :     int16_t padWidth = (int16_t)BLOCK_SIZE_64 - 1;
   13562           0 :     int16_t padHeight = (int16_t)BLOCK_SIZE_64 - 1;
   13563             :     int16_t search_area_width;
   13564             :     int16_t search_area_height;
   13565             :     int16_t x_search_area_origin;
   13566             :     int16_t y_search_area_origin;
   13567           0 :     int16_t origin_x = (int16_t)sb_origin_x;
   13568           0 :     int16_t origin_y = (int16_t)sb_origin_y;
   13569             : 
   13570             :     // HME
   13571           0 :     uint32_t searchRegionNumberInWidth = 0;
   13572           0 :     uint32_t searchRegionNumberInHeight = 0;
   13573             :     int16_t xHmeLevel0SearchCenter[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
   13574             :                                   [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
   13575             :     int16_t yHmeLevel0SearchCenter[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
   13576             :                                   [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
   13577             :     uint64_t hmeLevel0Sad[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
   13578             :                          [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
   13579             :     int16_t xHmeLevel1SearchCenter[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
   13580             :                                   [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
   13581             :     int16_t yHmeLevel1SearchCenter[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
   13582             :                                   [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
   13583             :     uint64_t hmeLevel1Sad[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
   13584             :                          [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
   13585             :     int16_t xHmeLevel2SearchCenter[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
   13586             :                                   [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
   13587             :     int16_t yHmeLevel2SearchCenter[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
   13588             :                                   [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
   13589             :     uint64_t hmeLevel2Sad[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
   13590             :                          [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
   13591             : 
   13592             :     // Hierarchical ME Search Center
   13593           0 :     int16_t xHmeSearchCenter = 0;
   13594           0 :     int16_t yHmeSearchCenter = 0;
   13595             : 
   13596             :     // Final ME Search Center
   13597           0 :     int16_t x_search_center = 0;
   13598           0 :     int16_t y_search_center = 0;
   13599             : 
   13600             :     // Search Center SADs
   13601           0 :     uint64_t hmeMvSad = 0;
   13602             : 
   13603             :     uint32_t pu_index;
   13604             : 
   13605           0 :     uint32_t max_number_of_pus_per_sb =
   13606           0 :         picture_control_set_ptr->max_number_of_pus_per_sb;
   13607             : 
   13608             :     uint32_t numOfListToSearch;
   13609             :     uint32_t listIndex;
   13610           0 :     uint8_t candidateIndex = 0;
   13611           0 :     uint8_t total_me_candidate_index = 0;
   13612             :     EbPaReferenceObject
   13613             :         *referenceObject;  // input parameter, reference Object Ptr
   13614             : 
   13615             :     uint8_t ref_pic_index;
   13616             :     uint8_t num_of_ref_pic_to_search;
   13617           0 :     uint8_t candidate_index = 0;
   13618           0 :     uint32_t next_candidate_index = 0;
   13619             : 
   13620             :     MePredUnit *me_candidate;
   13621             :     EbPictureBufferDesc *refPicPtr;
   13622             :     EbPictureBufferDesc *quarterRefPicPtr;
   13623             :     EbPictureBufferDesc *sixteenthRefPicPtr;
   13624             : 
   13625           0 :     int16_t tempXHmeSearchCenter = 0;
   13626           0 :     int16_t tempYHmeSearchCenter = 0;
   13627             : 
   13628             :     uint32_t numQuadInWidth;
   13629             :     uint32_t totalMeQuad;
   13630             :     uint32_t quadIndex;
   13631             :     uint32_t nextQuadIndex;
   13632             :     uint64_t tempXHmeSad;
   13633             : 
   13634           0 :     uint64_t ref0Poc = 0;
   13635           0 :     uint64_t ref1Poc = 0;
   13636             : 
   13637             :     uint64_t i;
   13638             : 
   13639             :     int16_t hmeLevel1SearchAreaInWidth;
   13640             :     int16_t hmeLevel1SearchAreaInHeight;
   13641             :     // Configure HME level 0, level 1 and level 2 from static config parameters
   13642           0 :     EbBool enable_hme_level0_flag =
   13643             :         context_ptr->enable_hme_level0_flag;
   13644           0 :     EbBool enable_hme_level1_flag =
   13645             :         context_ptr->enable_hme_level1_flag;
   13646           0 :     EbBool enable_hme_level2_flag =
   13647             :         context_ptr->enable_hme_level2_flag;
   13648             : 
   13649           0 :     EbBool enableHalfPel32x32 = EB_FALSE;
   13650           0 :     EbBool enableHalfPel16x16 = EB_FALSE;
   13651           0 :     EbBool enableHalfPel8x8 = EB_FALSE;
   13652           0 :     EbBool enableQuarterPel = EB_FALSE;
   13653           0 :     EbBool oneQuadrantHME = EB_FALSE;
   13654             : 
   13655           0 :     oneQuadrantHME =
   13656           0 :         sequence_control_set_ptr->input_resolution < INPUT_SIZE_4K_RANGE
   13657             :             ? 0
   13658             :             : oneQuadrantHME;
   13659             : 
   13660           0 :     numOfListToSearch = (picture_control_set_ptr->slice_type == P_SLICE)
   13661             :                             ? (uint32_t)REF_LIST_0
   13662           0 :                             : (uint32_t)REF_LIST_1;
   13663             : 
   13664           0 :     EbBool is_nsq_table_used =
   13665           0 :         (picture_control_set_ptr->pic_depth_mode <= PIC_ALL_C_DEPTH_MODE &&
   13666           0 :          picture_control_set_ptr->nsq_search_level >= NSQ_SEARCH_LEVEL1 &&
   13667           0 :          picture_control_set_ptr->nsq_search_level < NSQ_SEARCH_FULL)
   13668             :             ? EB_TRUE
   13669           0 :             : EB_FALSE;
   13670             : 
   13671           0 :     is_nsq_table_used = picture_control_set_ptr->enc_mode == ENC_M0 ?  EB_FALSE : is_nsq_table_used;
   13672             : 
   13673           0 :     if (context_ptr->me_alt_ref == EB_TRUE)
   13674           0 :         numOfListToSearch = 0;
   13675             : 
   13676             :     // Uni-Prediction motion estimation loop
   13677             :     // List Loop
   13678           0 :     for (listIndex = REF_LIST_0; listIndex <= numOfListToSearch; ++listIndex) {
   13679             : 
   13680           0 :         if (context_ptr->me_alt_ref == EB_TRUE) {
   13681           0 :             num_of_ref_pic_to_search = 1;
   13682             :         } else {
   13683           0 :             num_of_ref_pic_to_search =
   13684           0 :                 (picture_control_set_ptr->slice_type == P_SLICE)
   13685             :                     ? picture_control_set_ptr->ref_list0_count
   13686             :                     : (listIndex == REF_LIST_0)
   13687             :                           ? picture_control_set_ptr->ref_list0_count
   13688             :                           : picture_control_set_ptr->ref_list1_count;
   13689             : 
   13690           0 :             referenceObject = (EbPaReferenceObject *)picture_control_set_ptr
   13691           0 :                                   ->ref_pa_pic_ptr_array[0][0]
   13692             :                                   ->object_ptr;
   13693           0 :             ref0Poc = picture_control_set_ptr->ref_pic_poc_array[0][0];
   13694             :         }
   13695             : 
   13696             :         // Ref Picture Loop
   13697           0 :         for (ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search;
   13698           0 :              ++ref_pic_index)
   13699             :         {
   13700           0 :             if (context_ptr->me_alt_ref == EB_TRUE) {
   13701           0 :                 referenceObject =
   13702             :                     (EbPaReferenceObject *)context_ptr->alt_ref_reference_ptr;
   13703             :             } else {
   13704           0 :                 if (numOfListToSearch) {
   13705           0 :                     referenceObject =
   13706             :                         (EbPaReferenceObject *)picture_control_set_ptr
   13707           0 :                             ->ref_pa_pic_ptr_array[1][0]
   13708             :                             ->object_ptr;
   13709           0 :                     ref1Poc = picture_control_set_ptr->ref_pic_poc_array[1][0];
   13710             :                 }
   13711             : 
   13712           0 :                 referenceObject =
   13713             :                     (EbPaReferenceObject *)picture_control_set_ptr
   13714           0 :                         ->ref_pa_pic_ptr_array[listIndex][ref_pic_index]
   13715             :                         ->object_ptr;
   13716             :             }
   13717             : 
   13718           0 :             refPicPtr = (EbPictureBufferDesc*)referenceObject->input_padded_picture_ptr;
   13719             :             // Set 1/4 and 1/16 ME reference buffer(s); filtered or decimated
   13720           0 :             quarterRefPicPtr = (sequence_control_set_ptr->down_sampling_method_me_search == ME_FILTERED_DOWNSAMPLED) ?
   13721           0 :                 (EbPictureBufferDesc*)referenceObject->quarter_filtered_picture_ptr :
   13722             :                 (EbPictureBufferDesc*)referenceObject->quarter_decimated_picture_ptr;
   13723             : 
   13724           0 :             sixteenthRefPicPtr = (sequence_control_set_ptr->down_sampling_method_me_search == ME_FILTERED_DOWNSAMPLED) ?
   13725           0 :                 (EbPictureBufferDesc*)referenceObject->sixteenth_filtered_picture_ptr:
   13726             :                 (EbPictureBufferDesc*)referenceObject->sixteenth_decimated_picture_ptr;
   13727           0 :             if (picture_control_set_ptr->temporal_layer_index > 0 ||
   13728             :                 listIndex == 0) {
   13729             :                 // A - Set the MV center for the Tier0 search: either (0,0), or
   13730             :                 // the center selected by hme_mv_center_check().
   13731           0 :                 if (context_ptr->update_hme_search_center_flag)
   13732           0 :                     hme_mv_center_check(refPicPtr,
   13733             :                                         context_ptr,
   13734             :                                         &x_search_center,
   13735             :                                         &y_search_center,
   13736             :                                         listIndex,
   13737             :                                         origin_x,
   13738             :                                         origin_y,
   13739             :                                         sb_width,
   13740             :                                         sb_height);
   13741             :                 else {
   13742           0 :                     x_search_center = 0;
   13743           0 :                     y_search_center = 0;
   13744             :                 }
   13745             :                 // B - NO HME in boundaries
   13746             :                 // C - Skip HME
   13747             : 
   13748           0 :                 if (context_ptr->enable_hme_flag &&
   13749             : 
   13750             :                     /*B*/ sb_height ==
   13751             :                         BLOCK_SIZE_64) {  //(searchCenterSad >
   13752             :                                           // sequence_control_set_ptr->static_config.skipTier0HmeTh))
   13753             :                                           //{
   13754           0 :                     while (searchRegionNumberInHeight <
   13755           0 :                            context_ptr->number_hme_search_region_in_height) {
   13756           0 :                         while (searchRegionNumberInWidth <
   13757           0 :                                context_ptr->number_hme_search_region_in_width) {
   13758             :                             xHmeLevel0SearchCenter[searchRegionNumberInWidth]
   13759           0 :                                                   [searchRegionNumberInHeight] =
   13760             :                                                       x_search_center;
   13761             :                             yHmeLevel0SearchCenter[searchRegionNumberInWidth]
   13762           0 :                                                   [searchRegionNumberInHeight] =
   13763             :                                                       y_search_center;
   13764             : 
   13765             :                             xHmeLevel1SearchCenter[searchRegionNumberInWidth]
   13766           0 :                                                   [searchRegionNumberInHeight] =
   13767             :                                                       x_search_center;
   13768             :                             yHmeLevel1SearchCenter[searchRegionNumberInWidth]
   13769           0 :                                                   [searchRegionNumberInHeight] =
   13770             :                                                       y_search_center;
   13771             : 
   13772             :                             xHmeLevel2SearchCenter[searchRegionNumberInWidth]
   13773           0 :                                                   [searchRegionNumberInHeight] =
   13774             :                                                       x_search_center;
   13775             :                             yHmeLevel2SearchCenter[searchRegionNumberInWidth]
   13776           0 :                                                   [searchRegionNumberInHeight] =
   13777             :                                                       y_search_center;
   13778             : 
   13779           0 :                             searchRegionNumberInWidth++;
   13780             :                         }
   13781           0 :                         searchRegionNumberInWidth = 0;
   13782           0 :                         searchRegionNumberInHeight++;
   13783             :                     }
   13784             : 
   13785             :                     // HME: Level0 search
   13786             : 
   13787           0 :                     if (enable_hme_level0_flag) {
   13788           0 :                         if (oneQuadrantHME && !enable_hme_level1_flag &&
   13789             :                             !enable_hme_level2_flag) {
   13790           0 :                             searchRegionNumberInHeight = 0;
   13791           0 :                             searchRegionNumberInWidth = 0;
   13792             : 
   13793           0 :                             HmeOneQuadrantLevel0(
   13794             :                                 picture_control_set_ptr,
   13795             :                                 context_ptr,
   13796             :                                 origin_x >> 2,
   13797             :                                 origin_y >> 2,
   13798             :                                 sb_width >> 2,
   13799             :                                 sb_height >> 2,
   13800             :                                 x_search_center >> 2,
   13801             :                                 y_search_center >> 2,
   13802             :                                 sixteenthRefPicPtr,
   13803             :                                 &(hmeLevel0Sad[searchRegionNumberInWidth]
   13804             :                                               [searchRegionNumberInHeight]),
   13805             :                                 &(xHmeLevel0SearchCenter
   13806             :                                       [searchRegionNumberInWidth]
   13807             :                                       [searchRegionNumberInHeight]),
   13808             :                                 &(yHmeLevel0SearchCenter
   13809             :                                       [searchRegionNumberInWidth]
   13810             :                                       [searchRegionNumberInHeight]),
   13811             :                                 hme_level_0_search_area_multiplier_x
   13812             :                                     [picture_control_set_ptr
   13813           0 :                                          ->hierarchical_levels]
   13814           0 :                                     [picture_control_set_ptr
   13815           0 :                                          ->temporal_layer_index],
   13816             :                                 hme_level_0_search_area_multiplier_y
   13817             :                                     [picture_control_set_ptr
   13818           0 :                                          ->hierarchical_levels]
   13819           0 :                                     [picture_control_set_ptr
   13820           0 :                                          ->temporal_layer_index]);
   13821             :                         } else {
   13822           0 :                             searchRegionNumberInHeight = 0;
   13823           0 :                             searchRegionNumberInWidth = 0;
   13824             :                             {
   13825           0 :                                 while (
   13826             :                                     searchRegionNumberInHeight <
   13827             :                                     context_ptr
   13828           0 :                                         ->number_hme_search_region_in_height) {
   13829           0 :                                     while (
   13830             :                                         searchRegionNumberInWidth <
   13831             :                                         context_ptr
   13832           0 :                                             ->number_hme_search_region_in_width) {
   13833           0 :                                         HmeLevel0(
   13834             :                                             picture_control_set_ptr,
   13835             :                                             context_ptr,
   13836             :                                             origin_x >> 2,
   13837             :                                             origin_y >> 2,
   13838             :                                             sb_width >> 2,
   13839             :                                             sb_height >> 2,
   13840             :                                             x_search_center >> 2,
   13841             :                                             y_search_center >> 2,
   13842             :                                             sixteenthRefPicPtr,
   13843             :                                             searchRegionNumberInWidth,
   13844             :                                             searchRegionNumberInHeight,
   13845             :                                             &(hmeLevel0Sad
   13846             :                                                   [searchRegionNumberInWidth]
   13847             :                                                   [searchRegionNumberInHeight]),
   13848             :                                             &(xHmeLevel0SearchCenter
   13849             :                                                   [searchRegionNumberInWidth]
   13850             :                                                   [searchRegionNumberInHeight]),
   13851             :                                             &(yHmeLevel0SearchCenter
   13852             :                                                   [searchRegionNumberInWidth]
   13853             :                                                   [searchRegionNumberInHeight]),
   13854             :                                             hme_level_0_search_area_multiplier_x
   13855             :                                                 [picture_control_set_ptr
   13856           0 :                                                      ->hierarchical_levels]
   13857           0 :                                                 [picture_control_set_ptr
   13858           0 :                                                      ->temporal_layer_index],
   13859             :                                             hme_level_0_search_area_multiplier_y
   13860             :                                                 [picture_control_set_ptr
   13861           0 :                                                      ->hierarchical_levels]
   13862           0 :                                                 [picture_control_set_ptr
   13863           0 :                                                      ->temporal_layer_index]);
   13864             : 
   13865           0 :                                         searchRegionNumberInWidth++;
   13866             :                                     }
   13867           0 :                                     searchRegionNumberInWidth = 0;
   13868           0 :                                     searchRegionNumberInHeight++;
   13869             :                                 }
   13870             :                             }
   13871             :                         }
   13872             :                     }
   13873             : 
   13874             :                     // HME: Level1 search
   13875           0 :                     if (enable_hme_level1_flag) {
   13876           0 :                         searchRegionNumberInHeight = 0;
   13877           0 :                         searchRegionNumberInWidth = 0;
   13878             : 
   13879             :                         {
   13880           0 :                             while (searchRegionNumberInHeight <
   13881             :                                    context_ptr
   13882           0 :                                        ->number_hme_search_region_in_height) {
   13883           0 :                                 while (
   13884             :                                     searchRegionNumberInWidth <
   13885             :                                     context_ptr
   13886           0 :                                         ->number_hme_search_region_in_width) {
   13887             :                                     // When HME level 0 has been disabled,
   13888             :                                     // increase the search area width and height
   13889             :                                     // for level 1 to (32x12) for Gold only
   13890             : 
   13891           0 :                                     hmeLevel1SearchAreaInWidth =
   13892             :                                         (int16_t)context_ptr
   13893             :                                             ->hme_level1_search_area_in_width_array
   13894           0 :                                                 [searchRegionNumberInWidth];
   13895           0 :                                     hmeLevel1SearchAreaInHeight =
   13896             :                                         (int16_t)context_ptr
   13897             :                                             ->hme_level1_search_area_in_height_array
   13898           0 :                                                 [searchRegionNumberInHeight];
   13899             : 
   13900           0 :                                     HmeLevel1(
   13901             :                                         context_ptr,
   13902             :                                         origin_x >> 1,
   13903             :                                         origin_y >> 1,
   13904             :                                         sb_width >> 1,
   13905             :                                         sb_height >> 1,
   13906             :                                         quarterRefPicPtr,
   13907             :                                         hmeLevel1SearchAreaInWidth,
   13908             :                                         hmeLevel1SearchAreaInHeight,
   13909             :                                         xHmeLevel0SearchCenter
   13910             :                                                 [searchRegionNumberInWidth]
   13911           0 :                                                 [searchRegionNumberInHeight] >>
   13912             :                                             1,
   13913             :                                         yHmeLevel0SearchCenter
   13914             :                                                 [searchRegionNumberInWidth]
   13915           0 :                                                 [searchRegionNumberInHeight] >>
   13916             :                                             1,
   13917             :                                         &(hmeLevel1Sad
   13918             :                                               [searchRegionNumberInWidth]
   13919             :                                               [searchRegionNumberInHeight]),
   13920             :                                         &(xHmeLevel1SearchCenter
   13921             :                                               [searchRegionNumberInWidth]
   13922             :                                               [searchRegionNumberInHeight]),
   13923             :                                         &(yHmeLevel1SearchCenter
   13924             :                                               [searchRegionNumberInWidth]
   13925             :                                               [searchRegionNumberInHeight]));
   13926             : 
   13927           0 :                                     searchRegionNumberInWidth++;
   13928             :                                 }
   13929           0 :                                 searchRegionNumberInWidth = 0;
   13930           0 :                                 searchRegionNumberInHeight++;
   13931             :                             }
   13932             :                         }
   13933             :                     }
   13934             : 
   13935             :                     // HME: Level2 search
   13936           0 :                     if (enable_hme_level2_flag) {
   13937           0 :                         searchRegionNumberInHeight = 0;
   13938           0 :                         searchRegionNumberInWidth = 0;
   13939             : 
   13940             :                         {
   13941           0 :                             while (searchRegionNumberInHeight <
   13942             :                                    context_ptr
   13943           0 :                                        ->number_hme_search_region_in_height) {
   13944           0 :                                 while (
   13945             :                                     searchRegionNumberInWidth <
   13946             :                                     context_ptr
   13947           0 :                                         ->number_hme_search_region_in_width) {
   13948           0 :                                     HmeLevel2(
   13949             :                                         picture_control_set_ptr,
   13950             :                                         context_ptr,
   13951             :                                         origin_x,
   13952             :                                         origin_y,
   13953             :                                         sb_width,
   13954             :                                         sb_height,
   13955             :                                         refPicPtr,
   13956             :                                         searchRegionNumberInWidth,
   13957             :                                         searchRegionNumberInHeight,
   13958             :                                         xHmeLevel1SearchCenter
   13959             :                                             [searchRegionNumberInWidth]
   13960           0 :                                             [searchRegionNumberInHeight],
   13961             :                                         yHmeLevel1SearchCenter
   13962             :                                             [searchRegionNumberInWidth]
   13963           0 :                                             [searchRegionNumberInHeight],
   13964             :                                         &(hmeLevel2Sad
   13965             :                                               [searchRegionNumberInWidth]
   13966             :                                               [searchRegionNumberInHeight]),
   13967             :                                         &(xHmeLevel2SearchCenter
   13968             :                                               [searchRegionNumberInWidth]
   13969             :                                               [searchRegionNumberInHeight]),
   13970             :                                         &(yHmeLevel2SearchCenter
   13971             :                                               [searchRegionNumberInWidth]
   13972             :                                               [searchRegionNumberInHeight]));
   13973             : 
   13974           0 :                                     searchRegionNumberInWidth++;
   13975             :                                 }
   13976           0 :                                 searchRegionNumberInWidth = 0;
   13977           0 :                                 searchRegionNumberInHeight++;
   13978             :                             }
   13979             :                         }
   13980             :                     }
   13981             : 
   13982             :                     // Hierarchical ME - Search Center
   13983           0 :                     if (enable_hme_level0_flag && !enable_hme_level1_flag &&
   13984             :                         !enable_hme_level2_flag) {
   13985           0 :                         if (oneQuadrantHME) {
   13986           0 :                             xHmeSearchCenter = xHmeLevel0SearchCenter[0][0];
   13987           0 :                             yHmeSearchCenter = yHmeLevel0SearchCenter[0][0];
   13988           0 :                             hmeMvSad = hmeLevel0Sad[0][0];
   13989             :                         } else {
   13990           0 :                             xHmeSearchCenter = xHmeLevel0SearchCenter[0][0];
   13991           0 :                             yHmeSearchCenter = yHmeLevel0SearchCenter[0][0];
   13992           0 :                             hmeMvSad = hmeLevel0Sad[0][0];
   13993             : 
   13994           0 :                             searchRegionNumberInWidth = 1;
   13995           0 :                             searchRegionNumberInHeight = 0;
   13996             : 
   13997           0 :                             while (searchRegionNumberInHeight <
   13998             :                                    context_ptr
   13999           0 :                                        ->number_hme_search_region_in_height) {
   14000           0 :                                 while (
   14001             :                                     searchRegionNumberInWidth <
   14002             :                                     context_ptr
   14003           0 :                                         ->number_hme_search_region_in_width) {
   14004           0 :                                     xHmeSearchCenter =
   14005             :                                         (hmeLevel0Sad
   14006             :                                              [searchRegionNumberInWidth]
   14007           0 :                                              [searchRegionNumberInHeight] <
   14008             :                                          hmeMvSad)
   14009             :                                             ? xHmeLevel0SearchCenter
   14010             :                                                   [searchRegionNumberInWidth]
   14011             :                                                   [searchRegionNumberInHeight]
   14012             :                                             : xHmeSearchCenter;
   14013           0 :                                     yHmeSearchCenter =
   14014             :                                         (hmeLevel0Sad
   14015             :                                              [searchRegionNumberInWidth]
   14016           0 :                                              [searchRegionNumberInHeight] <
   14017             :                                          hmeMvSad)
   14018             :                                             ? yHmeLevel0SearchCenter
   14019             :                                                   [searchRegionNumberInWidth]
   14020             :                                                   [searchRegionNumberInHeight]
   14021             :                                             : yHmeSearchCenter;
   14022           0 :                                     hmeMvSad =
   14023             :                                         (hmeLevel0Sad
   14024             :                                              [searchRegionNumberInWidth]
   14025           0 :                                              [searchRegionNumberInHeight] <
   14026             :                                          hmeMvSad)
   14027             :                                             ? hmeLevel0Sad
   14028             :                                                   [searchRegionNumberInWidth]
   14029             :                                                   [searchRegionNumberInHeight]
   14030             :                                             : hmeMvSad;
   14031           0 :                                     searchRegionNumberInWidth++;
   14032             :                                 }
   14033           0 :                                 searchRegionNumberInWidth = 0;
   14034           0 :                                 searchRegionNumberInHeight++;
   14035             :                             }
   14036             :                         }
   14037             :                     }
   14038             : 
   14039           0 :                     if (enable_hme_level1_flag && !enable_hme_level2_flag) {
   14040           0 :                         xHmeSearchCenter = xHmeLevel1SearchCenter[0][0];
   14041           0 :                         yHmeSearchCenter = yHmeLevel1SearchCenter[0][0];
   14042           0 :                         hmeMvSad = hmeLevel1Sad[0][0];
   14043             : 
   14044           0 :                         searchRegionNumberInWidth = 1;
   14045           0 :                         searchRegionNumberInHeight = 0;
   14046             : 
   14047           0 :                         while (
   14048             :                             searchRegionNumberInHeight <
   14049           0 :                             context_ptr->number_hme_search_region_in_height) {
   14050           0 :                             while (searchRegionNumberInWidth <
   14051             :                                    context_ptr
   14052           0 :                                        ->number_hme_search_region_in_width) {
   14053           0 :                                 xHmeSearchCenter =
   14054             :                                     (hmeLevel1Sad[searchRegionNumberInWidth]
   14055           0 :                                                  [searchRegionNumberInHeight] <
   14056             :                                      hmeMvSad)
   14057             :                                         ? xHmeLevel1SearchCenter
   14058             :                                               [searchRegionNumberInWidth]
   14059             :                                               [searchRegionNumberInHeight]
   14060             :                                         : xHmeSearchCenter;
   14061           0 :                                 yHmeSearchCenter =
   14062             :                                     (hmeLevel1Sad[searchRegionNumberInWidth]
   14063           0 :                                                  [searchRegionNumberInHeight] <
   14064             :                                      hmeMvSad)
   14065             :                                         ? yHmeLevel1SearchCenter
   14066             :                                               [searchRegionNumberInWidth]
   14067             :                                               [searchRegionNumberInHeight]
   14068             :                                         : yHmeSearchCenter;
   14069           0 :                                 hmeMvSad =
   14070             :                                     (hmeLevel1Sad[searchRegionNumberInWidth]
   14071           0 :                                                  [searchRegionNumberInHeight] <
   14072             :                                      hmeMvSad)
   14073             :                                         ? hmeLevel1Sad
   14074             :                                               [searchRegionNumberInWidth]
   14075             :                                               [searchRegionNumberInHeight]
   14076             :                                         : hmeMvSad;
   14077           0 :                                 searchRegionNumberInWidth++;
   14078             :                             }
   14079           0 :                             searchRegionNumberInWidth = 0;
   14080           0 :                             searchRegionNumberInHeight++;
   14081             :                         }
   14082             :                     }
   14083             : 
   14084           0 :                     if (enable_hme_level2_flag) {
   14085           0 :                         xHmeSearchCenter = xHmeLevel2SearchCenter[0][0];
   14086           0 :                         yHmeSearchCenter = yHmeLevel2SearchCenter[0][0];
   14087           0 :                         hmeMvSad = hmeLevel2Sad[0][0];
   14088             : 
   14089           0 :                         searchRegionNumberInWidth = 1;
   14090           0 :                         searchRegionNumberInHeight = 0;
   14091             : 
   14092           0 :                         while (
   14093             :                             searchRegionNumberInHeight <
   14094           0 :                             context_ptr->number_hme_search_region_in_height) {
   14095           0 :                             while (searchRegionNumberInWidth <
   14096             :                                    context_ptr
   14097           0 :                                        ->number_hme_search_region_in_width) {
   14098           0 :                                 xHmeSearchCenter =
   14099             :                                     (hmeLevel2Sad[searchRegionNumberInWidth]
   14100           0 :                                                  [searchRegionNumberInHeight] <
   14101             :                                      hmeMvSad)
   14102             :                                         ? xHmeLevel2SearchCenter
   14103             :                                               [searchRegionNumberInWidth]
   14104             :                                               [searchRegionNumberInHeight]
   14105             :                                         : xHmeSearchCenter;
   14106           0 :                                 yHmeSearchCenter =
   14107             :                                     (hmeLevel2Sad[searchRegionNumberInWidth]
   14108           0 :                                                  [searchRegionNumberInHeight] <
   14109             :                                      hmeMvSad)
   14110             :                                         ? yHmeLevel2SearchCenter
   14111             :                                               [searchRegionNumberInWidth]
   14112             :                                               [searchRegionNumberInHeight]
   14113             :                                         : yHmeSearchCenter;
   14114           0 :                                 hmeMvSad =
   14115             :                                     (hmeLevel2Sad[searchRegionNumberInWidth]
   14116           0 :                                                  [searchRegionNumberInHeight] <
   14117             :                                      hmeMvSad)
   14118             :                                         ? hmeLevel2Sad
   14119             :                                               [searchRegionNumberInWidth]
   14120             :                                               [searchRegionNumberInHeight]
   14121             :                                         : hmeMvSad;
   14122           0 :                                 searchRegionNumberInWidth++;
   14123             :                             }
   14124           0 :                             searchRegionNumberInWidth = 0;
   14125           0 :                             searchRegionNumberInHeight++;
   14126             :                         }
   14127             : 
   14128           0 :                         numQuadInWidth =
   14129           0 :                             context_ptr->number_hme_search_region_in_width;
   14130           0 :                         totalMeQuad =
   14131           0 :                             context_ptr->number_hme_search_region_in_height *
   14132           0 :                             context_ptr->number_hme_search_region_in_width;
   14133             : 
   14134           0 :                         if ((ref0Poc == ref1Poc) && (listIndex == 1) &&
   14135             :                             (totalMeQuad > 1)) {
   14136           0 :                             for (quadIndex = 0; quadIndex < totalMeQuad - 1;
   14137           0 :                                  ++quadIndex) {
   14138           0 :                                 for (nextQuadIndex = quadIndex + 1;
   14139             :                                      nextQuadIndex < totalMeQuad;
   14140           0 :                                      ++nextQuadIndex) {
   14141           0 :                                     if (hmeLevel2Sad[quadIndex / numQuadInWidth]
   14142           0 :                                                     [quadIndex %
   14143             :                                                      numQuadInWidth] >
   14144           0 :                                         hmeLevel2Sad[nextQuadIndex /
   14145             :                                                      numQuadInWidth]
   14146           0 :                                                     [nextQuadIndex %
   14147             :                                                      numQuadInWidth]) {
   14148           0 :                                         tempXHmeSearchCenter =
   14149             :                                             xHmeLevel2SearchCenter
   14150           0 :                                                 [quadIndex / numQuadInWidth]
   14151           0 :                                                 [quadIndex % numQuadInWidth];
   14152           0 :                                         tempYHmeSearchCenter =
   14153             :                                             yHmeLevel2SearchCenter
   14154           0 :                                                 [quadIndex / numQuadInWidth]
   14155           0 :                                                 [quadIndex % numQuadInWidth];
   14156           0 :                                         tempXHmeSad =
   14157           0 :                                             hmeLevel2Sad[quadIndex /
   14158             :                                                          numQuadInWidth]
   14159           0 :                                                         [quadIndex %
   14160             :                                                          numQuadInWidth];
   14161             : 
   14162             :                                         xHmeLevel2SearchCenter
   14163           0 :                                             [quadIndex / numQuadInWidth]
   14164           0 :                                             [quadIndex % numQuadInWidth] =
   14165             :                                                 xHmeLevel2SearchCenter
   14166           0 :                                                     [nextQuadIndex /
   14167             :                                                      numQuadInWidth]
   14168           0 :                                                     [nextQuadIndex %
   14169             :                                                      numQuadInWidth];
   14170             :                                         yHmeLevel2SearchCenter
   14171           0 :                                             [quadIndex / numQuadInWidth]
   14172           0 :                                             [quadIndex % numQuadInWidth] =
   14173             :                                                 yHmeLevel2SearchCenter
   14174           0 :                                                     [nextQuadIndex /
   14175             :                                                      numQuadInWidth]
   14176           0 :                                                     [nextQuadIndex %
   14177             :                                                      numQuadInWidth];
   14178             :                                         hmeLevel2Sad
   14179           0 :                                             [quadIndex / numQuadInWidth]
   14180           0 :                                             [quadIndex % numQuadInWidth] =
   14181           0 :                                                 hmeLevel2Sad[nextQuadIndex /
   14182             :                                                              numQuadInWidth]
   14183           0 :                                                             [nextQuadIndex %
   14184             :                                                              numQuadInWidth];
   14185             : 
   14186             :                                         xHmeLevel2SearchCenter
   14187           0 :                                             [nextQuadIndex / numQuadInWidth]
   14188           0 :                                             [nextQuadIndex % numQuadInWidth] =
   14189             :                                                 tempXHmeSearchCenter;
   14190             :                                         yHmeLevel2SearchCenter
   14191           0 :                                             [nextQuadIndex / numQuadInWidth]
   14192           0 :                                             [nextQuadIndex % numQuadInWidth] =
   14193             :                                                 tempYHmeSearchCenter;
   14194           0 :                                         hmeLevel2Sad[nextQuadIndex /
   14195             :                                                      numQuadInWidth]
   14196           0 :                                                     [nextQuadIndex %
   14197           0 :                                                      numQuadInWidth] =
   14198             :                                                         tempXHmeSad;
   14199             :                                     }
   14200             :                                 }
   14201             :                             }
   14202             : 
   14203           0 :                             xHmeSearchCenter = xHmeLevel2SearchCenter[0][1];
   14204           0 :                             yHmeSearchCenter = yHmeLevel2SearchCenter[0][1];
   14205             :                         }
   14206             :                     }
   14207             : 
   14208           0 :                     x_search_center = xHmeSearchCenter;
   14209           0 :                     y_search_center = yHmeSearchCenter;
   14210             :                 }
   14211             :             }
   14212             : 
   14213             :             else {
   14214           0 :                 x_search_center = 0;
   14215           0 :                 y_search_center = 0;
   14216             :             }
   14217             :             // Constrain x_ME to be a multiple of 8 (round up)
   14218           0 :             search_area_width = (context_ptr->search_area_width + 7) & ~0x07;
   14219           0 :             search_area_height = context_ptr->search_area_height;
   14220           0 :             if ((x_search_center != 0 || y_search_center != 0) &&
   14221           0 :                 (picture_control_set_ptr->is_used_as_reference_flag ==
   14222             :                  EB_TRUE)) {
   14223           0 :                 CheckZeroZeroCenter(refPicPtr,
   14224             :                                     context_ptr,
   14225             :                                     sb_origin_x,
   14226             :                                     sb_origin_y,
   14227             :                                     sb_width,
   14228             :                                     sb_height,
   14229             :                                     &x_search_center,
   14230             :                                     &y_search_center);
   14231             :             }
   14232           0 :             x_search_area_origin = x_search_center - (search_area_width >> 1);
   14233           0 :             y_search_area_origin = y_search_center - (search_area_height >> 1);
   14234             : 
   14235           0 :             if(sequence_control_set_ptr->static_config.unrestricted_motion_vector == 0)
   14236             :             {
   14237           0 :                 int tile_start_x = sequence_control_set_ptr->sb_params_array[sb_index].tile_start_x;
   14238           0 :                 int tile_end_x   = sequence_control_set_ptr->sb_params_array[sb_index].tile_end_x;
   14239             : 
   14240             :                 // Correct the left edge of the Search Area if it is not on the
   14241             :                 // reference Picture
   14242           0 :                 x_search_area_origin =
   14243           0 :                     ((origin_x + x_search_area_origin) < tile_start_x)
   14244           0 :                         ? tile_start_x - origin_x
   14245             :                         : x_search_area_origin;
   14246             : 
   14247           0 :                 search_area_width =
   14248           0 :                     ((origin_x + x_search_area_origin) < tile_start_x)
   14249           0 :                         ? search_area_width - (tile_start_x - (origin_x + x_search_area_origin))
   14250             :                         : search_area_width;
   14251             : 
   14252             :                 // Correct the right edge of the Search Area if it is not on the
   14253             :                 // reference Picture
   14254           0 :                 x_search_area_origin =
   14255           0 :                     ((origin_x + x_search_area_origin) > tile_end_x - 1)
   14256           0 :                         ? x_search_area_origin - ((origin_x + x_search_area_origin) - (tile_end_x - 1))
   14257             :                         : x_search_area_origin;
   14258             : 
   14259           0 :                 search_area_width =
   14260           0 :                     ((origin_x + x_search_area_origin + search_area_width) > tile_end_x)
   14261           0 :                         ? MAX(1, search_area_width - ((origin_x + x_search_area_origin + search_area_width) - tile_end_x))
   14262             :                         : search_area_width;
   14263             : 
   14264             :                 // Constrain x_ME to be a multiple of 8 (round down as cropping
   14265             :                 // already performed)
   14266           0 :                 search_area_width = (search_area_width < 8)
   14267             :                                         ? search_area_width
   14268             :                                         : search_area_width & ~0x07;
   14269             :             } else {
   14270             :             // Correct the left edge of the Search Area if it is not on the
   14271             :             // reference Picture
   14272           0 :             x_search_area_origin =
   14273           0 :                 ((origin_x + x_search_area_origin) < -padWidth)
   14274           0 :                     ? -padWidth - origin_x
   14275             :                     : x_search_area_origin;
   14276             : 
   14277           0 :             search_area_width =
   14278           0 :                 ((origin_x + x_search_area_origin) < -padWidth)
   14279           0 :                     ? search_area_width -
   14280           0 :                           (-padWidth - (origin_x + x_search_area_origin))
   14281             :                     : search_area_width;
   14282             : 
   14283             :             // Correct the right edge of the Search Area if it is not on the
   14284             :             // reference Picture
   14285           0 :             x_search_area_origin =
   14286           0 :                 ((origin_x + x_search_area_origin) > picture_width - 1)
   14287           0 :                     ? x_search_area_origin -
   14288           0 :                           ((origin_x + x_search_area_origin) -
   14289             :                            (picture_width - 1))
   14290             :                     : x_search_area_origin;
   14291             : 
   14292           0 :             search_area_width =
   14293           0 :                 ((origin_x + x_search_area_origin + search_area_width) >
   14294             :                  picture_width)
   14295           0 :                     ? MAX(1,
   14296             :                           search_area_width -
   14297             :                               ((origin_x + x_search_area_origin +
   14298             :                                 search_area_width) -
   14299             :                                picture_width))
   14300             :                     : search_area_width;
   14301             : 
   14302             :             // Constrain x_ME to be a multiple of 8 (round down as cropping
   14303             :             // already performed)
   14304           0 :             search_area_width = (search_area_width < 8)
   14305             :                                     ? search_area_width
   14306             :                                     : search_area_width & ~0x07;
   14307             :             }
   14308             : 
   14309           0 :             if(sequence_control_set_ptr->static_config.unrestricted_motion_vector == 0)
   14310             :             {
   14311           0 :                 int tile_start_y = sequence_control_set_ptr->sb_params_array[sb_index].tile_start_y;
   14312           0 :                 int tile_end_y   = sequence_control_set_ptr->sb_params_array[sb_index].tile_end_y;
   14313             : 
   14314             :                 // Correct the top edge of the Search Area if it is not on the
   14315             :                 // reference Picture
   14316           0 :                 y_search_area_origin =
   14317           0 :                     ((origin_y + y_search_area_origin) < tile_start_y)
   14318           0 :                         ? tile_start_y - origin_y
   14319             :                         : y_search_area_origin;
   14320             : 
   14321           0 :                 search_area_height =
   14322           0 :                     ((origin_y + y_search_area_origin) < tile_start_y)
   14323           0 :                         ? search_area_height - (tile_start_y - (origin_y + y_search_area_origin))
   14324             :                         : search_area_height;
   14325             : 
   14326             :                 // Correct the bottom edge of the Search Area if it is not on
   14327             :                 // the reference Picture
   14328           0 :                 y_search_area_origin =
   14329           0 :                     ((origin_y + y_search_area_origin) > tile_end_y - 1)
   14330           0 :                         ? y_search_area_origin - ((origin_y + y_search_area_origin) - (tile_end_y - 1))
   14331             :                         : y_search_area_origin;
   14332             : 
   14333           0 :                 search_area_height =
   14334           0 :                     (origin_y + y_search_area_origin + search_area_height > tile_end_y)
   14335           0 :                         ? MAX(1, search_area_height - ((origin_y + y_search_area_origin + search_area_height) - tile_end_y))
   14336             :                         : search_area_height;
   14337             :             } else {
   14338             :             // Correct the top edge of the Search Area if it is not on the
   14339             :             // reference Picture
   14340           0 :             y_search_area_origin =
   14341           0 :                 ((origin_y + y_search_area_origin) < -padHeight)
   14342           0 :                     ? -padHeight - origin_y
   14343             :                     : y_search_area_origin;
   14344             : 
   14345           0 :             search_area_height =
   14346           0 :                 ((origin_y + y_search_area_origin) < -padHeight)
   14347           0 :                     ? search_area_height -
   14348           0 :                           (-padHeight - (origin_y + y_search_area_origin))
   14349             :                     : search_area_height;
   14350             : 
   14351             :             // Correct the bottom edge of the Search Area if it is not on the
   14352             :             // reference Picture
   14353           0 :             y_search_area_origin =
   14354           0 :                 ((origin_y + y_search_area_origin) > picture_height - 1)
   14355           0 :                     ? y_search_area_origin -
   14356           0 :                           ((origin_y + y_search_area_origin) -
   14357             :                            (picture_height - 1))
   14358             :                     : y_search_area_origin;
   14359             : 
   14360           0 :             search_area_height =
   14361           0 :                 (origin_y + y_search_area_origin + search_area_height >
   14362             :                  picture_height)
   14363           0 :                     ? MAX(1,
   14364             :                           search_area_height -
   14365             :                               ((origin_y + y_search_area_origin +
   14366             :                                 search_area_height) -
   14367             :                                picture_height))
   14368             :                     : search_area_height;
   14369             :             }
   14370           0 :             context_ptr->x_search_area_origin[listIndex][ref_pic_index] =
   14371             :                 x_search_area_origin;
   14372           0 :             context_ptr->y_search_area_origin[listIndex][ref_pic_index] =
   14373             :                 y_search_area_origin;
   14374             : 
   14375           0 :             context_ptr->adj_search_area_width = search_area_width;
   14376           0 :             context_ptr->adj_search_area_height = search_area_height;
   14377             : 
   14378           0 :             xTopLeftSearchRegion =
   14379           0 :                 (int16_t)(refPicPtr->origin_x + sb_origin_x) -
   14380           0 :                 (ME_FILTER_TAP >> 1) + x_search_area_origin;
   14381           0 :             yTopLeftSearchRegion =
   14382           0 :                 (int16_t)(refPicPtr->origin_y + sb_origin_y) -
   14383           0 :                 (ME_FILTER_TAP >> 1) + y_search_area_origin;
   14384           0 :             searchRegionIndex = (xTopLeftSearchRegion) +
   14385           0 :                                 (yTopLeftSearchRegion)*refPicPtr->stride_y;
   14386           0 :             context_ptr->integer_buffer_ptr[listIndex][ref_pic_index] =
   14387           0 :                 &(refPicPtr->buffer_y[searchRegionIndex]);
   14388           0 :             context_ptr->interpolated_full_stride[listIndex][ref_pic_index] =
   14389           0 :                 refPicPtr->stride_y;
   14390             : 
   14391             :             // Move to the top left of the search region
   14392           0 :             xTopLeftSearchRegion =
   14393           0 :                 (int16_t)(refPicPtr->origin_x + sb_origin_x) +
   14394             :                 x_search_area_origin;
   14395           0 :             yTopLeftSearchRegion =
   14396           0 :                 (int16_t)(refPicPtr->origin_y + sb_origin_y) +
   14397             :                 y_search_area_origin;
   14398           0 :             searchRegionIndex = xTopLeftSearchRegion +
   14399           0 :                                 yTopLeftSearchRegion * refPicPtr->stride_y;
   14400             : 
   14401             :             {
   14402             :                 {
   14403           0 :                     if (picture_control_set_ptr->pic_depth_mode <=
   14404             :                         PIC_ALL_C_DEPTH_MODE) {
   14405           0 :                         initialize_buffer_32bits(
   14406             :                             context_ptr
   14407           0 :                                 ->p_sb_best_sad[listIndex][ref_pic_index],
   14408             :                             52,
   14409             :                             1,
   14410             :                             MAX_SAD_VALUE);
   14411             : 
   14412           0 :                         context_ptr->p_best_sad64x64 = &(
   14413             :                             context_ptr->p_sb_best_sad[listIndex][ref_pic_index]
   14414           0 :                                                       [ME_TIER_ZERO_PU_64x64]);
   14415           0 :                         context_ptr->p_best_sad32x32 =
   14416           0 :                             &(context_ptr
   14417             :                                   ->p_sb_best_sad[listIndex][ref_pic_index]
   14418           0 :                                                  [ME_TIER_ZERO_PU_32x32_0]);
   14419           0 :                         context_ptr->p_best_sad16x16 =
   14420           0 :                             &(context_ptr
   14421             :                                   ->p_sb_best_sad[listIndex][ref_pic_index]
   14422           0 :                                                  [ME_TIER_ZERO_PU_16x16_0]);
   14423           0 :                         context_ptr->p_best_sad8x8 = &(
   14424             :                             context_ptr->p_sb_best_sad[listIndex][ref_pic_index]
   14425           0 :                                                       [ME_TIER_ZERO_PU_8x8_0]);
   14426           0 :                         context_ptr->p_best_sad64x32 =
   14427           0 :                             &(context_ptr
   14428             :                                   ->p_sb_best_sad[listIndex][ref_pic_index]
   14429           0 :                                                  [ME_TIER_ZERO_PU_64x32_0]);
   14430           0 :                         context_ptr->p_best_sad32x16 =
   14431           0 :                             &(context_ptr
   14432             :                                   ->p_sb_best_sad[listIndex][ref_pic_index]
   14433           0 :                                                  [ME_TIER_ZERO_PU_32x16_0]);
   14434           0 :                         context_ptr->p_best_sad16x8 = &(
   14435             :                             context_ptr->p_sb_best_sad[listIndex][ref_pic_index]
   14436           0 :                                                       [ME_TIER_ZERO_PU_16x8_0]);
   14437           0 :                         context_ptr->p_best_sad32x64 =
   14438           0 :                             &(context_ptr
   14439             :                                   ->p_sb_best_sad[listIndex][ref_pic_index]
   14440           0 :                                                  [ME_TIER_ZERO_PU_32x64_0]);
   14441           0 :                         context_ptr->p_best_sad16x32 =
   14442           0 :                             &(context_ptr
   14443             :                                   ->p_sb_best_sad[listIndex][ref_pic_index]
   14444           0 :                                                  [ME_TIER_ZERO_PU_16x32_0]);
   14445           0 :                         context_ptr->p_best_sad8x16 = &(
   14446             :                             context_ptr->p_sb_best_sad[listIndex][ref_pic_index]
   14447           0 :                                                       [ME_TIER_ZERO_PU_8x16_0]);
   14448           0 :                         context_ptr->p_best_sad32x8 = &(
   14449             :                             context_ptr->p_sb_best_sad[listIndex][ref_pic_index]
   14450           0 :                                                       [ME_TIER_ZERO_PU_32x8_0]);
   14451           0 :                         context_ptr->p_best_sad8x32 = &(
   14452             :                             context_ptr->p_sb_best_sad[listIndex][ref_pic_index]
   14453           0 :                                                       [ME_TIER_ZERO_PU_8x32_0]);
   14454           0 :                         context_ptr->p_best_sad64x16 =
   14455           0 :                             &(context_ptr
   14456             :                                   ->p_sb_best_sad[listIndex][ref_pic_index]
   14457           0 :                                                  [ME_TIER_ZERO_PU_64x16_0]);
   14458           0 :                         context_ptr->p_best_sad16x64 =
   14459           0 :                             &(context_ptr
   14460             :                                   ->p_sb_best_sad[listIndex][ref_pic_index]
   14461           0 :                                                  [ME_TIER_ZERO_PU_16x64_0]);
   14462             : 
   14463           0 :                         context_ptr->p_best_mv64x64 = &(
   14464             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14465           0 :                                                      [ME_TIER_ZERO_PU_64x64]);
   14466           0 :                         context_ptr->p_best_mv32x32 = &(
   14467             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14468           0 :                                                      [ME_TIER_ZERO_PU_32x32_0]);
   14469           0 :                         context_ptr->p_best_mv16x16 = &(
   14470             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14471           0 :                                                      [ME_TIER_ZERO_PU_16x16_0]);
   14472           0 :                         context_ptr->p_best_mv8x8 = &(
   14473             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14474           0 :                                                      [ME_TIER_ZERO_PU_8x8_0]);
   14475           0 :                         context_ptr->p_best_mv64x32 = &(
   14476             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14477           0 :                                                      [ME_TIER_ZERO_PU_64x32_0]);
   14478           0 :                         context_ptr->p_best_mv32x16 = &(
   14479             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14480           0 :                                                      [ME_TIER_ZERO_PU_32x16_0]);
   14481           0 :                         context_ptr->p_best_mv16x8 = &(
   14482             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14483           0 :                                                      [ME_TIER_ZERO_PU_16x8_0]);
   14484           0 :                         context_ptr->p_best_mv32x64 = &(
   14485             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14486           0 :                                                      [ME_TIER_ZERO_PU_32x64_0]);
   14487           0 :                         context_ptr->p_best_mv16x32 = &(
   14488             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14489           0 :                                                      [ME_TIER_ZERO_PU_16x32_0]);
   14490           0 :                         context_ptr->p_best_mv8x16 = &(
   14491             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14492           0 :                                                      [ME_TIER_ZERO_PU_8x16_0]);
   14493           0 :                         context_ptr->p_best_mv32x8 = &(
   14494             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14495           0 :                                                      [ME_TIER_ZERO_PU_32x8_0]);
   14496           0 :                         context_ptr->p_best_mv8x32 = &(
   14497             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14498           0 :                                                      [ME_TIER_ZERO_PU_8x32_0]);
   14499           0 :                         context_ptr->p_best_mv64x16 = &(
   14500             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14501           0 :                                                      [ME_TIER_ZERO_PU_64x16_0]);
   14502           0 :                         context_ptr->p_best_mv16x64 = &(
   14503             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14504           0 :                                                      [ME_TIER_ZERO_PU_16x64_0]);
   14505             : 
   14506           0 :                         context_ptr->p_best_ssd64x64 = &(
   14507             :                             context_ptr->p_sb_best_ssd[listIndex][ref_pic_index]
   14508           0 :                                                       [ME_TIER_ZERO_PU_64x64]);
   14509           0 :                         context_ptr->p_best_ssd32x32 =
   14510           0 :                             &(context_ptr
   14511             :                                   ->p_sb_best_ssd[listIndex][ref_pic_index]
   14512           0 :                                                  [ME_TIER_ZERO_PU_32x32_0]);
   14513           0 :                         context_ptr->p_best_ssd16x16 =
   14514           0 :                             &(context_ptr
   14515             :                                   ->p_sb_best_ssd[listIndex][ref_pic_index]
   14516           0 :                                                  [ME_TIER_ZERO_PU_16x16_0]);
   14517           0 :                         context_ptr->p_best_ssd8x8 = &(
   14518             :                             context_ptr->p_sb_best_ssd[listIndex][ref_pic_index]
   14519           0 :                                                       [ME_TIER_ZERO_PU_8x8_0]);
   14520           0 :                         context_ptr->p_best_ssd64x32 =
   14521           0 :                             &(context_ptr
   14522             :                                   ->p_sb_best_ssd[listIndex][ref_pic_index]
   14523           0 :                                                  [ME_TIER_ZERO_PU_64x32_0]);
   14524           0 :                         context_ptr->p_best_ssd32x16 =
   14525           0 :                             &(context_ptr
   14526             :                                   ->p_sb_best_ssd[listIndex][ref_pic_index]
   14527           0 :                                                  [ME_TIER_ZERO_PU_32x16_0]);
   14528           0 :                         context_ptr->p_best_ssd16x8 = &(
   14529             :                             context_ptr->p_sb_best_ssd[listIndex][ref_pic_index]
   14530           0 :                                                       [ME_TIER_ZERO_PU_16x8_0]);
   14531           0 :                         context_ptr->p_best_ssd32x64 =
   14532           0 :                             &(context_ptr
   14533             :                                   ->p_sb_best_ssd[listIndex][ref_pic_index]
   14534           0 :                                                  [ME_TIER_ZERO_PU_32x64_0]);
   14535           0 :                         context_ptr->p_best_ssd16x32 =
   14536           0 :                             &(context_ptr
   14537             :                                   ->p_sb_best_ssd[listIndex][ref_pic_index]
   14538           0 :                                                  [ME_TIER_ZERO_PU_16x32_0]);
   14539           0 :                         context_ptr->p_best_ssd8x16 = &(
   14540             :                             context_ptr->p_sb_best_ssd[listIndex][ref_pic_index]
   14541           0 :                                                       [ME_TIER_ZERO_PU_8x16_0]);
   14542           0 :                         context_ptr->p_best_ssd32x8 = &(
   14543             :                             context_ptr->p_sb_best_ssd[listIndex][ref_pic_index]
   14544           0 :                                                       [ME_TIER_ZERO_PU_32x8_0]);
   14545           0 :                         context_ptr->p_best_ssd8x32 = &(
   14546             :                             context_ptr->p_sb_best_ssd[listIndex][ref_pic_index]
   14547           0 :                                                       [ME_TIER_ZERO_PU_8x32_0]);
   14548           0 :                         context_ptr->p_best_ssd64x16 =
   14549           0 :                             &(context_ptr
   14550             :                                   ->p_sb_best_ssd[listIndex][ref_pic_index]
   14551           0 :                                                  [ME_TIER_ZERO_PU_64x16_0]);
   14552           0 :                         context_ptr->p_best_ssd16x64 =
   14553           0 :                             &(context_ptr
   14554             :                                   ->p_sb_best_ssd[listIndex][ref_pic_index]
   14555           0 :                                                  [ME_TIER_ZERO_PU_16x64_0]);
   14556             : 
   14557           0 :                         open_loop_me_fullpel_search_sblock(context_ptr,
   14558             :                                                            listIndex,
   14559             :                                                            ref_pic_index,
   14560             :                                                            x_search_area_origin,
   14561             :                                                            y_search_area_origin,
   14562             :                                                            search_area_width,
   14563             :                                                            search_area_height);
   14564           0 :                         context_ptr->full_quarter_pel_refinement = 0;
   14565             : 
   14566           0 :                         if (context_ptr->half_pel_mode ==
   14567             :                             EX_HP_MODE) {
   14568             :                             // Move to the top left of the search region
   14569           0 :                             xTopLeftSearchRegion =
   14570           0 :                                 (int16_t)(refPicPtr->origin_x + sb_origin_x) +
   14571             :                                 x_search_area_origin;
   14572           0 :                             yTopLeftSearchRegion =
   14573           0 :                                 (int16_t)(refPicPtr->origin_y + sb_origin_y) +
   14574             :                                 y_search_area_origin;
   14575           0 :                             searchRegionIndex =
   14576           0 :                                 xTopLeftSearchRegion +
   14577           0 :                                 yTopLeftSearchRegion * refPicPtr->stride_y;
   14578             :                             // Interpolate the search region for Half-Pel
   14579             :                             // Refinements H - AVC Style
   14580           0 :                             InterpolateSearchRegionAVC(
   14581             :                                 context_ptr,
   14582             :                                 listIndex,
   14583             :                                 ref_pic_index,
   14584             :                                 context_ptr->integer_buffer_ptr[listIndex]
   14585           0 :                                                                [ref_pic_index] +
   14586           0 :                                     (ME_FILTER_TAP >> 1) +
   14587           0 :                                     ((ME_FILTER_TAP >> 1) *
   14588             :                                      context_ptr->interpolated_full_stride
   14589           0 :                                          [listIndex][ref_pic_index]),
   14590             :                                 context_ptr
   14591             :                                     ->interpolated_full_stride[listIndex]
   14592             :                                                               [ref_pic_index],
   14593           0 :                                 (uint32_t)search_area_width +
   14594             :                                     (BLOCK_SIZE_64 - 1),
   14595           0 :                                 (uint32_t)search_area_height +
   14596             :                                     (BLOCK_SIZE_64 - 1),
   14597             :                                 8);
   14598             : 
   14599           0 :                             initialize_buffer_32bits(
   14600             :                                 context_ptr
   14601           0 :                                     ->p_sb_best_ssd[listIndex][ref_pic_index],
   14602             :                                 52,
   14603             :                                 1,
   14604             :                                 MAX_SSE_VALUE);
   14605           0 :                             memcpy(context_ptr
   14606             :                                        ->p_sb_best_full_pel_mv[listIndex]
   14607           0 :                                                               [ref_pic_index],
   14608             :                                    context_ptr
   14609           0 :                                        ->p_sb_best_mv[listIndex][ref_pic_index],
   14610             :                                    MAX_ME_PU_COUNT * sizeof(uint32_t));
   14611           0 :                             context_ptr->full_quarter_pel_refinement = 1;
   14612           0 :                             context_ptr->p_best_full_pel_mv64x64 =
   14613           0 :                                 &(context_ptr->p_sb_best_full_pel_mv
   14614             :                                       [listIndex][ref_pic_index]
   14615           0 :                                       [ME_TIER_ZERO_PU_64x64]);
   14616           0 :                             context_ptr->p_best_full_pel_mv32x32 =
   14617           0 :                                 &(context_ptr->p_sb_best_full_pel_mv
   14618             :                                       [listIndex][ref_pic_index]
   14619           0 :                                       [ME_TIER_ZERO_PU_32x32_0]);
   14620           0 :                             context_ptr->p_best_full_pel_mv16x16 =
   14621           0 :                                 &(context_ptr->p_sb_best_full_pel_mv
   14622             :                                       [listIndex][ref_pic_index]
   14623           0 :                                       [ME_TIER_ZERO_PU_16x16_0]);
   14624           0 :                             context_ptr->p_best_full_pel_mv8x8 =
   14625           0 :                                 &(context_ptr->p_sb_best_full_pel_mv
   14626             :                                       [listIndex][ref_pic_index]
   14627           0 :                                       [ME_TIER_ZERO_PU_8x8_0]);
   14628           0 :                             context_ptr->p_best_full_pel_mv64x32 =
   14629           0 :                                 &(context_ptr->p_sb_best_full_pel_mv
   14630             :                                       [listIndex][ref_pic_index]
   14631           0 :                                       [ME_TIER_ZERO_PU_64x32_0]);
   14632           0 :                             context_ptr->p_best_full_pel_mv32x16 =
   14633           0 :                                 &(context_ptr->p_sb_best_full_pel_mv
   14634             :                                       [listIndex][ref_pic_index]
   14635           0 :                                       [ME_TIER_ZERO_PU_32x16_0]);
   14636           0 :                             context_ptr->p_best_full_pel_mv16x8 =
   14637           0 :                                 &(context_ptr->p_sb_best_full_pel_mv
   14638             :                                       [listIndex][ref_pic_index]
   14639           0 :                                       [ME_TIER_ZERO_PU_16x8_0]);
   14640           0 :                             context_ptr->p_best_full_pel_mv32x64 =
   14641           0 :                                 &(context_ptr->p_sb_best_full_pel_mv
   14642             :                                       [listIndex][ref_pic_index]
   14643           0 :                                       [ME_TIER_ZERO_PU_32x64_0]);
   14644           0 :                             context_ptr->p_best_full_pel_mv16x32 =
   14645           0 :                                 &(context_ptr->p_sb_best_full_pel_mv
   14646             :                                       [listIndex][ref_pic_index]
   14647           0 :                                       [ME_TIER_ZERO_PU_16x32_0]);
   14648           0 :                             context_ptr->p_best_full_pel_mv8x16 =
   14649           0 :                                 &(context_ptr->p_sb_best_full_pel_mv
   14650             :                                       [listIndex][ref_pic_index]
   14651           0 :                                       [ME_TIER_ZERO_PU_8x16_0]);
   14652           0 :                             context_ptr->p_best_full_pel_mv32x8 =
   14653           0 :                                 &(context_ptr->p_sb_best_full_pel_mv
   14654             :                                       [listIndex][ref_pic_index]
   14655           0 :                                       [ME_TIER_ZERO_PU_32x8_0]);
   14656           0 :                             context_ptr->p_best_full_pel_mv8x32 =
   14657           0 :                                 &(context_ptr->p_sb_best_full_pel_mv
   14658             :                                       [listIndex][ref_pic_index]
   14659           0 :                                       [ME_TIER_ZERO_PU_8x32_0]);
   14660           0 :                             context_ptr->p_best_full_pel_mv64x16 =
   14661           0 :                                 &(context_ptr->p_sb_best_full_pel_mv
   14662             :                                       [listIndex][ref_pic_index]
   14663           0 :                                       [ME_TIER_ZERO_PU_64x16_0]);
   14664           0 :                             context_ptr->p_best_full_pel_mv16x64 =
   14665           0 :                                 &(context_ptr->p_sb_best_full_pel_mv
   14666             :                                       [listIndex][ref_pic_index]
   14667           0 :                                       [ME_TIER_ZERO_PU_16x64_0]);
   14668             :                             // half-Pel search
   14669           0 :                             open_loop_me_half_pel_search_sblock(
   14670             :                                 picture_control_set_ptr,
   14671             :                                 context_ptr,
   14672             :                                 listIndex,
   14673             :                                 ref_pic_index,
   14674             :                                 x_search_area_origin,
   14675             :                                 y_search_area_origin,
   14676             :                                 search_area_width,
   14677             :                                 search_area_height);
   14678             :                         }
   14679             : 
   14680           0 :                         if (context_ptr->quarter_pel_mode ==
   14681             :                             EX_QP_MODE) {
   14682             :                             // Quarter-Pel search
   14683           0 :                             memcpy(context_ptr
   14684             :                                        ->p_sb_best_full_pel_mv[listIndex]
   14685           0 :                                                               [ref_pic_index],
   14686             :                                    context_ptr
   14687           0 :                                        ->p_sb_best_mv[listIndex][ref_pic_index],
   14688             :                                    MAX_ME_PU_COUNT * sizeof(uint32_t));
   14689           0 :                             open_loop_me_quarter_pel_search_sblock(
   14690             :                                 context_ptr,
   14691             :                                 listIndex,
   14692             :                                 ref_pic_index,
   14693             :                                 x_search_area_origin,
   14694             :                                 y_search_area_origin,
   14695             :                                 search_area_width,
   14696             :                                 search_area_height);
   14697             :                         }
   14698             :                     } else {
   14699           0 :                         initialize_buffer_32bits(
   14700             :                             context_ptr
   14701           0 :                                 ->p_sb_best_sad[listIndex][ref_pic_index],
   14702             :                             21,
   14703             :                             1,
   14704             :                             MAX_SAD_VALUE);
   14705             : 
   14706           0 :                         context_ptr->full_quarter_pel_refinement = 0;
   14707             : 
   14708           0 :                         context_ptr->p_best_sad64x64 = &(
   14709             :                             context_ptr->p_sb_best_sad[listIndex][ref_pic_index]
   14710           0 :                                                       [ME_TIER_ZERO_PU_64x64]);
   14711           0 :                         context_ptr->p_best_sad32x32 =
   14712           0 :                             &(context_ptr
   14713             :                                   ->p_sb_best_sad[listIndex][ref_pic_index]
   14714           0 :                                                  [ME_TIER_ZERO_PU_32x32_0]);
   14715           0 :                         context_ptr->p_best_sad16x16 =
   14716           0 :                             &(context_ptr
   14717             :                                   ->p_sb_best_sad[listIndex][ref_pic_index]
   14718           0 :                                                  [ME_TIER_ZERO_PU_16x16_0]);
   14719           0 :                         context_ptr->p_best_sad8x8 = &(
   14720             :                             context_ptr->p_sb_best_sad[listIndex][ref_pic_index]
   14721           0 :                                                       [ME_TIER_ZERO_PU_8x8_0]);
   14722             : 
   14723           0 :                         context_ptr->p_best_mv64x64 = &(
   14724             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14725           0 :                                                      [ME_TIER_ZERO_PU_64x64]);
   14726           0 :                         context_ptr->p_best_mv32x32 = &(
   14727             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14728           0 :                                                      [ME_TIER_ZERO_PU_32x32_0]);
   14729           0 :                         context_ptr->p_best_mv16x16 = &(
   14730             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14731           0 :                                                      [ME_TIER_ZERO_PU_16x16_0]);
   14732           0 :                         context_ptr->p_best_mv8x8 = &(
   14733             :                             context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
   14734           0 :                                                      [ME_TIER_ZERO_PU_8x8_0]);
   14735             : 
   14736           0 :                         context_ptr->p_best_ssd64x64 = &(
   14737             :                             context_ptr->p_sb_best_ssd[listIndex][ref_pic_index]
   14738           0 :                                                       [ME_TIER_ZERO_PU_64x64]);
   14739           0 :                         context_ptr->p_best_ssd32x32 =
   14740           0 :                             &(context_ptr
   14741             :                                   ->p_sb_best_ssd[listIndex][ref_pic_index]
   14742           0 :                                                  [ME_TIER_ZERO_PU_32x32_0]);
   14743           0 :                         context_ptr->p_best_ssd16x16 =
   14744           0 :                             &(context_ptr
   14745             :                                   ->p_sb_best_ssd[listIndex][ref_pic_index]
   14746           0 :                                                  [ME_TIER_ZERO_PU_16x16_0]);
   14747           0 :                         context_ptr->p_best_ssd8x8 = &(
   14748             :                             context_ptr->p_sb_best_ssd[listIndex][ref_pic_index]
   14749           0 :                                                       [ME_TIER_ZERO_PU_8x8_0]);
   14750           0 :                         FullPelSearch_LCU(context_ptr,
   14751             :                                           listIndex,
   14752             :                                           ref_pic_index,
   14753             :                                           x_search_area_origin,
   14754             :                                           y_search_area_origin,
   14755             :                                           search_area_width,
   14756             :                                           search_area_height);
   14757             :                     }
   14758             :                 }
   14759             : 
   14760           0 :                 if (context_ptr->fractional_search_model == 0) {
   14761           0 :                     enableHalfPel32x32 = EB_TRUE;
   14762           0 :                     enableHalfPel16x16 = EB_TRUE;
   14763           0 :                     enableHalfPel8x8 = EB_TRUE;
   14764           0 :                     enableQuarterPel = EB_TRUE;
   14765           0 :                 } else if (context_ptr->fractional_search_model == 1) {
   14766           0 :                     suPelEnable(context_ptr,
   14767             :                                 picture_control_set_ptr,
   14768             :                                 listIndex,
   14769             :                                 0,
   14770             :                                 &enableHalfPel32x32,
   14771             :                                 &enableHalfPel16x16,
   14772             :                                 &enableHalfPel8x8);
   14773           0 :                     enableQuarterPel = EB_TRUE;
   14774             :                 } else {
   14775           0 :                     enableHalfPel32x32 = EB_FALSE;
   14776           0 :                     enableHalfPel16x16 = EB_FALSE;
   14777           0 :                     enableHalfPel8x8 = EB_FALSE;
   14778           0 :                     enableQuarterPel = EB_FALSE;
   14779             :                 }
   14780           0 :                 if (enableHalfPel32x32 || enableHalfPel16x16 ||
   14781           0 :                     enableHalfPel8x8 || enableQuarterPel) {
   14782             :                     // if((picture_control_set_ptr->is_used_as_reference_flag ==
   14783             :                     // EB_TRUE)) {
   14784             :                     // Move to the top left of the search region
   14785           0 :                     xTopLeftSearchRegion =
   14786           0 :                         (int16_t)(refPicPtr->origin_x + sb_origin_x) +
   14787             :                         x_search_area_origin;
   14788           0 :                     yTopLeftSearchRegion =
   14789           0 :                         (int16_t)(refPicPtr->origin_y + sb_origin_y) +
   14790             :                         y_search_area_origin;
   14791           0 :                     searchRegionIndex =
   14792           0 :                         xTopLeftSearchRegion +
   14793           0 :                         yTopLeftSearchRegion * refPicPtr->stride_y;
   14794             : 
   14795             :                     // Interpolate the search region for Half-Pel Refinements
   14796             :                     // H - AVC Style
   14797             : 
   14798           0 :                     if (context_ptr->half_pel_mode ==
   14799             :                         REFINMENT_HP_MODE) {
   14800           0 :                         InterpolateSearchRegionAVC(
   14801             :                             context_ptr,
   14802             :                             listIndex,
   14803             :                             ref_pic_index,
   14804             :                             context_ptr->integer_buffer_ptr[listIndex]
   14805           0 :                                                            [ref_pic_index] +
   14806           0 :                                 (ME_FILTER_TAP >> 1) +
   14807           0 :                                 ((ME_FILTER_TAP >> 1) *
   14808             :                                  context_ptr
   14809             :                                      ->interpolated_full_stride[listIndex]
   14810           0 :                                                                [ref_pic_index]),
   14811             :                             context_ptr
   14812             :                                 ->interpolated_full_stride[listIndex]
   14813             :                                                           [ref_pic_index],
   14814           0 :                             (uint32_t)search_area_width + (BLOCK_SIZE_64 - 1),
   14815           0 :                             (uint32_t)search_area_height + (BLOCK_SIZE_64 - 1),
   14816             :                             8);
   14817             : 
   14818             :                         // Half-Pel Refinement [8 search positions]
   14819           0 :                         HalfPelSearch_LCU(
   14820             :                             sequence_control_set_ptr,
   14821             :                             picture_control_set_ptr,
   14822             :                             context_ptr,
   14823             : #if M0_HIGH_PRECISION_INTERPOLATION
   14824             :                             context_ptr->integer_buffer_ptr[listIndex]
   14825             :                                                            [ref_pic_index] +
   14826             :                                 (ME_FILTER_PAD_DISTANCE >> 1) +
   14827             :                                 ((ME_FILTER_PAD_DISTANCE >> 1) *
   14828             :                                  context_ptr
   14829             :                                      ->interpolated_full_stride[listIndex]
   14830             :                                                                [ref_pic_index]),
   14831             :                             context_ptr
   14832             :                                 ->interpolated_full_stride[listIndex]
   14833             :                                                           [ref_pic_index],
   14834             :                             &(context_ptr->pos_b_buffer
   14835             :                                   [listIndex][ref_pic_index]
   14836             :                                   [(ME_FILTER_PAD_DISTANCE >> 1) *
   14837             :                                    context_ptr->interpolated_stride]),
   14838             : #else
   14839             :                             context_ptr->integer_buffer_ptr[listIndex]
   14840           0 :                                                            [ref_pic_index] +
   14841           0 :                                 (ME_FILTER_TAP >> 1) +
   14842           0 :                                 ((ME_FILTER_TAP >> 1) *
   14843             :                                  context_ptr
   14844             :                                      ->interpolated_full_stride[listIndex]
   14845           0 :                                                                [ref_pic_index]),
   14846             :                             context_ptr
   14847             :                                 ->interpolated_full_stride[listIndex]
   14848             :                                                           [ref_pic_index],
   14849             :                             &(context_ptr->pos_b_buffer
   14850           0 :                                   [listIndex][ref_pic_index]
   14851           0 :                                   [(ME_FILTER_TAP >> 1) *
   14852           0 :                                    context_ptr->interpolated_stride]),
   14853             : #endif
   14854             :                             &(context_ptr
   14855           0 :                                   ->pos_h_buffer[listIndex][ref_pic_index][1]),
   14856             :                             &(context_ptr
   14857             :                                   ->pos_j_buffer[listIndex][ref_pic_index][0]),
   14858             :                             x_search_area_origin,
   14859             :                             y_search_area_origin,
   14860           0 :                             picture_control_set_ptr->cu8x8_mode ==
   14861             :                                 CU_8x8_MODE_1,
   14862             :                             enableHalfPel32x32,
   14863             :                             enableHalfPel16x16,
   14864             :                             enableHalfPel8x8);
   14865             :                     }
   14866             : 
   14867           0 :                     if (context_ptr->quarter_pel_mode ==
   14868             :                         REFINMENT_QP_MODE) {
   14869             :                         // Quarter-Pel Refinement [8 search positions]
   14870           0 :                         QuarterPelSearch_LCU(
   14871             :                             context_ptr,
   14872             : #if M0_HIGH_PRECISION_INTERPOLATION
   14873             :                             context_ptr->integer_buffer_ptr[listIndex]
   14874             :                                                            [ref_pic_index] +
   14875             :                                 (ME_FILTER_PAD_DISTANCE >> 1) +
   14876             :                                 ((ME_FILTER_PAD_DISTANCE >> 1) *
   14877             :                                  context_ptr
   14878             :                                      ->interpolated_full_stride[listIndex]
   14879             :                                                                [ref_pic_index]),
   14880             :                             context_ptr
   14881             :                                 ->interpolated_full_stride[listIndex]
   14882             :                                                           [ref_pic_index],
   14883             :                             &(context_ptr->pos_b_buffer
   14884             :                                   [listIndex][ref_pic_index]
   14885             :                                   [(ME_FILTER_PAD_DISTANCE >> 1) *
   14886             :                                    context_ptr
   14887             :                                        ->interpolated_stride]),  // points to b
   14888             :                                                                  // position of
   14889             :                                                                  // the figure
   14890             :                                                                  // above
   14891             : #else
   14892             :                             context_ptr->integer_buffer_ptr[listIndex]
   14893           0 :                                                            [ref_pic_index] +
   14894           0 :                                 (ME_FILTER_TAP >> 1) +
   14895           0 :                                 ((ME_FILTER_TAP >> 1) *
   14896             :                                  context_ptr
   14897             :                                      ->interpolated_full_stride[listIndex]
   14898           0 :                                                                [ref_pic_index]),
   14899             :                             context_ptr
   14900             :                                 ->interpolated_full_stride[listIndex]
   14901             :                                                           [ref_pic_index],
   14902             :                             &(context_ptr->pos_b_buffer
   14903           0 :                                   [listIndex][ref_pic_index]
   14904           0 :                                   [(ME_FILTER_TAP >> 1) *
   14905             :                                    context_ptr
   14906           0 :                                        ->interpolated_stride]),  // points to b
   14907             :                                                                  // position of
   14908             :                                                                  // the figure
   14909             :                                                                  // above
   14910             : #endif
   14911             :                             &(context_ptr
   14912           0 :                                   ->pos_h_buffer[listIndex][ref_pic_index]
   14913             :                                                 [1]),  // points to h position
   14914             :                                                        // of the figure above
   14915             :                             &(context_ptr
   14916             :                                   ->pos_j_buffer[listIndex][ref_pic_index]
   14917             :                                                 [0]),  // points to j position
   14918             :                                                        // of the figure above
   14919             :                             x_search_area_origin,
   14920             :                             y_search_area_origin,
   14921           0 :                             picture_control_set_ptr->cu8x8_mode ==
   14922             :                                 CU_8x8_MODE_1,
   14923             :                             enableHalfPel32x32,
   14924             :                             enableHalfPel16x16,
   14925             :                             enableHalfPel8x8,
   14926             :                             enableQuarterPel,
   14927           0 :                             picture_control_set_ptr->pic_depth_mode <=
   14928             :                                 PIC_ALL_C_DEPTH_MODE);
   14929             :                     }
   14930             :                 }
   14931           0 :                 if (is_nsq_table_used && ref_pic_index == 0) {
   14932           0 :                     context_ptr->p_best_nsq64x64 =
   14933           0 :                         &(context_ptr->p_sb_best_nsq[listIndex][0]
   14934             :                                                     [ME_TIER_ZERO_PU_64x64]);
   14935           0 :                     context_ptr->p_best_nsq32x32 =
   14936           0 :                         &(context_ptr->p_sb_best_nsq[listIndex][0]
   14937             :                                                     [ME_TIER_ZERO_PU_32x32_0]);
   14938           0 :                     context_ptr->p_best_nsq16x16 =
   14939           0 :                         &(context_ptr->p_sb_best_nsq[listIndex][0]
   14940             :                                                     [ME_TIER_ZERO_PU_16x16_0]);
   14941           0 :                     context_ptr->p_best_nsq8x8 =
   14942           0 :                         &(context_ptr->p_sb_best_nsq[listIndex][0]
   14943             :                                                     [ME_TIER_ZERO_PU_8x8_0]);
   14944           0 :                     nsq_get_analysis_results_block(context_ptr);
   14945             :                 }
   14946             :         }
   14947             :     }
   14948             : }
   14949             : 
   14950           0 : if (context_ptr->me_alt_ref == EB_FALSE) {
   14951             : 
   14952             :     // Bi-Prediction motion estimation loop
   14953           0 :     for (pu_index = 0; pu_index < max_number_of_pus_per_sb; ++pu_index) {
   14954           0 :         candidateIndex = 0;
   14955             : 
   14956             :         uint32_t nIdx;
   14957             : 
   14958           0 :         if (pu_index > 200)
   14959           0 :             nIdx = pu_index;
   14960           0 :         else if (pu_index > 184)
   14961           0 :             nIdx = tab8x32[pu_index - 185] + 185;
   14962           0 :         else if (pu_index > 168)
   14963           0 :             nIdx = tab32x8[pu_index - 169] + 169;
   14964           0 :         else if (pu_index > 136)
   14965           0 :             nIdx = tab8x16[pu_index - 137] + 137;
   14966           0 :         else if (pu_index > 128)
   14967           0 :             nIdx = tab16x32[pu_index - 129] + 129;
   14968           0 :         else if (pu_index > 126)
   14969           0 :             nIdx = pu_index;
   14970           0 :         else if (pu_index > 94)
   14971           0 :             nIdx = tab16x8[pu_index - 95] + 95;
   14972           0 :         else if (pu_index > 86)
   14973           0 :             nIdx = tab32x16[pu_index - 87] + 87;
   14974           0 :         else if (pu_index > 84)
   14975           0 :             nIdx = pu_index;
   14976           0 :         else if (pu_index > 20)
   14977           0 :             nIdx = tab8x8[pu_index - 21] + 21;
   14978           0 :         else if (pu_index > 4)
   14979           0 :             nIdx = tab16x16[pu_index - 5] + 5;
   14980             :         else
   14981           0 :             nIdx = pu_index;
   14982           0 :         for (listIndex = REF_LIST_0; listIndex <= numOfListToSearch;
   14983           0 :              ++listIndex) {
   14984           0 :             num_of_ref_pic_to_search =
   14985           0 :                 (picture_control_set_ptr->slice_type == P_SLICE)
   14986             :                     ? picture_control_set_ptr->ref_list0_count
   14987             :                     : (listIndex == REF_LIST_0)
   14988             :                           ? picture_control_set_ptr->ref_list0_count
   14989             :                           : picture_control_set_ptr->ref_list1_count;
   14990             : 
   14991             :             // Ref Picture Loop
   14992           0 :             for (ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search;
   14993           0 :                  ++ref_pic_index) {
   14994           0 :                 me_candidate =
   14995           0 :                     &(context_ptr->me_candidate[candidateIndex].pu[pu_index]);
   14996           0 :                 me_candidate->prediction_direction = listIndex;
   14997           0 :                 me_candidate->ref_index[listIndex] = ref_pic_index;
   14998           0 :                 me_candidate->ref0_list = me_candidate->prediction_direction == 0 ? listIndex : 24;
   14999           0 :                 me_candidate->ref1_list = me_candidate->prediction_direction == 1 ? listIndex : 24;
   15000           0 :                 me_candidate->distortion =
   15001           0 :                     context_ptr->p_sb_best_sad[listIndex][ref_pic_index][nIdx];
   15002           0 :                 candidateIndex++;
   15003             :             }
   15004             :         }
   15005             : 
   15006           0 :         total_me_candidate_index = candidateIndex;
   15007             :         uint8_t ref_type_table[7];
   15008           0 :         if (picture_control_set_ptr->prune_unipred_at_me) {
   15009             :             // Sorting of the ME candidates
   15010           0 :             for (candidate_index = 0;
   15011           0 :                 candidate_index < total_me_candidate_index - 1;
   15012           0 :                 ++candidate_index) {
   15013           0 :                 for (next_candidate_index = candidate_index + 1;
   15014           0 :                     next_candidate_index < total_me_candidate_index;
   15015           0 :                     ++next_candidate_index) {
   15016           0 :                     if (context_ptr->me_candidate[candidate_index]
   15017             :                         .pu[pu_index]
   15018           0 :                         .distortion >
   15019           0 :                         context_ptr->me_candidate[next_candidate_index]
   15020             :                         .pu[pu_index]
   15021           0 :                         .distortion) {
   15022           0 :                         SwapMeCandidate(
   15023           0 :                             &(context_ptr->me_candidate[candidate_index]
   15024             :                                 .pu[pu_index]),
   15025           0 :                             &(context_ptr->me_candidate[next_candidate_index]
   15026             :                                 .pu[pu_index]));
   15027             :                     }
   15028             :                 }
   15029             :             }
   15030           0 :             for (candidate_index = 0;
   15031           0 :                 candidate_index < total_me_candidate_index;
   15032           0 :                 ++candidate_index) {
   15033             : 
   15034           0 :                 me_candidate =
   15035           0 :                     &(context_ptr->me_candidate[candidate_index].pu[pu_index]);
   15036             : 
   15037           0 :                 if (me_candidate->prediction_direction == 0)
   15038           0 :                     ref_type_table[candidate_index] = svt_get_ref_frame_type(me_candidate->ref0_list, me_candidate->ref_index[0]);
   15039             :                 else
   15040           0 :                     ref_type_table[candidate_index] = svt_get_ref_frame_type(me_candidate->ref1_list, me_candidate->ref_index[1]);
   15041             : 
   15042             :             }
   15043             :         }
   15044           0 :         if (numOfListToSearch) {
   15045           0 :             if (picture_control_set_ptr->cu8x8_mode == CU_8x8_MODE_0 ||
   15046           0 :                 pu_index < 21 ||
   15047           0 :                 (picture_control_set_ptr->pic_depth_mode <=
   15048             :                  PIC_ALL_C_DEPTH_MODE)) {
   15049           0 :                 BiPredictionSearch(
   15050             :                     sequence_control_set_ptr,
   15051             :                     context_ptr,
   15052             :                     pu_index,
   15053             :                     candidateIndex,
   15054           0 :                     picture_control_set_ptr->ref_list0_count,
   15055           0 :                     picture_control_set_ptr->ref_list1_count,
   15056             :                     &total_me_candidate_index,
   15057             :                     ref_type_table,
   15058             :                     picture_control_set_ptr);
   15059             :             }
   15060             :         }
   15061             : 
   15062             :         // Sorting of the ME candidates
   15063           0 :         for (candidate_index = 0;
   15064           0 :              candidate_index < total_me_candidate_index - 1;
   15065           0 :              ++candidate_index) {
   15066           0 :             for (next_candidate_index = candidate_index + 1;
   15067           0 :                  next_candidate_index < total_me_candidate_index;
   15068           0 :                  ++next_candidate_index) {
   15069           0 :                 if (context_ptr->me_candidate[candidate_index]
   15070             :                         .pu[pu_index]
   15071           0 :                         .distortion >
   15072           0 :                     context_ptr->me_candidate[next_candidate_index]
   15073             :                         .pu[pu_index]
   15074           0 :                         .distortion) {
   15075           0 :                     SwapMeCandidate(
   15076           0 :                         &(context_ptr->me_candidate[candidate_index]
   15077             :                               .pu[pu_index]),
   15078           0 :                         &(context_ptr->me_candidate[next_candidate_index]
   15079             :                               .pu[pu_index]));
   15080             :                 }
   15081             :             }
   15082             :         }
   15083             : 
   15084           0 :         MeLcuResults *mePuResult =
   15085           0 :             picture_control_set_ptr->me_results[sb_index];
   15086           0 :         mePuResult->total_me_candidate_index[pu_index] =
   15087             :             total_me_candidate_index;
   15088             : 
   15089           0 :         uint8_t l0_nsq =
   15090             :             is_nsq_table_used ? context_ptr->p_sb_best_nsq[0][0][nIdx] : 0;
   15091           0 :         uint8_t l1_nsq =
   15092             :             is_nsq_table_used ? context_ptr->p_sb_best_nsq[1][0][nIdx] : 0;
   15093           0 :         mePuResult->me_nsq_0[pu_index] = l0_nsq;
   15094           0 :         mePuResult->me_nsq_1[pu_index] = l1_nsq;
   15095             : 
   15096           0 :         mePuResult->total_me_candidate_index[pu_index] =
   15097           0 :             MIN(total_me_candidate_index, ME_RES_CAND_MRP_MODE_0);
   15098             :         // Assigning the ME candidates to the ME results buffer
   15099           0 :         for (candidateIndex = 0; candidateIndex < total_me_candidate_index;
   15100           0 :              ++candidateIndex) {
   15101           0 :             me_candidate =
   15102           0 :                 &(context_ptr->me_candidate[candidateIndex].pu[pu_index]);
   15103           0 :             picture_control_set_ptr->me_results[sb_index]
   15104           0 :                 ->me_candidate[pu_index][candidateIndex]
   15105           0 :                 .distortion = me_candidate->distortion;
   15106           0 :             picture_control_set_ptr->me_results[sb_index]
   15107           0 :                 ->me_candidate[pu_index][candidateIndex]
   15108           0 :                 .direction = me_candidate->prediction_direction;
   15109           0 :             picture_control_set_ptr->me_results[sb_index]
   15110           0 :                 ->me_candidate[pu_index][candidateIndex]
   15111           0 :                 .ref_idx_l0 = me_candidate->ref_index[0];
   15112           0 :             picture_control_set_ptr->me_results[sb_index]
   15113           0 :                 ->me_candidate[pu_index][candidateIndex]
   15114           0 :                 .ref_idx_l1 = me_candidate->ref_index[1];
   15115           0 :             picture_control_set_ptr->me_results[sb_index]
   15116           0 :                 ->me_candidate[pu_index][candidateIndex]
   15117           0 :                 .ref0_list = me_candidate->ref0_list;
   15118           0 :             picture_control_set_ptr->me_results[sb_index]
   15119           0 :                 ->me_candidate[pu_index][candidateIndex]
   15120           0 :                 .ref1_list = me_candidate->ref1_list;
   15121             :         }
   15122             : 
   15123           0 :         for (listIndex = REF_LIST_0; listIndex <= numOfListToSearch;
   15124           0 :              ++listIndex) {
   15125           0 :             num_of_ref_pic_to_search =
   15126           0 :                 (picture_control_set_ptr->slice_type == P_SLICE)
   15127             :                     ? picture_control_set_ptr->ref_list0_count
   15128             :                     : (listIndex == REF_LIST_0)
   15129             :                           ? picture_control_set_ptr->ref_list0_count
   15130             :                           : picture_control_set_ptr->ref_list1_count;
   15131             : 
   15132             :             // Ref Picture Loop
   15133           0 :             for (ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search;
   15134           0 :                  ++ref_pic_index) {
   15135           0 :                 picture_control_set_ptr->me_results[sb_index]
   15136           0 :                     ->me_mv_array[pu_index]
   15137           0 :                                  [((listIndex &&
   15138           0 :                                     sequence_control_set_ptr->mrp_mode == 0)
   15139             :                                        ? 4
   15140           0 :                                        : listIndex ? 2 : 0) +
   15141             :                                   ref_pic_index]
   15142           0 :                     .x_mv = _MVXT(
   15143             :                     context_ptr->p_sb_best_mv[listIndex][ref_pic_index][nIdx]);
   15144           0 :                 picture_control_set_ptr->me_results[sb_index]
   15145           0 :                     ->me_mv_array[pu_index]
   15146           0 :                                  [((listIndex &&
   15147           0 :                                     sequence_control_set_ptr->mrp_mode == 0)
   15148             :                                        ? 4
   15149           0 :                                        : listIndex ? 2 : 0) +
   15150             :                                   ref_pic_index]
   15151           0 :                     .y_mv = _MVYT(
   15152             :                     context_ptr->p_sb_best_mv[listIndex][ref_pic_index][nIdx]);
   15153             :             }
   15154             :         }
   15155             :     }
   15156             :     {
   15157             :         // Compute the sum of the distortion of all 16 16x16 (best) blocks
   15158             :         // in the LCU
   15159           0 :         picture_control_set_ptr->rc_me_distortion[sb_index] = 0;
   15160           0 :         for (i = 0; i < 16; i++)
   15161           0 :             picture_control_set_ptr->rc_me_distortion[sb_index] +=
   15162           0 :                 picture_control_set_ptr->me_results[sb_index]
   15163           0 :                     ->me_candidate[5 + i][0]
   15164           0 :                     .distortion;
   15165             :     }
   15166             : 
   15167             : }
   15168             : 
   15169           0 : return return_error;
   15170             : }
   15171             : 
   15172             : /*******************************************
   15173             :  * SixteenthDecimatedSearch
   15174             :  *  performs a 1/16 decimated search: centres a search_area_width x search_area_height window on (origin_x, origin_y) in sixteenthRefPicPtr, adjusts the window against the picture/padding limits, runs one of two SAD search kernels over it and returns the best SAD that kernel reports
   15175             :  *******************************************/
   15176           0 : uint64_t SixteenthDecimatedSearch(MeContext *context_ptr, int16_t origin_x,  // origin_x/origin_y are added to sixteenthRefPicPtr->origin_x/origin_y to locate the block in buffer_y
   15177             :                                   int16_t origin_y, uint32_t sb_width,
   15178             :                                   uint32_t sb_height,  // sb_width / sb_height are forwarded to the SAD kernel (height is halved first)
   15179             :                                   EbPictureBufferDesc *sixteenthRefPicPtr,  // reference picture that is searched; presumably the 1/16-decimated plane - confirm with caller
   15180             :                                   int16_t search_area_width,  // requested window size; may be reduced by the edge corrections below
   15181             :                                   int16_t search_area_height)
   15182             : {
   15183             :     int16_t xTopLeftSearchRegion;
   15184             :     int16_t yTopLeftSearchRegion;
   15185             :     uint32_t searchRegionIndex;  // offset of the window's top-left sample inside buffer_y
   15186             :     int16_t x_search_area_origin;  // window origin expressed relative to (origin_x, origin_y)
   15187             :     int16_t y_search_area_origin;
   15188             : 
   15189           0 :     int16_t padWidth = (int16_t)(sixteenthRefPicPtr->origin_x) - 1;  // the window may start at most padWidth samples left of picture column 0
   15190           0 :     int16_t padHeight = (int16_t)(sixteenthRefPicPtr->origin_y) - 1;  // same limit above picture row 0
   15191             : 
   15192             :     uint64_t best_sad;  // written by the SAD kernel through the results pointers
   15193             :     int16_t x_search_center;  // filled in by the kernel but not read afterwards; only best_sad is returned
   15194             :     int16_t y_search_center;
   15195             : 
   15196           0 :     x_search_area_origin = -(search_area_width >> 1);  // start with the window centred on the block position
   15197           0 :     y_search_area_origin = -(search_area_height >> 1);
   15198             : 
   15199             :     // Correct the left edge of the Search Area if it is not on the reference
   15200             :     // Picture
   15201           0 :     x_search_area_origin = ((origin_x + x_search_area_origin) < -padWidth)
   15202           0 :                                ? -padWidth - origin_x
   15203             :                                : x_search_area_origin;
   15204             : 
   15205           0 :     search_area_width =  // NOTE(review): this re-tests the origin that was just clamped above, so the shrinking branch looks unreachable and the width is left unchanged - confirm intent
   15206           0 :         ((origin_x + x_search_area_origin) < -padWidth)
   15207           0 :             ? search_area_width -
   15208           0 :                   (-padWidth - (origin_x + x_search_area_origin))
   15209             :             : search_area_width;
   15210             : 
   15211             :     // Correct the right edge of the Search Area if its not on the reference
   15212             :     // Picture
   15213           0 :     x_search_area_origin =  // if the window would start beyond the last column, pull its origin back onto column width - 1
   15214           0 :         ((origin_x + x_search_area_origin) >
   15215           0 :          (int16_t)sixteenthRefPicPtr->width - 1)
   15216           0 :             ? x_search_area_origin - ((origin_x + x_search_area_origin) -
   15217           0 :                                       ((int16_t)sixteenthRefPicPtr->width - 1))
   15218             :             : x_search_area_origin;
   15219             : 
   15220           0 :     search_area_width =  // trim the part of the window that extends past the picture width, keeping at least 1
   15221           0 :         ((origin_x + x_search_area_origin + search_area_width) >
   15222           0 :          (int16_t)sixteenthRefPicPtr->width)
   15223           0 :             ? MAX(1,
   15224             :                   search_area_width -
   15225             :                       ((origin_x + x_search_area_origin + search_area_width) -
   15226             :                        (int16_t)sixteenthRefPicPtr->width))
   15227             :             : search_area_width;
   15228             : 
   15229             :     // Correct the top edge of the Search Area if it is not on the reference
   15230             :     // Picture
   15231           0 :     y_search_area_origin = ((origin_y + y_search_area_origin) < -padHeight)
   15232           0 :                                ? -padHeight - origin_y
   15233             :                                : y_search_area_origin;
   15234             : 
   15235           0 :     search_area_height =  // NOTE(review): same pattern as the left edge - the test uses the already-clamped origin, so the height appears never to shrink here - confirm intent
   15236           0 :         ((origin_y + y_search_area_origin) < -padHeight)
   15237           0 :             ? search_area_height -
   15238           0 :                   (-padHeight - (origin_y + y_search_area_origin))
   15239             :             : search_area_height;
   15240             : 
   15241             :     // Correct the bottom edge of the Search Area if its not on the reference
   15242             :     // Picture
   15243           0 :     y_search_area_origin =  // if the window would start below the last row, pull its origin back onto row height - 1
   15244           0 :         ((origin_y + y_search_area_origin) >
   15245           0 :          (int16_t)sixteenthRefPicPtr->height - 1)
   15246           0 :             ? y_search_area_origin - ((origin_y + y_search_area_origin) -
   15247           0 :                                       ((int16_t)sixteenthRefPicPtr->height - 1))
   15248             :             : y_search_area_origin;
   15249             : 
   15250           0 :     search_area_height =  // trim the part of the window that extends past the picture height, keeping at least 1
   15251           0 :         (origin_y + y_search_area_origin + search_area_height >
   15252           0 :          (int16_t)sixteenthRefPicPtr->height)
   15253           0 :             ? MAX(1,
   15254             :                   search_area_height -
   15255             :                       ((origin_y + y_search_area_origin + search_area_height) -
   15256             :                        (int16_t)sixteenthRefPicPtr->height))
   15257             :             : search_area_height;
   15258             : 
   15259           0 :     xTopLeftSearchRegion = ((int16_t)sixteenthRefPicPtr->origin_x + origin_x) +  // buffer column of the window's top-left sample
   15260             :                            x_search_area_origin;
   15261           0 :     yTopLeftSearchRegion = ((int16_t)sixteenthRefPicPtr->origin_y + origin_y) +  // buffer row of the window's top-left sample
   15262             :                            y_search_area_origin;
   15263           0 :     searchRegionIndex = xTopLeftSearchRegion +
   15264           0 :                         yTopLeftSearchRegion * sixteenthRefPicPtr->stride_y;
   15265             : 
   15266           0 :     if ((search_area_width & 15) == 0) {  // window width is a multiple of 16
   15267             :         // Only width equals 16 (LCU equals 64) is updated
   15268             :         // other width sizes work with the old code as the one
   15269             :         // in "sad_loop_kernel_sse4_1_intrin"
   15270           0 :         sad_loop_kernel_hme_l0(
   15271             :             &context_ptr->sixteenth_sb_buffer[0],
   15272           0 :             context_ptr->sixteenth_sb_buffer_stride * 2,  // stride doubled here and for the reference, height halved below: presumably only every other row is compared - confirm against the kernel signature
   15273           0 :             &sixteenthRefPicPtr->buffer_y[searchRegionIndex],
   15274           0 :             sixteenthRefPicPtr->stride_y * 2,
   15275             :             sb_height >> 1,
   15276             :             sb_width,
   15277             :             /* results */
   15278             :             &best_sad,
   15279             :             &x_search_center,
   15280             :             &y_search_center,
   15281             :             /* range */
   15282           0 :             sixteenthRefPicPtr->stride_y,
   15283             :             search_area_width,
   15284             :             search_area_height);
   15285             :     } else {
   15286             :         // Put the first search location into level0 results
   15287           0 :         sad_loop_kernel(  // called with exactly the same arguments as the sad_loop_kernel_hme_l0 branch above
   15288             :             &context_ptr->sixteenth_sb_buffer[0],
   15289           0 :             context_ptr->sixteenth_sb_buffer_stride * 2,
   15290           0 :             &sixteenthRefPicPtr->buffer_y[searchRegionIndex],
   15291           0 :             sixteenthRefPicPtr->stride_y * 2,
   15292             :             sb_height >> 1,
   15293             :             sb_width,
   15294             :             /* results */
   15295             :             &best_sad,
   15296             :             &x_search_center,
   15297             :             &y_search_center,
   15298             :             /* range */
   15299           0 :             sixteenthRefPicPtr->stride_y,
   15300             :             search_area_width,
   15301             :             search_area_height);
   15302             :     }
   15303             : 
   15304           0 :     return (best_sad);  // the search-centre outputs are discarded; callers only receive the SAD value
   15305             : }
   15306             : 
   15307             : /*******************************************
   15308             :  * IsComplexLcu
   15309             :  *   returns true if the SB has a high spatial & temporal complexity
   15310             :  *******************************************/
   15311           0 : EbBool IsComplexLcu(PictureParentControlSet *previousParentPcs,
   15312             :                     PictureParentControlSet *currentParentPcs,
   15313             :                     PictureParentControlSet *plusOneParentPcs,
   15314             :                     uint32_t pictureWidthInLcus, uint32_t lcuAdrr,
   15315             :                     uint32_t sb_origin_x, uint32_t sb_origin_y,
   15316             :                     uint32_t sb_width, uint32_t sb_height,
   15317             :                     uint32_t lcuCollocatedSad) {
   15318           0 :     uint32_t availableLcusCount = 0;
   15319           0 :     uint32_t highVarianceLcusCount = 0;
   15320             : 
   15321             :     // Check the variance of the current LCU
   15322           0 :     if ((currentParentPcs->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64]) >
   15323             :         IS_COMPLEX_LCU_VARIANCE_TH) {
   15324           0 :         availableLcusCount++;
   15325           0 :         highVarianceLcusCount++;
   15326             :     }
   15327             : 
   15328             :     // Check the variance of left SB if available
   15329           0 :     if (sb_origin_x != 0) {
   15330           0 :         availableLcusCount++;
   15331           0 :         if ((currentParentPcs->variance[lcuAdrr - 1][ME_TIER_ZERO_PU_64x64]) >
   15332             :             IS_COMPLEX_LCU_VARIANCE_TH)
   15333           0 :             highVarianceLcusCount++;
   15334             :     }
   15335             : 
   15336             :     // Check the variance of right SB if available
   15337           0 :     if ((sb_origin_x + BLOCK_SIZE_64) <
   15338           0 :         currentParentPcs->enhanced_picture_ptr->width) {
   15339           0 :         availableLcusCount++;
   15340           0 :         if ((currentParentPcs->variance[lcuAdrr + 1][ME_TIER_ZERO_PU_64x64]) >
   15341             :             IS_COMPLEX_LCU_VARIANCE_TH)
   15342           0 :             highVarianceLcusCount++;
   15343             :     }
   15344             : 
   15345             :     // Check the variance of top SB if available
   15346           0 :     if (sb_origin_y != 0) {
   15347           0 :         availableLcusCount++;
   15348           0 :         if ((currentParentPcs->variance[lcuAdrr - pictureWidthInLcus]
   15349           0 :                                        [ME_TIER_ZERO_PU_64x64]) >
   15350             :             IS_COMPLEX_LCU_VARIANCE_TH)
   15351           0 :             highVarianceLcusCount++;
   15352             :     }
   15353             : 
   15354             :     // Check the variance of bottom LCU
   15355           0 :     if ((sb_origin_y + BLOCK_SIZE_64) <
   15356           0 :         currentParentPcs->enhanced_picture_ptr->height) {
   15357           0 :         availableLcusCount++;
   15358           0 :         if ((currentParentPcs->variance[lcuAdrr + pictureWidthInLcus]
   15359           0 :                                        [ME_TIER_ZERO_PU_64x64]) >
   15360             :             IS_COMPLEX_LCU_VARIANCE_TH)
   15361           0 :             highVarianceLcusCount++;
   15362             :     }
   15363             : 
   15364             :     // Check the variance of top-left LCU
   15365           0 :     if ((sb_origin_x >= BLOCK_SIZE_64) && (sb_origin_y >= BLOCK_SIZE_64)) {
   15366           0 :         availableLcusCount++;
   15367           0 :         if ((currentParentPcs->variance[lcuAdrr - pictureWidthInLcus - 1]
   15368           0 :                                        [ME_TIER_ZERO_PU_64x64]) >
   15369             :             IS_COMPLEX_LCU_VARIANCE_TH)
   15370           0 :             highVarianceLcusCount++;
   15371             :     }
   15372             : 
   15373             :     // Check the variance of top-right LCU
   15374           0 :     if ((sb_origin_x <
   15375           0 :          currentParentPcs->enhanced_picture_ptr->width - BLOCK_SIZE_64) &&
   15376             :         (sb_origin_y >= BLOCK_SIZE_64)) {
   15377           0 :         availableLcusCount++;
   15378           0 :         if ((currentParentPcs->variance[lcuAdrr - pictureWidthInLcus + 1]
   15379           0 :                                        [ME_TIER_ZERO_PU_64x64]) >
   15380             :             IS_COMPLEX_LCU_VARIANCE_TH)
   15381           0 :             highVarianceLcusCount++;
   15382             :     }
   15383             : 
   15384             :     // Check the variance of bottom-left LCU
   15385           0 :     if ((sb_origin_x >= BLOCK_SIZE_64) &&
   15386             :         (sb_origin_y <
   15387           0 :          currentParentPcs->enhanced_picture_ptr->height - BLOCK_SIZE_64)) {
   15388           0 :         availableLcusCount++;
   15389           0 :         if ((currentParentPcs->variance[lcuAdrr + pictureWidthInLcus - 1]
   15390           0 :                                        [ME_TIER_ZERO_PU_64x64]) >
   15391             :             IS_COMPLEX_LCU_VARIANCE_TH)
   15392           0 :             highVarianceLcusCount++;
   15393             :     }
   15394             : 
   15395             :     // Check the variance of bottom-right LCU
   15396           0 :     if ((sb_origin_x <
   15397           0 :          currentParentPcs->enhanced_picture_ptr->width - BLOCK_SIZE_64) &&
   15398             :         (sb_origin_y <
   15399           0 :          currentParentPcs->enhanced_picture_ptr->height - BLOCK_SIZE_64)) {
   15400           0 :         availableLcusCount++;
   15401           0 :         if ((currentParentPcs->variance[lcuAdrr + pictureWidthInLcus + 1]
   15402           0 :                                        [ME_TIER_ZERO_PU_64x64]) >
   15403             :             IS_COMPLEX_LCU_VARIANCE_TH)
   15404           0 :             highVarianceLcusCount++;
   15405             :     }
   15406             : 
   15407           0 :     EbBool varianceFluctuateFlag = EB_FALSE;
   15408             : 
   15409           0 :     if ((previousParentPcs->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64]) >
   15410           0 :             IS_COMPLEX_LCU_FLAT_VARIANCE_TH &&
   15411           0 :         (currentParentPcs->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64]) >
   15412           0 :             IS_COMPLEX_LCU_FLAT_VARIANCE_TH &&
   15413           0 :         (plusOneParentPcs->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64]) >
   15414             :             IS_COMPLEX_LCU_FLAT_VARIANCE_TH) {
   15415           0 :         varianceFluctuateFlag = (EbBool)(
   15416           0 :             (((ABS((int32_t)currentParentPcs
   15417             :                        ->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64] -
   15418             :                    (int32_t)previousParentPcs
   15419           0 :                        ->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64]) *
   15420           0 :                100) /
   15421           0 :               (int32_t)previousParentPcs
   15422           0 :                   ->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64]) >=
   15423           0 :              IS_COMPLEX_LCU_VARIANCE_DEVIATION_TH) &&
   15424           0 :             (((ABS((int32_t)currentParentPcs
   15425             :                        ->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64] -
   15426             :                    (int32_t)plusOneParentPcs
   15427           0 :                        ->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64]) *
   15428           0 :                100) /
   15429           0 :               (int32_t)
   15430           0 :                   plusOneParentPcs->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64]) >=
   15431             :              IS_COMPLEX_LCU_VARIANCE_DEVIATION_TH));
   15432             :     }
   15433             : 
   15434           0 :     if (lcuCollocatedSad >=
   15435           0 :             ((sb_width * sb_height) * IS_COMPLEX_LCU_ZZ_SAD_FACTOR_TH) &&
   15436           0 :         highVarianceLcusCount >= (availableLcusCount >> 1) &&
   15437             :         varianceFluctuateFlag) {
   15438           0 :         return EB_TRUE;
   15439             :     }
   15440             : 
   15441           0 :     return EB_FALSE;
   15442             : }
   15443             : 
   15444           0 : EbErrorType open_loop_intra_search_sb(  // Open-loop intra search: scores intra candidates for every valid block of one SB
   15445             :     PictureParentControlSet *picture_control_set_ptr, uint32_t sb_index,  // results are written to picture_control_set_ptr->ois_sb_results[sb_index]
   15446             :     MotionEstimationContext_t *context_ptr, EbPictureBufferDesc *input_ptr)  // SAD is taken between input_ptr luma and context_ptr->me_context_ptr->sb_buffer
   15447             : {  // Returns return_error, which this function never changes from EB_ErrorNone
   15448           0 :     EbErrorType return_error = EB_ErrorNone;
   15449           0 :     SequenceControlSet *sequence_control_set_ptr =
   15450             :         (SequenceControlSet *)picture_control_set_ptr
   15451           0 :             ->sequence_control_set_wrapper_ptr->object_ptr;
   15452             : 
   15453             :     uint32_t cu_origin_x;
   15454             :     uint32_t cu_origin_y;
   15455           0 :     uint32_t pa_blk_index = 0;
   15456             : #if !PAETH_HBD
   15457             :     uint8_t is_16_bit =  // only compiled when PAETH_HBD is off: ends the mode range at SMOOTH_H_PRED for >8-bit input
   15458             :         (sequence_control_set_ptr->static_config.encoder_bit_depth > EB_8BIT);
   15459             : #endif
   15460           0 :     SbParams *sb_params = &sequence_control_set_ptr->sb_params_array[sb_index];
   15461           0 :     OisSbResults *ois_sb_results_ptr =
   15462           0 :         picture_control_set_ptr->ois_sb_results[sb_index];
   15463             :     uint8_t *above_row;
   15464             :     uint8_t *left_col;
   15465             :     // Scratch storage for the left / above neighbor samples used by the predictor
   15466             :     DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
   15467             :     DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
   15468           0 :     while (pa_blk_index < CU_MAX_COUNT) {  // visit every block index; invalid blocks are skipped below
   15469             :         const CodedUnitStats *blk_stats_ptr;
   15470           0 :         blk_stats_ptr = get_coded_unit_stats(pa_blk_index);
   15471           0 :         uint8_t bsize = blk_stats_ptr->size;
   15472           0 :         TxSize tx_size =  // 8 -> TX_8X8, 16 -> TX_16X16, 32 -> TX_32X32, any other size -> TX_64X64
   15473             :             bsize == 8
   15474             :                 ? TX_8X8
   15475           0 :                 : bsize == 16 ? TX_16X16 : bsize == 32 ? TX_32X32 : TX_64X64;
   15476           0 :         if (sb_params->raster_scan_cu_validity
   15477           0 :                 [md_scan_to_raster_scan[pa_blk_index]]) {  // only blocks whose validity entry is set are searched
   15478           0 :             OisCandidate *ois_blk_ptr =  // candidate array that receives this block's results
   15479             :                 ois_sb_results_ptr->ois_candidate_array[pa_blk_index];
   15480           0 :             cu_origin_x = sb_params->origin_x + blk_stats_ptr->origin_x;  // SB origin plus the block's own origin
   15481           0 :             cu_origin_y = sb_params->origin_y + blk_stats_ptr->origin_y;
   15482           0 :             above_row = above_data + 16;  // 16-byte offset keeps above_row - 1 / left_col - 1 inside the arrays
   15483           0 :             left_col = left_data + 16;
   15484             : 
   15485             :             // Fill the above / left neighbor arrays from the input picture (note the -1 offsets passed in)
   15486           0 :             update_neighbor_samples_array_open_loop(above_row - 1,
   15487             :                                                     left_col - 1,
   15488             :                                                     input_ptr,
   15489           0 :                                                     input_ptr->stride_y,
   15490             :                                                     cu_origin_x,
   15491             :                                                     cu_origin_y,
   15492             :                                                     bsize,
   15493             :                                                     bsize);
   15494             :             uint8_t ois_intra_mode;
   15495           0 :             uint8_t ois_intra_count = 0;  // number of candidates written so far; also the next write index
   15496           0 :             uint8_t best_intra_ois_index = 0;
   15497           0 :             uint32_t best_intra_ois_distortion = 64 * 64 * 255;  // initial "worst" value; presumably max SAD of a 64x64 8-bit block - TODO confirm
   15498           0 :             uint8_t intra_mode_start = DC_PRED;
   15499             : #if PAETH_HBD
   15500           0 :             uint8_t intra_mode_end = PAETH_PRED;
   15501             : #else
   15502             :             uint8_t intra_mode_end = is_16_bit ? SMOOTH_H_PRED : PAETH_PRED;
   15503             : #endif
   15504           0 :             uint8_t angle_delta_counter = 0;
   15505           0 :             uint8_t angle_delta_shift = 1;  // multiplier applied to the centered angle-delta index
   15506           0 :             EbBool use_angle_delta = (bsize >= 8);
   15507           0 :             uint8_t angle_delta_candidate_count = use_angle_delta ? 7 : 1;  // 7 with shift 1 yields deltas -3..+3
   15508           0 :             uint8_t disable_angular_prediction = 0;  // when set, directional modes are skipped entirely
   15509           0 :             if (picture_control_set_ptr->intra_pred_mode == 5) {  // mode 5: DC only on non-reference pictures; angular only on layer 0 with bsize <= 16, 5 deltas
   15510           0 :                 intra_mode_end =
   15511           0 :                     (picture_control_set_ptr->is_used_as_reference_flag == 0)
   15512             :                         ? DC_PRED
   15513             :                         : intra_mode_end;
   15514           0 :                 disable_angular_prediction =
   15515           0 :                     picture_control_set_ptr->temporal_layer_index > 0
   15516             :                         ? 1
   15517           0 :                         : (bsize > 16) ? 1 : 0;
   15518           0 :                 angle_delta_candidate_count =
   15519             :                     disable_angular_prediction ? 1 : use_angle_delta ? 5 : 1;
   15520           0 :                 angle_delta_shift = 1;
   15521             :             }
   15522           0 :             else if (picture_control_set_ptr->intra_pred_mode == 6) {  // mode 6: same gating as mode 5, but a single angle delta (0)
   15523           0 :                 intra_mode_end =
   15524           0 :                     (picture_control_set_ptr->is_used_as_reference_flag == 0)
   15525             :                         ? DC_PRED
   15526             :                         : intra_mode_end;
   15527           0 :                 disable_angular_prediction =
   15528           0 :                     picture_control_set_ptr->temporal_layer_index > 0
   15529             :                         ? 1
   15530           0 :                         : (bsize > 16) ? 1 : 0;
   15531           0 :                 angle_delta_candidate_count = 1;
   15532           0 :                 angle_delta_shift = 1;
   15533             :             } else {  // all other intra_pred_mode values: settings depend on slice type and temporal layer
   15534           0 :                 if (picture_control_set_ptr->slice_type == I_SLICE) {  // I slices: widest mode range, 5 angle deltas (-2..+2)
   15535             : #if PAETH_HBD
   15536           0 :                     intra_mode_end = /*is_16_bit ? SMOOTH_H_PRED :*/ PAETH_PRED;
   15537             : #else
   15538             :                     intra_mode_end = is_16_bit ? SMOOTH_H_PRED : PAETH_PRED;
   15539             : #endif
   15540           0 :                     angle_delta_candidate_count = use_angle_delta ? 5 : 1;
   15541           0 :                     disable_angular_prediction = 0;
   15542           0 :                     angle_delta_shift = 1;
   15543           0 :                 } else if (picture_control_set_ptr->temporal_layer_index == 0) {  // non-I pictures on temporal layer 0
   15544             : #if PAETH_HBD
   15545           0 :                     intra_mode_end = /*is_16_bit ? SMOOTH_H_PRED :*/ PAETH_PRED;
   15546             : #else
   15547             :                     intra_mode_end = is_16_bit ? SMOOTH_H_PRED : PAETH_PRED;
   15548             : #endif
   15549           0 :                     angle_delta_candidate_count =  // count 2 with shift 3 yields deltas -3 and 0
   15550             :                         (bsize > 16) ? 1 : use_angle_delta ? 2 : 1;
   15551           0 :                     disable_angular_prediction = 0;
   15552           0 :                     angle_delta_shift = 3;
   15553             :                 } else {  // remaining pictures: DC_PRED only
   15554           0 :                     intra_mode_end = DC_PRED;
   15555           0 :                     disable_angular_prediction = 1;
   15556           0 :                     angle_delta_candidate_count = 1;
   15557           0 :                     angle_delta_shift = 1;
   15558             :                 }
   15559             :             }
   15560           0 :             for (ois_intra_mode = intra_mode_start;  // inclusive range [intra_mode_start, intra_mode_end]
   15561             :                  ois_intra_mode <= intra_mode_end;
   15562           0 :                  ++ois_intra_mode) {
   15563           0 :                 if (av1_is_directional_mode((PredictionMode)ois_intra_mode)) {
   15564           0 :                     if (!disable_angular_prediction) {  // directional modes produce no candidate when disabled
   15565           0 :                         for (angle_delta_counter = 0;
   15566             :                              angle_delta_counter < angle_delta_candidate_count;
   15567           0 :                              ++angle_delta_counter) {
   15568           0 :                             int32_t angle_delta =  // index centered on count >> 1, scaled by angle_delta_shift; 0 when only one candidate
   15569           0 :                                 angle_delta_shift *
   15570             :                                 (angle_delta_candidate_count == 1
   15571             :                                      ? 0
   15572           0 :                                      : angle_delta_counter -
   15573           0 :                                            (angle_delta_candidate_count >> 1));
   15574           0 :                             int32_t p_angle =  // base angle of the mode plus angle_delta steps of ANGLE_STEP
   15575           0 :                                 mode_to_angle_map[(
   15576           0 :                                     PredictionMode)ois_intra_mode] +
   15577           0 :                                 angle_delta * ANGLE_STEP;
   15578             :                             // Prediction for this mode/angle (presumably written to me_context_ptr->sb_buffer, which the SAD below reads - TODO confirm)
   15579           0 :                             intra_prediction_open_loop(p_angle,
   15580             :                                                        ois_intra_mode,
   15581             :                                                        cu_origin_x,
   15582             :                                                        cu_origin_y,
   15583             :                                                        tx_size,
   15584             :                                                        above_row,
   15585             :                                                        left_col,
   15586             :                                                        context_ptr);
   15587             :                             // Distortion: SAD of the bsize x bsize input luma block against sb_buffer (stride BLOCK_SIZE_64)
   15588           0 :                             ois_blk_ptr[ois_intra_count].distortion =
   15589           0 :                                 (uint32_t)nxm_sad_kernel(  // Always SAD without weighting
   15590           0 :                                         &(input_ptr->buffer_y
   15591           0 :                                               [(input_ptr->origin_y +
   15592           0 :                                                 cu_origin_y) *
   15593           0 :                                                    input_ptr->stride_y +
   15594           0 :                                                (input_ptr->origin_x +
   15595             :                                                 cu_origin_x)]),
   15596           0 :                                         input_ptr->stride_y,
   15597           0 :                                         &(context_ptr->me_context_ptr
   15598             :                                               ->sb_buffer[0]),
   15599             :                                         BLOCK_SIZE_64,
   15600             :                                         bsize,
   15601             :                                         bsize);
   15602             :                             // Keep track of the best (lowest) SAD; strict '<' keeps the earliest candidate on ties
   15603           0 :                             if (ois_blk_ptr[ois_intra_count].distortion <
   15604             :                                 best_intra_ois_distortion) {
   15605           0 :                                 best_intra_ois_index = ois_intra_count;
   15606           0 :                                 best_intra_ois_distortion =
   15607           0 :                                     ois_blk_ptr[ois_intra_count].distortion;
   15608             :                             }
   15609           0 :                             ois_blk_ptr[ois_intra_count].intra_mode =
   15610             :                                 ois_intra_mode;
   15611           0 :                             ois_blk_ptr[ois_intra_count].valid_distortion =
   15612             :                                 EB_TRUE;
   15613           0 :                             ois_blk_ptr[ois_intra_count++].angle_delta =  // post-increment advances to the next candidate slot
   15614             :                                 angle_delta;
   15615             :                         }
   15616             :                     }
   15617             :                 } else {
   15618             :                     // Non-directional mode: single prediction with angle 0 (presumably written to me_context_ptr->sb_buffer - TODO confirm)
   15619           0 :                     intra_prediction_open_loop(0,
   15620             :                                                ois_intra_mode,
   15621             :                                                cu_origin_x,
   15622             :                                                cu_origin_y,
   15623             :                                                tx_size,
   15624             :                                                above_row,
   15625             :                                                left_col,
   15626             :                                                context_ptr);
   15627             :                     // Distortion: SAD of the bsize x bsize input luma block against sb_buffer (stride BLOCK_SIZE_64)
   15628           0 :                     ois_blk_ptr[ois_intra_count]
   15629           0 :                         .distortion = (uint32_t)nxm_sad_kernel(  // Always SAD without weighting
   15630           0 :                             &(input_ptr->buffer_y
   15631           0 :                                   [(input_ptr->origin_y + cu_origin_y) *
   15632           0 :                                        input_ptr->stride_y +
   15633           0 :                                    (input_ptr->origin_x + cu_origin_x)]),
   15634           0 :                             input_ptr->stride_y,
   15635           0 :                             &(context_ptr->me_context_ptr->sb_buffer[0]),
   15636             :                             BLOCK_SIZE_64,
   15637             :                             bsize,
   15638             :                             bsize);
   15639             :                     // Keep track of the best (lowest) SAD; strict '<' keeps the earliest candidate on ties
   15640           0 :                     if (ois_blk_ptr[ois_intra_count].distortion <
   15641             :                         best_intra_ois_distortion) {
   15642           0 :                         best_intra_ois_index = ois_intra_count;
   15643           0 :                         best_intra_ois_distortion =
   15644           0 :                             ois_blk_ptr[ois_intra_count].distortion;
   15645             :                     }
   15646           0 :                     ois_blk_ptr[ois_intra_count].intra_mode = ois_intra_mode;
   15647           0 :                     ois_blk_ptr[ois_intra_count].valid_distortion = EB_TRUE;
   15648           0 :                     ois_blk_ptr[ois_intra_count++].angle_delta = 0;  // post-increment advances to the next candidate slot
   15649             :                 }
   15650             :             }
   15651           0 :             ois_sb_results_ptr->best_distortion_index[pa_blk_index] =  // index of the lowest-SAD candidate for this block
   15652             :                 best_intra_ois_index;
   15653           0 :             ois_sb_results_ptr->total_ois_intra_candidate[pa_blk_index] =  // how many candidates were written for this block
   15654             :                 ois_intra_count;
   15655             :         }
   15656           0 :         pa_blk_index++;
   15657             :     }
   15658           0 :     return return_error;
   15659             : }

Generated by: LCOV version 1.14