LCOV - code coverage report
Current view: top level - Codec - av1me.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 218 530 41.1 %
Date: 2019-11-25 17:38:06 Functions: 16 25 64.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #include <limits.h>
      13             : #include <math.h>
      14             : #include <stdio.h>
      15             : #include "EbDefinitions.h"
      16             : #include "EbCodingUnit.h"
      17             : #include "av1me.h"
      18             : #include "EbPictureControlSet.h"
      19             : #include "EbSequenceControlSet.h"
      20             : #include "EbComputeSAD.h"
      21             : #include "aom_dsp_rtcd.h"
      22             : #if OBMC_FLAG
      23             : #include "EbModeDecisionProcess.h"
      24             : #endif
      25             : 
      26             : #include "EbAdaptiveMotionVectorPrediction.h"
      27             : 
      28             : int av1_is_dv_valid(const MV dv,
      29             :     const MacroBlockD *xd, int mi_row, int mi_col,
      30             :     BlockSize bsize, int mib_size_log2);
      31             : 
                       // Parameter bundle taken (by const pointer) by the aom_dist_wtd_*
                       // function-pointer types declared further down in this file.
                       // NOTE(review): the meaning of the fields is not shown here; the names
                       // suggest an enable flag plus forward/backward weights -- confirm
                       // against the code that fills this struct.
      32             : typedef struct dist_wtd_comp_params {
      33             :     int use_dist_wtd_comp_avg;
      34             :     int fwd_offset;
      35             :     int bck_offset;
      36             : } DIST_WTD_COMP_PARAMS;
      37             : 
                       // Function-pointer signatures for block-matching kernels. Only the
                       // signatures are declared here; what each kernel computes is defined by
                       // whichever function is assigned to it.
                       //
                       // Two strided 8-bit buffers plus a second prediction buffer; returns
                       // an unsigned value.
      38             : typedef unsigned int(*aom_sad_avg_fn_t)(const uint8_t *a, int a_stride,
      39             :     const uint8_t *b, int b_stride,
      40             :     const uint8_t *second_pred);
      41             : 
                       // Read-only strided buffer a, writable strided buffer b and a count n;
                       // no return value.
      42             : typedef void(*aom_copy32xn_fn_t)(const uint8_t *a, int a_stride, uint8_t *b,
      43             :     int b_stride, int n);
      44             : 
                       // Buffer a with an (xoffset, yoffset) pair, buffer b, and an sse
                       // out-parameter; returns an unsigned value.
      45             : typedef unsigned int(*aom_subpixvariance_fn_t)(const uint8_t *a, int a_stride,
      46             :     int xoffset, int yoffset,
      47             :     const uint8_t *b, int b_stride,
      48             :     unsigned int *sse);
      49             : 
                       // Same parameters as aom_subpixvariance_fn_t plus a second prediction
                       // buffer.
      50             : typedef unsigned int(*aom_subp_avg_variance_fn_t)(
      51             :     const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
      52             :     int b_stride, unsigned int *sse, const uint8_t *second_pred);
      53             : 
                       // Same parameters as aom_sad_avg_fn_t plus a DIST_WTD_COMP_PARAMS
                       // pointer.
      54             : typedef unsigned int(*aom_dist_wtd_sad_avg_fn_t)(
      55             :     const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,
      56             :     const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param);
      57             : 
                       // Same parameters as aom_subp_avg_variance_fn_t plus a
                       // DIST_WTD_COMP_PARAMS pointer.
      58             : typedef unsigned int(*aom_dist_wtd_subp_avg_variance_fn_t)(
      59             :     const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
      60             :     int b_stride, unsigned int *sse, const uint8_t *second_pred,
      61             :     const DIST_WTD_COMP_PARAMS *jcp_param);
      62             : 
                       // Source and reference buffers, a second prediction, a strided mask
                       // buffer and an invert_mask flag; returns an unsigned value.
      63             : typedef unsigned int(*aom_masked_sad_fn_t)(const uint8_t *src, int src_stride,
      64             :     const uint8_t *ref, int ref_stride,
      65             :     const uint8_t *second_pred,
      66             :     const uint8_t *msk, int msk_stride,
      67             :     int invert_mask);
                       // Masked variant that additionally takes an (xoffset, yoffset) pair and
                       // an sse out-parameter.
      68             : typedef unsigned int(*aom_masked_subpixvariance_fn_t)(
      69             :     const uint8_t *src, int src_stride, int xoffset, int yoffset,
      70             :     const uint8_t *ref, int ref_stride, const uint8_t *second_pred,
      71             :     const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse);
      72             : 
      73             : int eb_av1_refining_search_sad(IntraBcContext  *x, MV *ref_mv, int error_per_bit,
      74             :     int search_range,
      75             :     const aom_variance_fn_ptr_t *fn_ptr,
      76             :     const MV *center_mv);
      77             : 
                       // Table of per-block-size kernel pointers, indexed by block size and
                       // filled in by init_fn_ptr() below. It has external linkage (not static).
      78             : aom_variance_fn_ptr_t mefn_ptr[BlockSizeS_ALL];
      79             : 
                       // Fills the file-scope mefn_ptr table. For each of the 22 block sizes
                       // listed below it stores the eb_aom_sad* kernel in .sdf, the
                       // eb_aom_variance* kernel in .vf and the eb_aom_sad*x4d kernel in
                       // .sdx4df. When OBMC_FLAG is set, the .osdf/.ovf/.osvf members are
                       // filled for the same 22 block sizes. Entries for block sizes that are
                       // not listed are not written by this function.
      80           2 : void init_fn_ptr(void)
      81             : {
                       // BFP0 (and OBFP further down) expand to several plain assignment
                       // statements with no do/while wrapper, so they are only safe as
                       // stand-alone statements, which is how they are used here. Neither
                       // macro is undefined again inside this function.
      82             : #define BFP0(BT, SDF, VF, SDX4DF)                            \
      83             :   mefn_ptr[BT].sdf = SDF;                                    \
      84             :   mefn_ptr[BT].vf = VF;                                      \
      85             :   mefn_ptr[BT].sdx4df = SDX4DF;
      86             : 
      87           2 :         BFP0(BLOCK_4X16, eb_aom_sad4x16, eb_aom_variance4x16, eb_aom_sad4x16x4d)
      88           2 :         BFP0(BLOCK_16X4, eb_aom_sad16x4, eb_aom_variance16x4, eb_aom_sad16x4x4d)
      89           2 :         BFP0(BLOCK_8X32, eb_aom_sad8x32, eb_aom_variance8x32, eb_aom_sad8x32x4d)
      90           2 :         BFP0(BLOCK_32X8, eb_aom_sad32x8, eb_aom_variance32x8, eb_aom_sad32x8x4d)
      91           2 :         BFP0(BLOCK_16X64, eb_aom_sad16x64, eb_aom_variance16x64, eb_aom_sad16x64x4d)
      92           2 :         BFP0(BLOCK_64X16, eb_aom_sad64x16, eb_aom_variance64x16, eb_aom_sad64x16x4d)
      93           2 :         BFP0(BLOCK_128X128, eb_aom_sad128x128, eb_aom_variance128x128, eb_aom_sad128x128x4d)
      94           2 :         BFP0(BLOCK_128X64, eb_aom_sad128x64, eb_aom_variance128x64, eb_aom_sad128x64x4d)
      95           2 :         BFP0(BLOCK_64X128, eb_aom_sad64x128, eb_aom_variance64x128, eb_aom_sad64x128x4d)
      96           2 :         BFP0(BLOCK_32X16, eb_aom_sad32x16, eb_aom_variance32x16, eb_aom_sad32x16x4d)
      97           2 :         BFP0(BLOCK_16X32, eb_aom_sad16x32, eb_aom_variance16x32, eb_aom_sad16x32x4d)
      98           2 :         BFP0(BLOCK_64X32, eb_aom_sad64x32, eb_aom_variance64x32, eb_aom_sad64x32x4d)
      99           2 :         BFP0(BLOCK_32X64, eb_aom_sad32x64, eb_aom_variance32x64, eb_aom_sad32x64x4d)
     100           2 :         BFP0(BLOCK_32X32, eb_aom_sad32x32, eb_aom_variance32x32, eb_aom_sad32x32x4d)
     101           2 :         BFP0(BLOCK_64X64, eb_aom_sad64x64, eb_aom_variance64x64, eb_aom_sad64x64x4d)
     102           2 :         BFP0(BLOCK_16X16, eb_aom_sad16x16, eb_aom_variance16x16, eb_aom_sad16x16x4d)
     103           2 :         BFP0(BLOCK_16X8, eb_aom_sad16x8, eb_aom_variance16x8, eb_aom_sad16x8x4d)
     104           2 :         BFP0(BLOCK_8X16, eb_aom_sad8x16, eb_aom_variance8x16, eb_aom_sad8x16x4d)
     105           2 :         BFP0(BLOCK_8X8, eb_aom_sad8x8, eb_aom_variance8x8, eb_aom_sad8x8x4d)
     106           2 :         BFP0(BLOCK_8X4, eb_aom_sad8x4, eb_aom_variance8x4, eb_aom_sad8x4x4d)
     107           2 :         BFP0(BLOCK_4X8, eb_aom_sad4x8, eb_aom_variance4x8, eb_aom_sad4x8x4d)
     108           2 :         BFP0(BLOCK_4X4, eb_aom_sad4x4, eb_aom_variance4x4, eb_aom_sad4x4x4d)
     109             : #if OBMC_FLAG
                       // OBMC kernels: .osdf, .ovf and .osvf for the same set of block sizes.
     110             :      #define OBFP(BT, OSDF, OVF, OSVF) \
     111             :       mefn_ptr[BT].osdf = OSDF;    \
     112             :       mefn_ptr[BT].ovf = OVF;      \
     113             :       mefn_ptr[BT].osvf = OSVF;
     114           2 :       OBFP(BLOCK_128X128, aom_obmc_sad128x128, aom_obmc_variance128x128,
     115             :            aom_obmc_sub_pixel_variance128x128)
     116           2 :       OBFP(BLOCK_128X64, aom_obmc_sad128x64, aom_obmc_variance128x64,
     117             :            aom_obmc_sub_pixel_variance128x64)
     118           2 :       OBFP(BLOCK_64X128, aom_obmc_sad64x128, aom_obmc_variance64x128,
     119             :            aom_obmc_sub_pixel_variance64x128)
     120           2 :       OBFP(BLOCK_64X64, aom_obmc_sad64x64, aom_obmc_variance64x64,
     121             :            aom_obmc_sub_pixel_variance64x64)
     122           2 :       OBFP(BLOCK_64X32, aom_obmc_sad64x32, aom_obmc_variance64x32,
     123             :            aom_obmc_sub_pixel_variance64x32)
     124           2 :       OBFP(BLOCK_32X64, aom_obmc_sad32x64, aom_obmc_variance32x64,
     125             :            aom_obmc_sub_pixel_variance32x64)
     126           2 :       OBFP(BLOCK_32X32, aom_obmc_sad32x32, aom_obmc_variance32x32,
     127             :            aom_obmc_sub_pixel_variance32x32)
     128           2 :       OBFP(BLOCK_32X16, aom_obmc_sad32x16, aom_obmc_variance32x16,
     129             :            aom_obmc_sub_pixel_variance32x16)
     130           2 :       OBFP(BLOCK_16X32, aom_obmc_sad16x32, aom_obmc_variance16x32,
     131             :            aom_obmc_sub_pixel_variance16x32)
     132           2 :       OBFP(BLOCK_16X16, aom_obmc_sad16x16, aom_obmc_variance16x16,
     133             :            aom_obmc_sub_pixel_variance16x16)
     134           2 :       OBFP(BLOCK_16X8, aom_obmc_sad16x8, aom_obmc_variance16x8,
     135             :            aom_obmc_sub_pixel_variance16x8)
     136           2 :       OBFP(BLOCK_8X16, aom_obmc_sad8x16, aom_obmc_variance8x16,
     137             :            aom_obmc_sub_pixel_variance8x16)
     138           2 :       OBFP(BLOCK_8X8, aom_obmc_sad8x8, aom_obmc_variance8x8,
     139             :            aom_obmc_sub_pixel_variance8x8)
     140           2 :       OBFP(BLOCK_4X8, aom_obmc_sad4x8, aom_obmc_variance4x8,
     141             :            aom_obmc_sub_pixel_variance4x8)
     142           2 :       OBFP(BLOCK_8X4, aom_obmc_sad8x4, aom_obmc_variance8x4,
     143             :            aom_obmc_sub_pixel_variance8x4)
     144           2 :       OBFP(BLOCK_4X4, aom_obmc_sad4x4, aom_obmc_variance4x4,
     145             :            aom_obmc_sub_pixel_variance4x4)
     146           2 :       OBFP(BLOCK_4X16, aom_obmc_sad4x16, aom_obmc_variance4x16,
     147             :            aom_obmc_sub_pixel_variance4x16)
     148           2 :       OBFP(BLOCK_16X4, aom_obmc_sad16x4, aom_obmc_variance16x4,
     149             :            aom_obmc_sub_pixel_variance16x4)
     150           2 :       OBFP(BLOCK_8X32, aom_obmc_sad8x32, aom_obmc_variance8x32,
     151             :            aom_obmc_sub_pixel_variance8x32)
     152           2 :       OBFP(BLOCK_32X8, aom_obmc_sad32x8, aom_obmc_variance32x8,
     153             :            aom_obmc_sub_pixel_variance32x8)
     154           2 :       OBFP(BLOCK_16X64, aom_obmc_sad16x64, aom_obmc_variance16x64,
     155             :            aom_obmc_sub_pixel_variance16x64)
     156           2 :       OBFP(BLOCK_64X16, aom_obmc_sad64x16, aom_obmc_variance64x16,
     157             :            aom_obmc_sub_pixel_variance64x16)
     158             : #endif
     159           2 : }
     160             : 
     161             : // #define NEW_DIAMOND_SEARCH
     162             : 
                       // Returns the address of the sample that lies mv->row rows and mv->col
                       // columns away from buf->buf, using buf->stride as the row pitch.
                       // row/col are used directly as sample offsets (no shift is applied) and
                       // no bounds check is performed.
     163    18143600 : static INLINE const uint8_t *get_buf_from_mv(const struct Buf2D *buf,
     164             :                                              const MV *mv) {
     165    18143600 :   return &buf->buf[mv->row * buf->stride + mv->col];
     166             : }
     167             : 
                       // Narrows *mv_limits to its intersection with a window of
                       // +/-MAX_FULL_PEL_VAL around mv.
                       //
                       // mv is shifted right by 3 to obtain the window centre.
                       // NOTE(review): this implies mv is in 1/8 units while mv_limits is in
                       // whole units -- confirm against the callers.
                       // Existing limits are only ever tightened, never widened.
     168     1781160 : void eb_av1_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
                         // Lower bounds are bumped by one when mv has a nonzero 3-bit fraction.
     169     1781160 :   int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
     170     1781160 :   int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
     171     1781160 :   int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
     172     1781160 :   int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
     173             : 
                         // Keep the window strictly inside (MV_LOW >> 3, MV_UPP >> 3).
     174     1781160 :   col_min = AOMMAX(col_min, (MV_LOW >> 3) + 1);
     175     1781160 :   row_min = AOMMAX(row_min, (MV_LOW >> 3) + 1);
     176     1781160 :   col_max = AOMMIN(col_max, (MV_UPP >> 3) - 1);
     177     1781160 :   row_max = AOMMIN(row_max, (MV_UPP >> 3) - 1);
     178             : 
     179             :   // Get intersection of UMV window and valid MV window to reduce # of checks
     180             :   // in diamond search.
     181     1781160 :   if (mv_limits->col_min < col_min) mv_limits->col_min = col_min;
     182     1781160 :   if (mv_limits->col_max > col_max) mv_limits->col_max = col_max;
     183     1781160 :   if (mv_limits->row_min < row_min) mv_limits->row_min = row_min;
     184     1781160 :   if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
     185     1781160 : }
     186             : 
     187             : MvJointType av1_get_mv_joint(const MV *mv);
     188             : 
                       // Table-lookup cost of mv: the joint_cost entry selected by
                       // av1_get_mv_joint(mv) plus comp_cost[0][mv->row] plus
                       // comp_cost[1][mv->col].
                       // NOTE(review): row/col index the component tables directly and can be
                       // negative, so comp_cost[0] and comp_cost[1] presumably point into the
                       // middle of their tables -- confirm where the tables are set up.
     189    47286800 : static INLINE int mv_cost(const MV *mv, const int *joint_cost,
     190             :                           int *const comp_cost[2]) {
     191    47286800 :   return joint_cost[av1_get_mv_joint(mv)] + comp_cost[0][mv->row] +
     192    47283100 :          comp_cost[1][mv->col];
     193             : }
     194             : 
                       // Extra right-shift (in bits) applied by mv_err_cost() below.
     195             : #define PIXEL_TRANSFORM_ERROR_SCALE 4
                       // Cost of coding mv relative to ref, weighted by error_per_bit.
                       //
                       // Returns 0 when mvcost is NULL. Otherwise looks up mv_cost() for the
                       // component-wise difference (mv - ref), multiplies it by error_per_bit
                       // in 64-bit arithmetic, and applies a rounding right-shift of
                       // RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT +
                       // PIXEL_TRANSFORM_ERROR_SCALE bits before truncating to int.
     196    38352800 : static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
     197             :                        int *mvcost[2], int error_per_bit) {
     198    38352800 :   if (mvcost) {
     199    38353000 :     const MV diff = { mv->row - ref->row, mv->col - ref->col };
     200    38353000 :     return (int)ROUND_POWER_OF_TWO_64(
     201             :         (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
     202             :         RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT +
     203             :             PIXEL_TRANSFORM_ERROR_SCALE);
     204             :   }
     205           0 :   return 0;
     206             : }
     207             : 
                       // Cost of mv relative to ref, weighted by sad_per_bit, for adding to
                       // SAD values.
                       //
                       // The component-wise difference is multiplied by 8 before the lookup.
                       // NOTE(review): presumably this converts whole-sample vectors to the
                       // 1/8 units the cost tables are indexed in -- confirm.
                       // The tables come from x->nmv_vec_cost and x->mv_cost_stack. The product
                       // is formed in unsigned arithmetic and rounding-shifted right by
                       // AV1_PROB_COST_SHIFT.
     208     8958030 : static int mvsad_err_cost(const IntraBcContext *x, const MV *mv, const MV *ref,
     209             :                           int sad_per_bit) {
     210     8958030 :   const MV diff = { (mv->row - ref->row) * 8, (mv->col - ref->col) * 8 };
     211     8958030 :   return ROUND_POWER_OF_TWO(
     212             :       (unsigned)mv_cost(&diff, x->nmv_vec_cost, x->mv_cost_stack) * sad_per_bit,
     213             :       AV1_PROB_COST_SHIFT);
     214             : }
     215             : 
                       // Builds the search-site table in cfg for a reference buffer with the
                       // given stride.
                       //
                       // ss[0] is the zero displacement. Then, for each step length starting at
                       // MAX_FIRST_STEP and halved (integer division) while it stays above
                       // zero, eight sites are appended: four axis-aligned and four diagonal
                       // displacements of that length, each with its precomputed buffer offset
                       // row * stride + col. On return cfg->ss_count holds the number of
                       // entries written and cfg->searches_per_step is 8.
                       // NOTE(review): no capacity check on cfg->ss is made here; the array
                       // must be large enough for 1 + 8 * (number of step lengths) entries.
     216           0 : void eb_av1_init3smotion_compensation(SearchSiteConfig *cfg, int stride) {
     217           0 :   int len, ss_count = 1;
     218             : 
     219           0 :   cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
     220           0 :   cfg->ss[0].offset = 0;
     221             : 
     222           0 :   for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
     223             :     // Generate offsets for 8 search sites per step.
                           // Order is {row, col}: up, down, left, right, then the four diagonals.
     224           0 :     const MV ss_mvs[8] = { { -len, 0 },   { len, 0 },     { 0, -len },
     225             :                            { 0, len },    { -len, -len }, { -len, len },
     226             :                            { len, -len }, { len, len } };
     227             :     int i;
     228           0 :     for (i = 0; i < 8; ++i) {
     229           0 :       search_site *const ss = &cfg->ss[ss_count++];
     230           0 :       ss->mv = ss_mvs[i];
     231           0 :       ss->offset = ss->mv.row * stride + ss->mv.col;
     232             :     }
     233             :   }
     234             : 
     235           0 :   cfg->ss_count = ss_count;
     236           0 :   cfg->searches_per_step = 8;
     237           0 : }
     238             : 
                       // Returns nonzero when mv lies inside mv_limits; all four bounds are
                       // inclusive.
     239    14605100 : static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
     240    14593500 :   return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
     241    29198600 :          (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
     242             : }
     243             : #if !OBMC_FLAG
                       // Candidate-update helper, defined only when OBMC_FLAG is 0.
                       // If thissad beats bestsad, optionally (use_mvcost) adds
                       // mvsad_err_cost() for this_mv relative to fcenter_mv, and if it still
                       // beats bestsad records it in bestsad and sets best_site = i.
                       // The expansion site must provide: thissad, bestsad, use_mvcost, x,
                       // this_mv, fcenter_mv, sad_per_bit, best_site and i.
     244             : #define CHECK_BETTER                                                      \
     245             :   {                                                                       \
     246             :     if (thissad < bestsad) {                                              \
     247             :       if (use_mvcost)                                                     \
     248             :         thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
     249             :       if (thissad < bestsad) {                                            \
     250             :         bestsad = thissad;                                                \
     251             :         best_site = i;                                                    \
     252             :       }                                                                   \
     253             :     }                                                                     \
     254             :   }
     255             : #endif
     256             : #define MAX_PATTERN_SCALES 11
     257             : #define MAX_PATTERN_CANDIDATES 8  // max number of candidates per scale
     258             : #define PATTERN_CANDIDATES_REF 3  // number of refinement candidates
     259             : 
                       // Returns the vfp->vf result for the plane-0 source block against the
                       // plane-0 reference (pre[0]) block displaced by best_mv. When use_mvcost
                       // is nonzero, mv_err_cost() of (best_mv * 8) relative to center_mv is
                       // added, using x->nmv_vec_cost, x->mv_cost_stack and x->errorperbit.
                       // The sse value written by vf is discarded.
     260           0 : int eb_av1_get_mvpred_var(const IntraBcContext *x, const MV *best_mv,
     261             :                        const MV *center_mv, const aom_variance_fn_ptr_t *vfp,
     262             :                        int use_mvcost) {
     263           0 :   const struct Buf2D *const what = &x->plane[0].src;
     264           0 :   const struct Buf2D *const in_what = &x->xdplane[0].pre[0];
                         // best_mv scaled by 8 so it can be compared with center_mv in mv_err_cost().
     265           0 :   const MV mv = { best_mv->row * 8, best_mv->col * 8 };
     266             :   unsigned int unused;
     267             : 
     268           0 :   return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
     269           0 :                  in_what->stride, &unused) +
     270           0 :          (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmv_vec_cost,
     271             :                                    x->mv_cost_stack, x->errorperbit)
     272           0 :                      : 0);
     273             : }
     274             : 
     275             : // Exhaustive motion search around a given centre position with a given
     276             : // step size.
                       //
                       // The centre is center_mv clamped to x->mv_limits. Candidates are the
                       // grid points within +/-range of the centre (clipped to x->mv_limits),
                       // visited every `step` rows. Each candidate is scored as
                       // fn_ptr->sdf(...) + mvsad_err_cost(candidate, ref_mv, sad_per_bit).
                       // The best candidate is written to *best_mv, the previous best is kept
                       // in x->second_best_mv, and the best score is returned.
     277           0 : static int exhuastive_mesh_search(IntraBcContext  *x, MV *ref_mv, MV *best_mv,
     278             :                                   int range, int step, int sad_per_bit,
     279             :                                   const aom_variance_fn_ptr_t *fn_ptr,
     280             :                                   const MV *center_mv) {
     281           0 :   const struct Buf2D *const what = &x->plane[0].src;
     282           0 :   const struct Buf2D *const in_what = &x->xdplane[0].pre[0];
     283           0 :   MV fcenter_mv = { center_mv->row, center_mv->col };
     284           0 :   unsigned int best_sad = INT_MAX;
     285             :   int r, c, i;
     286             :   int start_col, end_col, start_row, end_row;
                         // With step == 1 every column is checked, four at a time through
                         // sdx4df, so the column loop advances by 4.
     287           0 :   int col_step = (step > 1) ? step : 4;
     288             : 
     289           0 :   assert(step >= 1);
     290             : 
     291           0 :   clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
     292             :            x->mv_limits.row_min, x->mv_limits.row_max);
     293           0 :   *best_mv = fcenter_mv;
                         // Score of the (clamped) centre is the initial best.
     294           0 :   best_sad =
     295           0 :       fn_ptr->sdf(what->buf, what->stride,
     296             :                   get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
     297           0 :       mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
                         // Offsets relative to the centre, clipped so centre + offset stays
                         // inside x->mv_limits.
     298           0 :   start_row = AOMMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
     299           0 :   start_col = AOMMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
     300           0 :   end_row = AOMMIN(range, x->mv_limits.row_max - fcenter_mv.row);
     301           0 :   end_col = AOMMIN(range, x->mv_limits.col_max - fcenter_mv.col);
     302             : 
     303           0 :   for (r = start_row; r <= end_row; r += step) {
     304           0 :     for (c = start_col; c <= end_col; c += col_step) {
     305             :       // Step > 1 means we are not checking every location in this pass.
     306           0 :       if (step > 1) {
     307           0 :         const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
     308             :         unsigned int sad =
     309           0 :             fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
     310             :                         in_what->stride);
                               // The MV cost is only added when the raw SAD already beats the best.
     311           0 :         if (sad < best_sad) {
     312           0 :           sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
     313           0 :           if (sad < best_sad) {
     314           0 :             best_sad = sad;
     315           0 :             x->second_best_mv.as_mv = *best_mv;
     316           0 :             *best_mv = mv;
     317             :           }
     318             :         }
     319             :       } else {
     320             :         // 4 sads in a single call if we are checking every location
     321           0 :         if (c + 3 <= end_col) {
     322             :           unsigned int sads[4];
     323             :           const uint8_t *addrs[4];
     324           0 :           for (i = 0; i < 4; ++i) {
     325           0 :             const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
     326           0 :             addrs[i] = get_buf_from_mv(in_what, &mv);
     327             :           }
     328           0 :           fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
     329             : 
     330           0 :           for (i = 0; i < 4; ++i) {
     331           0 :             if (sads[i] < best_sad) {
     332           0 :               const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
     333           0 :               const unsigned int sad =
     334           0 :                   sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
     335           0 :               if (sad < best_sad) {
     336           0 :                 best_sad = sad;
     337           0 :                 x->second_best_mv.as_mv = *best_mv;
     338           0 :                 *best_mv = mv;
     339             :               }
     340             :             }
     341             :           }
     342             :         } else {
                                 // Fewer than four columns remain in this row: check them one by one.
                                 // NOTE(review): the bound i < end_col - c visits columns
                                 // c .. end_col - 1, so column end_col itself is not evaluated
                                 // here even though the outer loop treats end_col as inclusive.
                                 // Confirm whether that is intended before changing it, since it
                                 // alters the search result.
     343           0 :           for (i = 0; i < end_col - c; ++i) {
     344           0 :             const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
     345             :             unsigned int sad =
     346           0 :                 fn_ptr->sdf(what->buf, what->stride,
     347             :                             get_buf_from_mv(in_what, &mv), in_what->stride);
     348           0 :             if (sad < best_sad) {
     349           0 :               sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
     350           0 :               if (sad < best_sad) {
     351           0 :                 best_sad = sad;
     352           0 :                 x->second_best_mv.as_mv = *best_mv;
     353           0 :                 *best_mv = mv;
     354             :               }
     355             :             }
     356             :           }
     357             :         }
     358             :       }
     359             :     }
     360             :   }
     361             : 
     362           0 :   return best_sad;
     363             : }
     364             : 
                       // Step search over the precomputed site table in cfg.
                       //
                       // Starts at ref_mv (clamped in place to x->mv_limits) and, for each
                       // remaining step, evaluates cfg->searches_per_step sites around the
                       // current best position. Each site is scored as fn_ptr->sdf / sdx4df
                       // plus mvsad_err_cost() relative to (center_mv >> 3). After each step
                       // the best position moves to the winning site, if any.
                       //
                       // Outputs: *best_mv receives the best position, x->second_best_mv the
                       // position held before the last move, and *num00 the number of steps in
                       // which no site won while the best position was still the start
                       // position. Returns the best score.
     365           0 : int eb_av1_diamond_search_sad_c(IntraBcContext  *x, const SearchSiteConfig *cfg,
     366             :                              MV *ref_mv, MV *best_mv, int search_param,
     367             :                              int sad_per_bit, int *num00,
     368             :                              const aom_variance_fn_ptr_t *fn_ptr,
     369             :                              const MV *center_mv) {
     370             :   int i, j, step;
     371             : 
     372           0 :   uint8_t *what = x->plane[0].src.buf;
     373           0 :   const int what_stride = x->plane[0].src.stride;
     374             :   const uint8_t *in_what;
     375           0 :   const int in_what_stride = x->xdplane[0].pre[0].stride;
     376             :   const uint8_t *best_address;
     377             : 
     378           0 :   unsigned int bestsad = INT_MAX;
     379           0 :   int best_site = 0;
     380           0 :   int last_site = 0;
     381             : 
     382             :   int ref_row;
     383             :   int ref_col;
     384             : 
     385             :   // search_param determines the length of the initial step and hence the number
     386             :   // of iterations.
     387             :   // 0 = initial step (MAX_FIRST_STEP) pel
     388             :   // 1 = (MAX_FIRST_STEP/2) pel,
     389             :   // 2 = (MAX_FIRST_STEP/4) pel...
     390           0 :   const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
     391           0 :   const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
     392             : 
     393           0 :   const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
     394           0 :   clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
     395             :            x->mv_limits.row_min, x->mv_limits.row_max);
     396           0 :   ref_row = ref_mv->row;
     397           0 :   ref_col = ref_mv->col;
     398           0 :   *num00 = 0;
     399           0 :   best_mv->row = ref_row;
     400           0 :   best_mv->col = ref_col;
     401             : 
     402             :   // Work out the start point for the search
     403           0 :   in_what = x->xdplane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
     404           0 :   best_address = in_what;
     405             : 
     406             :   // Check the starting position
     407           0 :   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
     408           0 :             mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
     409             : 
                         // i indexes ss cumulatively: it starts at 1 and is never reset, so each
                         // step consumes the next searches_per_step entries of the table.
     410           0 :   i = 1;
     411             : 
     412           0 :   for (step = 0; step < tot_steps; step++) {
     413           0 :     int all_in = 1, t;
     414             : 
     415             :     // All_in is true if every one of the points we are checking are within
     416             :     // the bounds of the image.
                           // NOTE(review): only the first four sites of the step are tested, with
                           // strict inequalities. This relies on those four being the -row, +row,
                           // -col, +col sites, as laid out by
                           // eb_av1_init3smotion_compensation() -- confirm cfg is built that way.
     417           0 :     all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_limits.row_min);
     418           0 :     all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_limits.row_max);
     419           0 :     all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_limits.col_min);
     420           0 :     all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_limits.col_max);
     421             : 
     422             :     // If all the pixels are within the bounds we don't check whether the
     423             :     // search point is valid in this loop,  otherwise we check each point
     424             :     // for validity..
     425           0 :     if (all_in) {
     426             :       unsigned int sad_array[4];
     427             : 
     428           0 :       for (j = 0; j < cfg->searches_per_step; j += 4) {
     429             :         unsigned char const *block_offset[4];
     430             : 
     431           0 :         for (t = 0; t < 4; t++)
     432           0 :           block_offset[t] = ss[i + t].offset + best_address;
     433             : 
     434           0 :         fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
     435             :                        sad_array);
     436             : 
                               // i advances together with t, so ss[i] is the site of sad_array[t].
     437           0 :         for (t = 0; t < 4; t++, i++) {
     438           0 :           if (sad_array[t] < bestsad) {
     439           0 :             const MV this_mv = { best_mv->row + ss[i].mv.row,
     440           0 :                                  best_mv->col + ss[i].mv.col };
     441           0 :             sad_array[t] +=
     442           0 :                 mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
     443           0 :             if (sad_array[t] < bestsad) {
     444           0 :               bestsad = sad_array[t];
     445           0 :               best_site = i;
     446             :             }
     447             :           }
     448             :         }
     449             :       }
     450             :     } else {
     451           0 :       for (j = 0; j < cfg->searches_per_step; j++) {
     452             :         // Trap illegal vectors
     453           0 :         const MV this_mv = { best_mv->row + ss[i].mv.row,
     454           0 :                              best_mv->col + ss[i].mv.col };
     455             : 
     456           0 :         if (is_mv_in(&x->mv_limits, &this_mv)) {
     457           0 :           const uint8_t *const check_here = ss[i].offset + best_address;
     458             :           unsigned int thissad =
     459           0 :               fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
     460             : 
     461           0 :           if (thissad < bestsad) {
     462           0 :             thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
     463           0 :             if (thissad < bestsad) {
     464           0 :               bestsad = thissad;
     465           0 :               best_site = i;
     466             :             }
     467             :           }
     468             :         }
     469           0 :         i++;
     470             :       }
     471             :     }
                           // A new winning site in this step: move the best position onto it.
     472           0 :     if (best_site != last_site) {
     473           0 :       x->second_best_mv.as_mv = *best_mv;
     474           0 :       best_mv->row += ss[best_site].mv.row;
     475           0 :       best_mv->col += ss[best_site].mv.col;
     476           0 :       best_address += ss[best_site].offset;
     477           0 :       last_site = best_site;
                         // Optional extension, compiled only when NEW_DIAMOND_SEARCH is defined:
                         // keep stepping in the winning direction while the score improves.
     478             : #if defined(NEW_DIAMOND_SEARCH)
     479             :       while (1) {
     480             :         const MV this_mv = { best_mv->row + ss[best_site].mv.row,
     481             :                              best_mv->col + ss[best_site].mv.col };
     482             :         if (is_mv_in(&x->mv_limits, &this_mv)) {
     483             :           const uint8_t *const check_here = ss[best_site].offset + best_address;
     484             :           unsigned int thissad =
     485             :               fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
     486             :           if (thissad < bestsad) {
     487             :             thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
     488             :             if (thissad < bestsad) {
     489             :               bestsad = thissad;
     490             :               best_mv->row += ss[best_site].mv.row;
     491             :               best_mv->col += ss[best_site].mv.col;
     492             :               best_address += ss[best_site].offset;
     493             :               continue;
     494             :             }
     495             :           }
     496             :         }
     497             :         break;
     498             :       }
     499             : #endif
                           // No site won this step and the best position is still the start point.
     500           0 :     } else if (best_address == in_what)
     501           0 :       (*num00)++;
     502             :   }
     503           0 :   return bestsad;
     504             : }
     505             : 
     506             : /* Full-pel diamond search: runs eb_av1_diamond_search_sad_c at step_param and
     507             :    then at step_param + n while n < further_steps, keeping the lowest-cost MV
     508             :    in x->best_mv. do_refine: if the last step (1-away) of the n-step search doesn't pick the center point as the best match, a final 1-away diamond refining search is done. Returns bestsme. */
     509           0 : static int full_pixel_diamond(PictureControlSet *pcs, IntraBcContext /*MACROBLOCK*/ *x,
     510             :                               MV *mvp_full, int step_param, int sadpb,
     511             :                               int further_steps, int do_refine, int *cost_list,  // cost_list is accepted but not used
     512             :                               const aom_variance_fn_ptr_t *fn_ptr,
     513             :                               const MV *ref_mv) {
     514             :   MV temp_mv;
     515           0 :   int thissme, n, num00 = 0;  // n has no initialiser; it is passed as &n to the first search below (presumably set by the callee; confirm)
     516             :   (void)cost_list;
     517             :   /*int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
     518             :                                         step_param, sadpb, &n, fn_ptr, ref_mv);*/
     519           0 :   int bestsme = eb_av1_diamond_search_sad_c(x, &pcs->ss_cfg, mvp_full, &temp_mv,
     520             :       step_param, sadpb, &n, fn_ptr, ref_mv);
     521             : 
     522           0 :   if (bestsme < INT_MAX)  // an INT_MAX result is kept as is; anything else is re-scored for temp_mv
     523           0 :     bestsme = eb_av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
     524           0 :   x->best_mv.as_mv = temp_mv;
     525             : 
     526             :   // If there won't be more n-step search, check to see if refining search is
     527             :   // needed.
     528           0 :   if (n > further_steps) do_refine = 0;
     529             : 
     530           0 :   while (n < further_steps) {
     531           0 :     ++n;
     532             : 
     533           0 :     if (num00) {  // num00 > 0: no search is run for this step, only the counter is decremented
     534           0 :       num00--;
     535             :     } else {
     536             :       /*thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
     537             :                                         step_param + n, sadpb, &num00, fn_ptr,
     538             :                                         ref_mv);*/
     539           0 :       thissme = eb_av1_diamond_search_sad_c(x, &pcs->ss_cfg, mvp_full, &temp_mv,
     540             :           step_param + n, sadpb, &num00, fn_ptr,
     541             :           ref_mv);
     542             : 
     543           0 :       if (thissme < INT_MAX)
     544           0 :         thissme = eb_av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
     545             : 
     546             :       // check to see if refining search is needed.
     547           0 :       if (num00 > further_steps - n) do_refine = 0;
     548             : 
     549           0 :       if (thissme < bestsme) {  // strict '<': on a tie the earlier MV stays in x->best_mv
     550           0 :         bestsme = thissme;
     551           0 :         x->best_mv.as_mv = temp_mv;
     552             :       }
     553             :     }
     554             :   }
     555             : 
     556             :   // final 1-away diamond refining search
     557           0 :   if (do_refine) {
     558           0 :     const int search_range = 8;  // passed as search_range to eb_av1_refining_search_sad
     559           0 :     MV best_mv = x->best_mv.as_mv;  // local copy; x->best_mv is only overwritten if the refinement wins
     560           0 :     thissme = eb_av1_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr,
     561             :                                       ref_mv);
     562           0 :     if (thissme < INT_MAX)
     563           0 :       thissme = eb_av1_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
     564           0 :     if (thissme < bestsme) {
     565           0 :       bestsme = thissme;
     566           0 :       x->best_mv.as_mv = best_mv;
     567             :     }
     568             :   }
     569             : 
     570             :   // Return cost list.
     571             :  /* if (cost_list) {
     572             :     calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, &x->best_mv.as_mv, cost_list);
     573             :   }*/
     574           0 :   return bestsme;
     575             : }
     576             : 
     577             : #define MIN_RANGE 7  // smallest mesh range full_pixel_exhaustive accepts
     578             : #define MAX_RANGE 256  // largest accepted range; also the cap applied after the range is widened
     579             : #define MIN_INTERVAL 1  // smallest accepted mesh interval
     580             : // Runs a limited-range exhaustive mesh search using the pattern set in
     581             : // pcs->sf.mesh_patterns (speed features); the best MV found is written to *dst_mv.
     582           0 : static int full_pixel_exhaustive(PictureControlSet *pcs, IntraBcContext  *x,
     583             :                                  const MV *centre_mv_full, int sadpb,
     584             :                                  int *cost_list,
     585             :                                  const aom_variance_fn_ptr_t *fn_ptr,
     586             :                                  const MV *ref_mv, MV *dst_mv) {
     587             :     UNUSED(cost_list);  // the cost-list code at the end of this function is commented out
     588           0 :     const SpeedFeatures *const sf = &pcs->sf;// cpi->sf;
     589           0 :   MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
     590           0 :   MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };  // ref_mv components shifted right by 3 (presumably 1/8-pel -> full-pel; confirm)
     591             :   int bestsme;
     592             :   int i;
     593           0 :   int interval = sf->mesh_patterns[0].interval;
     594           0 :   int range = sf->mesh_patterns[0].range;
     595             :   int baseline_interval_divisor;
     596             : 
     597             :   // Keep track of number of exhaustive calls (this frame in this thread).
     598             :   //CHKN if (x->ex_search_count_ptr != NULL) ++(*x->ex_search_count_ptr);
     599             : 
     600             :   // Trap illegal values for interval and range for this function.
     601           0 :   if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
     602             :       (interval > range))
     603           0 :     return INT_MAX;  // NOTE: *dst_mv is not written on this path
     604             : 
     605           0 :   baseline_interval_divisor = range / interval;  // range/interval ratio of pattern 0; nonzero because interval <= range was checked above
     606             : 
     607             :   // Check size of proposed first range against magnitude of the centre
     608             :   // value used as a starting point.
     609           0 :   range = AOMMAX(range, (5 * AOMMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);  // at least 5/4 of the larger |component| of the start MV
     610           0 :   range = AOMMIN(range, MAX_RANGE);
     611           0 :   interval = AOMMAX(interval, range / baseline_interval_divisor);  // grow interval with the widened range, never below pattern 0's
     612             : 
     613             :   // initial search
     614           0 :   bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval,
     615             :                                    sadpb, fn_ptr, &temp_mv);  // temp_mv is passed as both the centre and the output argument
     616             : 
     617           0 :   if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
     618             :     // Progressive searches with range and step size decreasing each time
     619             :     // till we reach a step size of 1. Then break out.
     620           0 :     for (i = 1; i < MAX_MESH_STEP; ++i) {
     621             :       // First pass with coarser step and longer range
     622           0 :       bestsme = exhuastive_mesh_search(
     623             :           x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range,
     624             :           sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv);  // bestsme is overwritten by each pass, not min-reduced
     625             : 
     626           0 :       if (sf->mesh_patterns[i].interval == 1) break;
     627             :     }
     628             :   }
     629             : 
     630           0 :   if (bestsme < INT_MAX)
     631           0 :     bestsme = eb_av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
     632           0 :   *dst_mv = temp_mv;  // written whether or not bestsme is INT_MAX
     633             : 
     634             :   // Return cost list.
     635             :  /* if (cost_list) {
     636             :     calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
     637             :   }*/
     638           0 :   return bestsme;
     639             : }
     640             : /* 1-away refining search: for at most search_range iterations, moves *ref_mv to the best of its four neighbours (row-1, col-1, col+1, row+1) while SAD plus mvsad_err_cost improves. Returns the best such value found. */
     641           0 : int eb_av1_refining_search_sad(IntraBcContext  *x, MV *ref_mv, int error_per_bit,  // ref_mv: start position in, refined position out
     642             :                             int search_range,
     643             :                             const aom_variance_fn_ptr_t *fn_ptr,  // sdf and sdx4df are the members used here
     644             :                             const MV *center_mv) {
     645           0 :   const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };  // { row, col } steps, same order as positions[] below
     646           0 :   const struct Buf2D *const what = &x->plane[0].src;
     647           0 :   const struct Buf2D *const in_what = &x->xdplane[0].pre[0];
     648           0 :   const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };  // center_mv shifted right by 3 (presumably 1/8-pel -> full-pel; confirm)
     649           0 :   const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
     650           0 :   unsigned int best_sad =
     651           0 :       fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
     652           0 :       mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
     653             :   int i, j;
     654             : 
     655           0 :   for (i = 0; i < search_range; i++) {
     656           0 :     int best_site = -1;  // -1 means no neighbour beat best_sad in this iteration
     657           0 :     const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) &  // bitwise '&' on 0/1 comparison results; all four are evaluated
     658           0 :                        ((ref_mv->row + 1) < x->mv_limits.row_max) &
     659           0 :                        ((ref_mv->col - 1) > x->mv_limits.col_min) &
     660           0 :                        ((ref_mv->col + 1) < x->mv_limits.col_max);
     661             : 
     662           0 :     if (all_in) {  // all four neighbours strictly inside the limits: one sdx4df call covers them
     663             :       unsigned int sads[4];
     664           0 :       const uint8_t *const positions[4] = { best_address - in_what->stride,
     665           0 :                                             best_address - 1, best_address + 1,
     666           0 :                                             best_address + in_what->stride };
     667             : 
     668           0 :       fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
     669             : 
     670           0 :       for (j = 0; j < 4; ++j) {
     671           0 :         if (sads[j] < best_sad) {  // MV cost is only added when the raw SAD already beats the best
     672           0 :           const MV mv = { ref_mv->row + neighbors[j].row,
     673           0 :                           ref_mv->col + neighbors[j].col };
     674           0 :           sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
     675           0 :           if (sads[j] < best_sad) {
     676           0 :             best_sad = sads[j];
     677           0 :             best_site = j;
     678             :           }
     679             :         }
     680             :       }
     681             :     } else {  // near a limit: test each neighbour with is_mv_in and a single sdf call
     682           0 :       for (j = 0; j < 4; ++j) {
     683           0 :         const MV mv = { ref_mv->row + neighbors[j].row,
     684           0 :                         ref_mv->col + neighbors[j].col };
     685             : 
     686           0 :         if (is_mv_in(&x->mv_limits, &mv)) {
     687             :           unsigned int sad =
     688           0 :               fn_ptr->sdf(what->buf, what->stride,
     689             :                           get_buf_from_mv(in_what, &mv), in_what->stride);
     690           0 :           if (sad < best_sad) {
     691           0 :             sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
     692           0 :             if (sad < best_sad) {
     693           0 :               best_sad = sad;
     694           0 :               best_site = j;
     695             :             }
     696             :           }
     697             :         }
     698             :       }
     699             :     }
     700             : 
     701           0 :     if (best_site == -1) {
     702           0 :       break;
     703             :     } else {
     704           0 :       x->second_best_mv.as_mv = *ref_mv;  // remember the position being left before moving
     705           0 :       ref_mv->row += neighbors[best_site].row;
     706           0 :       ref_mv->col += neighbors[best_site].col;
     707           0 :       best_address = get_buf_from_mv(in_what, ref_mv);
     708             :     }
     709             :   }
     710             : 
     711           0 :   return best_sad;  // unsigned value returned through the int return type
     712             : }
     713             : #if OBMC_FLAG
     714     1781130 : static int get_obmc_mvpred_var(const IntraBcContext *x, const int32_t *wsrc,  // Returns vfp->ovf at best_mv in pre[is_second], plus mv_err_cost when use_mvcost is nonzero
     715             :                                const int32_t *mask, const MV *best_mv,
     716             :                                const MV *center_mv,
     717             :                                const aom_variance_fn_ptr_t *vfp, int use_mvcost,
     718             :                                int is_second) {
     719             : 
     720     1781130 :  const   struct Buf2d * in_what = (const   struct Buf2d *) (&x->xdplane[0].pre[is_second]);  // NOTE(review): cast to `struct Buf2d` here but back to `struct Buf2D` for get_buf_from_mv below; confirm both tags are intended
     721     1781130 :   const MV mv = { best_mv->row * 8, best_mv->col * 8 };  // best_mv scaled by 8 for mv_err_cost (presumably full-pel -> 1/8-pel; confirm)
     722             :   unsigned int unused;  // receives ovf's last output argument; never read
     723             : 
     724     1781130 :   return vfp->ovf(get_buf_from_mv((const struct Buf2D *)in_what, best_mv), in_what->stride, wsrc,
     725     1781160 :                   mask, &unused) +
     726     1781170 :          (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmv_vec_cost,
     727             :                                    x->mv_cost_stack, x->errorperbit)
     728     3562330 :                      : 0);
     729             : }
     730     1781110 : static int obmc_refining_search_sad(const IntraBcContext *x, const int32_t *wsrc,  // 1-away refining search scored with fn_ptr->osdf against wsrc/mask; *ref_mv is updated in place
     731             :                                     const int32_t *mask, MV *ref_mv,
     732             :                                     int error_per_bit, int search_range,  // search_range bounds the number of moves
     733             :                                     const aom_variance_fn_ptr_t *fn_ptr,
     734             :                                     const MV *center_mv, int is_second) {  // is_second indexes x->xdplane[0].pre[]
     735     1781110 :   const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };  // { row, col } steps
     736             : 
     737     1781110 :   const  struct Buf2d *in_what = (const  struct Buf2d *)(&x->xdplane[0].pre[is_second]);  // NOTE(review): `struct Buf2d` here vs `struct Buf2D` in the get_buf_from_mv casts; confirm
     738     1781110 :   const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };  // center_mv shifted right by 3 (presumably 1/8-pel -> full-pel; confirm)
     739     1781110 :   unsigned int best_sad = fn_ptr->osdf(get_buf_from_mv((const struct Buf2D *)in_what, ref_mv),
     740             :                                        in_what->stride, wsrc, mask) +
     741     1781170 :                           mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
     742             :   int i, j;
     743             : 
     744     3660000 :   for (i = 0; i < search_range; i++) {
     745     3652260 :     int best_site = -1;  // -1 means no neighbour improved on best_sad
     746             : 
     747    18257300 :     for (j = 0; j < 4; j++) {
     748    14605100 :       const MV mv = { ref_mv->row + neighbors[j].row,
     749    14605100 :                       ref_mv->col + neighbors[j].col };
     750    14605100 :       if (is_mv_in(&x->mv_limits, &mv)) {
     751    14585000 :         unsigned int sad = fn_ptr->osdf(get_buf_from_mv((const struct Buf2D *)in_what, &mv),
     752             :                                         in_what->stride, wsrc, mask);
     753    14584600 :         if (sad < best_sad) {  // MV cost is only added when the raw value already beats the best
     754     7177370 :           sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
     755     7177380 :           if (sad < best_sad) {
     756     2235740 :             best_sad = sad;
     757     2235740 :             best_site = j;
     758             :           }
     759             :         }
     760             :       }
     761             :     }
     762             : 
     763     3652260 :     if (best_site == -1) {
     764     1773380 :       break;
     765             :     } else {
     766     1878880 :       ref_mv->row += neighbors[best_site].row;  // unlike eb_av1_refining_search_sad, x->second_best_mv is not updated (x is const here)
     767     1878880 :       ref_mv->col += neighbors[best_site].col;
     768             :     }
     769             :   }
     770     1781130 :   return best_sad;
     771             : }
     772             : 
     773             : /* OBMC full-pel search: copies *mvp_full to *dst_mv, clamps it to x->mv_limits, refines it with obmc_refining_search_sad, and returns get_obmc_mvpred_var for the result. */
     774     1781130 : int av1_obmc_full_pixel_search(
     775             :     ModeDecisionContext *context_ptr,
     776             :     IntraBcContext *x,
     777             :     MV *mvp_full,
     778             :     int sadpb,
     779             :     const aom_variance_fn_ptr_t *fn_ptr,
     780             :     const MV *ref_mv,
     781             :     MV *dst_mv,
     782             :     int is_second) {
     783             : 
     784             :     // obmc_full_pixel_diamond does not provide BDR gain on 360p
     785     1781130 :     const int32_t *wsrc = context_ptr->wsrc_buf;
     786     1781130 :     const int32_t *mask = context_ptr->mask_buf;
     787     1781130 :     const int search_range = 8;  // iteration cap handed to obmc_refining_search_sad
     788     1781130 :     *dst_mv = *mvp_full;  // mvp_full itself is left unmodified
     789     1781130 :     clamp_mv(dst_mv, x->mv_limits.col_min, x->mv_limits.col_max,
     790             :              x->mv_limits.row_min, x->mv_limits.row_max);
     791     1781140 :     int thissme = obmc_refining_search_sad(
     792             :         x, wsrc, mask, dst_mv, sadpb, search_range, fn_ptr, ref_mv, is_second);
     793     1781130 :     if (thissme < INT_MAX)  // an INT_MAX result is returned unchanged
     794     1781130 :       thissme = get_obmc_mvpred_var(x, wsrc, mask, dst_mv, ref_mv, fn_ptr, 1, is_second);  // use_mvcost = 1
     795             : 
     796     1781150 :     return thissme;
     797             : 
     798             : }
     799             : /* Computes the sub-pel search window: mv_limits * 8 intersected with ref_mv +/- MAX_FULL_PEL_VAL * 8, then clamped to [MV_LOW + 1, MV_UPP - 1]. Results go to *col_min, *col_max, *row_min, *row_max. */
     800     1781180 : static INLINE void set_subpel_mv_search_range(const MvLimits *mv_limits,
     801             :                                               int *col_min, int *col_max,
     802             :                                               int *row_min, int *row_max,
     803             :                                               const MV *ref_mv) {
     804     1781180 :   const int max_mv = MAX_FULL_PEL_VAL * 8;  // the * 8 presumably converts full-pel to 1/8-pel units; confirm
     805     1781180 :   const int minc = AOMMAX(mv_limits->col_min * 8, ref_mv->col - max_mv);
     806     1781180 :   const int maxc = AOMMIN(mv_limits->col_max * 8, ref_mv->col + max_mv);
     807     1781180 :   const int minr = AOMMAX(mv_limits->row_min * 8, ref_mv->row - max_mv);
     808     1781180 :   const int maxr = AOMMIN(mv_limits->row_max * 8, ref_mv->row + max_mv);
     809             : 
     810     1781180 :   *col_min = AOMMAX(MV_LOW + 1, minc);  // final clamp keeps every bound strictly between MV_LOW and MV_UPP
     811     1781180 :   *col_max = AOMMIN(MV_UPP - 1, maxc);
     812     1781180 :   *row_min = AOMMAX(MV_LOW + 1, minr);
     813     1781180 :   *row_max = AOMMIN(MV_UPP - 1, maxr);
     814     1781180 : }
     815             : static const MV search_step_table[12] = {  // { row, col } offsets in three groups of four
     816             :   // left, right, up, down
     817             :   { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },  // step 4
     818             :   { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },  // step 2
     819             :   { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }  // step 1
     820             : };
     821             : 
     822             : /* Scores the starting position: vfp->ovf at y + offset is stored in *distortion (and its sse in *sse1); the return value adds mv_err_cost(bestmv, ref_mv, ...). */
     823           0 : static unsigned int setup_obmc_center_error(
     824             :     const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
     825             :     const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
     826             :     const uint8_t *const y, int y_stride, int offset, int *mvjcost,
     827             :     int *mvcost[2], unsigned int *sse1, int *distortion) {
     828             :   unsigned int besterr;
     829           0 :   besterr = vfp->ovf(y + offset, y_stride, wsrc, mask, sse1);
     830           0 :   *distortion = besterr;  // distortion excludes the MV cost added on the next line
     831           0 :   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
     832           0 :   return besterr;
     833             : }
     834             : 
     835             : 
     836             : /* MV cost of (r, c) relative to (rr, rc): (joint cost + row cost + col cost) * error_per_bit, plus 4096, shifted right by 13; 0 when mvcost is NULL. Uses mvjcost, mvcost, rr, rc and error_per_bit from the expansion site. */
     837             : #define MVC(r, c)                                                              \
     838             :   (unsigned int)(mvcost                                                        \
     839             :                      ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +              \
     840             :                          mvcost[0][((r)-rr)] + (int64_t)mvcost[1][((c)-rc)]) * \
     841             :                             error_per_bit +                                    \
     842             :                         4096) >>                                               \
     843             :                            13                                                  \
     844             :                      : 0)
     845             : 
     846             : /* sub-pixel error at (r, c): calls vfp->osvf on pre(y, y_stride, r, c) with offsets sp(c), sp(r) against z and mask; writes sse. Uses vfp, y, y_stride, z, mask and sse from the expansion site. */
     847             : #define DIST(r, c) \
     848             :   vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask, &sse)
     849             : #define CHECK_BETTER(v, r, c)                             \
     850             :   if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
     851             :     thismse = (DIST(r, c));                               \
     852             :     if ((v = MVC(r, c) + thismse) < besterr) {            \
     853             :       besterr = v;                                        \
     854             :       br = r;                                             \
     855             :       bc = c;                                             \
     856             :       *distortion = thismse;                              \
     857             :       *sse1 = sse;                                        \
     858             :     }                                                     \
     859             :   } else {                                                \
     860             :     v = INT_MAX;                                          \
     861             :   }  // CHECK_BETTER: inside [minr, maxr] x [minc, maxc], v = MVC + DIST and besterr/br/bc/*distortion/*sse1 are updated when v < besterr; outside, v = INT_MAX. Expands to a bare if/else and evaluates r and c several times.
     862             : #define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)  // alias so that CHECK_BETTER##k with k = 0 resolves to CHECK_BETTER
     863             : 
     864             : /* Variant of CHECK_BETTER: thismse comes from upsampled_obmc_pref_error and the MV cost from mv_err_cost; the best is updated when v + thismse < besterr. */
     865             : #define CHECK_BETTER1(v, r, c)                                                \
     866             :   if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                     \
     867             :     MV this_mv = { r, c };                                                    \
     868             :     thismse = upsampled_obmc_pref_error(xd, cm, mi_row, mi_col, &this_mv,     \
     869             :                                         mask, vfp, z, pre(y, y_stride, r, c), \
     870             :                                         y_stride, sp(c), sp(r), w, h, &sse,   \
     871             :                                         use_accurate_subpel_search);          \
     872             :     v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);        \
     873             :     if ((v + thismse) < besterr) {                                            \
     874             :       besterr = v + thismse;                                                  \
     875             :       br = r;                                                                 \
     876             :       bc = c;                                                                 \
     877             :       *distortion = thismse;                                                  \
     878             :       *sse1 = sse;                                                            \
     879             :     }                                                                         \
     880             :   } else {                                                                    \
     881             :     v = INT_MAX;                                                              \
     882             :   }  // in range, v is left holding only the MV cost (not v + thismse); out of range, v = INT_MAX
     883             : 
     884             : // TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST is a rewrite of
     885             : // SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
     886             : // later in the same way.
     887             : #define SECOND_LEVEL_CHECKS_BEST(k)                \
     888             :   {                                                \
     889             :     unsigned int second;                           \
     890             :     int br0 = br;                                  \
     891             :     int bc0 = bc;                                  \
     892             :     assert(tr == br || tc == bc);                  \
     893             :     if (tr == br && tc != bc) {                    \
     894             :       kc = bc - tc;                                \
     895             :     } else if (tr != br && tc == bc) {             \
     896             :       kr = br - tr;                                \
     897             :     }                                              \
     898             :     CHECK_BETTER##k(second, br0 + kr, bc0);        \
     899             :     CHECK_BETTER##k(second, br0, bc0 + kc);        \
     900             :     if (br0 != br || bc0 != bc) {                  \
     901             :       CHECK_BETTER##k(second, br0 + kr, bc0 + kc); \
     902             :     }                                              \
     903             :   }  // k selects CHECK_BETTER0 or CHECK_BETTER1. Only one of kr/kc is assigned here, so both must already hold values at the expansion site; the diagonal (br0 + kr, bc0 + kc) is checked only if one of the first two checks moved the best.
     904             : /* Builds the prediction for mv into a local buffer with aom_upsampled_pred (or aom_highbd_upsampled_pred in the high-bit-depth build path) and returns vfp->ovf of that buffer against wsrc/mask; *sse receives ovf's sse output. */
     905    36563600 : static int upsampled_obmc_pref_error(
     906             :     MacroBlockD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
     907             :     const MV *const mv, const int32_t *mask, const aom_variance_fn_ptr_t *vfp,
     908             :     const int32_t *const wsrc, const uint8_t *const y, int y_stride,
     909             :     int subpel_x_q3, int subpel_y_q3, int w, int h, unsigned int *sse,
     910             :     int subpel_search) {
     911             :   unsigned int besterr;
     912             : 
     913             :   DECLARE_ALIGNED(16, uint8_t, pred[2 * MAX_SB_SQUARE]);  // local scratch buffer that receives the prediction
     914             : #if CONFIG_AV1_HIGHBITDEPTH
     915             :   if (is_cur_buf_hbd(xd)) {
     916             :     uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred);
     917             :     aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred8, w, h,
     918             :                               subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd,
     919             :                               subpel_search);
     920             :     besterr = vfp->ovf(pred8, w, wsrc, mask, sse);
     921             :   } else {
     922             :     aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3,
     923             :                        subpel_y_q3, y, y_stride, subpel_search);
     924             : 
     925             :     besterr = vfp->ovf(pred, w, wsrc, mask, sse);
     926             :   }
     927             : #else  // !CONFIG_AV1_HIGHBITDEPTH
     928    36563600 :   aom_upsampled_pred(xd, (const struct AV1Common *const)cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3,
     929             :                      subpel_y_q3, y, y_stride, subpel_search);
     930             : 
     931    36577500 :   besterr = vfp->ovf(pred, w, wsrc, mask, sse);  // pred is read with stride w
     932             : #endif  // CONFIG_AV1_HIGHBITDEPTH
     933    36583300 :   return besterr;  // unsigned value returned through the int return type
     934             : }
     935     1781170 : static unsigned int upsampled_setup_obmc_center_error(  // Scores the starting position via upsampled_obmc_pref_error; returns that error plus mv_err_cost(bestmv, ref_mv, ...)
     936             :     MacroBlockD *xd, const Av1Common *const cm, int mi_row, int mi_col,
     937             :     const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
     938             :     const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
     939             :     const uint8_t *const y, int y_stride, int w, int h, int offset,
     940             :     int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion,
     941             :     int subpel_search) {
     942     1781170 :   unsigned int besterr = upsampled_obmc_pref_error(
     943             :       xd, cm, mi_row, mi_col, bestmv, mask, vfp, wsrc, y + offset, y_stride, 0,  // both sub-pel offsets are passed as 0
     944             :       0, w, h, sse1, subpel_search);
     945     1781170 :   *distortion = besterr;  // distortion excludes the MV cost added on the next line
     946     1781170 :   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
     947     1781140 :   return besterr;
     948             : }
     949             : 
     950             : 
     951             : // Convert a motion vector component to the sub-pel offset (its low three bits, x & 7) for the sv[a]f calc
     952    69558600 : static INLINE int sp(int x) { return x & 7; }
     953    34780200 : static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {  // Returns buf + (r >> 3) * stride + (c >> 3): the sample address for (r, c) after dropping the low three bits of each
     954    34780200 :   const int offset = (r >> 3) * stride + (c >> 3);
     955    34780200 :   return buf + offset;
     956             : }
     957             : 
     958     1781160 : int av1_find_best_obmc_sub_pixel_tree_up(
     959             :     ModeDecisionContext *context_ptr,IntraBcContext *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
     960             :     MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit,
     961             :     const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
     962             :     int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
     963             :     int is_second, int use_accurate_subpel_search) {
     964     1781160 :   const int32_t *wsrc = context_ptr->wsrc_buf;
     965     1781160 :   const int32_t *mask = context_ptr->mask_buf;
     966     1781160 :   const int *const z = wsrc;
     967     1781160 :   const int *const src_address = z;
     968     1781160 :   MacroBlockD *xd = x->xd;
     969     1781160 :   struct MacroBlockDPlane *const pd = &x->xdplane[0];
     970     1781160 :   unsigned int besterr = INT_MAX;
     971             :   unsigned int sse;
     972             :   unsigned int thismse;
     973             : 
     974     1781160 :   int rr = ref_mv->row;
     975     1781160 :   int rc = ref_mv->col;
     976     1781160 :   int br = bestmv->row * 8;
     977     1781160 :   int bc = bestmv->col * 8;
     978     1781160 :   int hstep = 4;
     979             :   int iter;
     980     1781160 :   int round = 3 - forced_stop;
     981     1781160 :   int tr = br;
     982     1781160 :   int tc = bc;
     983     1781160 :   const MV *search_step = search_step_table;
     984     1781160 :   int idx, best_idx = -1;
     985             :   unsigned int cost_array[5];
     986             :   int kr, kc;
     987     1781160 :   const int w = block_size_wide[context_ptr->blk_geom->bsize];
     988     1781160 :   const int h = block_size_high[context_ptr->blk_geom->bsize];
     989             :   int offset;
     990             :   int y_stride;
     991             :   const uint8_t *y;
     992             : 
     993             :   int minc, maxc, minr, maxr;
     994             : 
     995     1781160 :   set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, ref_mv);
     996             : 
     997     1781180 :   y = pd->pre[is_second].buf;
     998     1781180 :   y_stride = pd->pre[is_second].stride;
     999     1781180 :   offset = bestmv->row * y_stride + bestmv->col;
    1000             : 
    1001     1781180 :   if (!allow_hp)
    1002           0 :     if (round == 3) round = 2;
    1003             : 
    1004     1781180 :   bestmv->row *= 8;
    1005     1781180 :   bestmv->col *= 8;
    1006             :   // use_accurate_subpel_search can be 0 or 1 or 2
    1007     1781180 :   if (use_accurate_subpel_search)
    1008     1781170 :     besterr = upsampled_setup_obmc_center_error(
    1009             :         xd, cm, mi_row, mi_col, mask, bestmv, ref_mv, error_per_bit, vfp, z, y,
    1010             :         y_stride, w, h, offset, mvjcost, mvcost, sse1, distortion,
    1011             :         use_accurate_subpel_search);
    1012             :   else
    1013           3 :     besterr = setup_obmc_center_error(mask, bestmv, ref_mv, error_per_bit, vfp,
    1014             :                                       z, y, y_stride, offset, mvjcost, mvcost,
    1015             :                                       sse1, distortion);
    1016             : 
    1017     7125600 :   for (iter = 0; iter < round; ++iter) {
    1018             :     // Check vertical and horizontal sub-pixel positions.
    1019    26706800 :     for (idx = 0; idx < 4; ++idx) {
    1020    21362200 :       tr = br + search_step[idx].row;
    1021    21362200 :       tc = bc + search_step[idx].col;
    1022    42712500 :       if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
    1023    21348500 :         MV this_mv = { tr, tc };
    1024    21348500 :         if (use_accurate_subpel_search) {
    1025    21348300 :           thismse = upsampled_obmc_pref_error(
    1026             :               xd, cm, mi_row, mi_col, &this_mv, mask, vfp, src_address,
    1027             :               pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse,
    1028             :               use_accurate_subpel_search);
    1029             :         } else {
    1030         209 :           thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc),
    1031             :                               sp(tr), src_address, mask, &sse);
    1032             :         }
    1033             : 
    1034    21352500 :         cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
    1035             :                                                 mvcost, error_per_bit);
    1036    21350300 :         if (cost_array[idx] < besterr) {
    1037     4615760 :           best_idx = idx;
    1038     4615760 :           besterr = cost_array[idx];
    1039     4615760 :           *distortion = thismse;
    1040     4615760 :           *sse1 = sse;
    1041             :         }
    1042             :       } else {
    1043       13662 :         cost_array[idx] = INT_MAX;
    1044             :       }
    1045             :     }
    1046             : 
    1047             :     // Check diagonal sub-pixel position
    1048     5344610 :     kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
    1049     5344610 :     kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
    1050             : 
    1051     5344610 :     tc = bc + kc;
    1052     5344610 :     tr = br + kr;
    1053    10687600 :     if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
    1054     5343050 :       MV this_mv = { tr, tc };
    1055             : 
    1056     5343050 :       if (use_accurate_subpel_search) {
    1057     5343000 :         thismse = upsampled_obmc_pref_error(
    1058             :             xd, cm, mi_row, mi_col, &this_mv, mask, vfp, src_address,
    1059             :             pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse,
    1060             :             use_accurate_subpel_search);
    1061             :       } else {
    1062          49 :         thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr),
    1063             :                             src_address, mask, &sse);
    1064             :       }
    1065             : 
    1066     5343120 :       cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
    1067             :                                             error_per_bit);
    1068             : 
    1069     5342960 :       if (cost_array[4] < besterr) {
    1070     1282480 :         best_idx = 4;
    1071     1282480 :         besterr = cost_array[4];
    1072     1282480 :         *distortion = thismse;
    1073     1282480 :         *sse1 = sse;
    1074             :       }
    1075             :     } else {
    1076        1557 :       cost_array[idx] = INT_MAX;
    1077             :     }
    1078             : 
    1079     5344520 :     if (best_idx < 4 && best_idx >= 0) {
    1080     2577570 :       br += search_step[best_idx].row;
    1081     2577570 :       bc += search_step[best_idx].col;
    1082     2766940 :     } else if (best_idx == 4) {
    1083     1282480 :       br = tr;
    1084     1282480 :       bc = tc;
    1085             :     }
    1086             : 
    1087     5344520 :     if (iters_per_step > 1 && best_idx != -1) {
    1088     3859900 :       if (use_accurate_subpel_search) {
    1089     3859900 :         SECOND_LEVEL_CHECKS_BEST(1);
    1090             :       } else {
    1091           0 :         SECOND_LEVEL_CHECKS_BEST(0);
    1092             :       }
    1093             :     }
    1094             : 
    1095     5344470 :     tr = br;
    1096     5344470 :     tc = bc;
    1097             : 
    1098     5344470 :     search_step += 4;
    1099     5344470 :     hstep >>= 1;
    1100     5344470 :     best_idx = -1;
    1101             :   }
    1102             : 
    1103             :   // These lines ensure static analysis doesn't warn that
    1104             :   // tr and tc aren't used after the above point.
    1105             :   (void)tr;
    1106             :   (void)tc;
    1107             : 
    1108     1782750 :   bestmv->row = br;
    1109     1782750 :   bestmv->col = bc;
    1110             : 
    1111     1782750 :   return besterr;
    1112             : }
    1113             : 
    1114             : 
    1115             : #endif
    1116           0 : int eb_av1_full_pixel_search(PictureControlSet *pcs, IntraBcContext  *x, BlockSize bsize,
    1117             :                           MV *mvp_full, int step_param, int method,
    1118             :                           int run_mesh_search, int error_per_bit,
    1119             :                           int *cost_list, const MV *ref_mv, int var_max, int rd,
    1120             :                           int x_pos, int y_pos, int intra) {
                           // Runs the full-pixel motion search selected by `method` on the
                           // IntraBcContext `x`, then tries to improve on the result with
                           // candidates taken from the picture's block hash table
                           // (pcs->hash_table).
                           //
                           // Parameters:
                           //   pcs            picture control set; supplies ibc_mode, the speed
                           //                  features (sf), the hash table and the sequence header.
                           //   x              search context; x->best_mv and x->second_best_mv are
                           //                  updated in place by this function.
                           //   bsize          block size; indexes mefn_ptr and the block
                           //                  width/height tables.
                           //   mvp_full       forwarded to full_pixel_diamond.
                           //   step_param     forwarded to full_pixel_diamond, which also receives
                           //                  MAX_MVSEARCH_STEPS - 1 - step_param.
                           //   method         search method; only NSTEP performs a search here, the
                           //                  calls for the other recognised methods are commented out.
                           //   error_per_bit  forwarded to the search helpers.
                           //   cost_list      optional (may be NULL); when given, entries 0..4 are
                           //                  reset to INT_MAX before searching.
                           //   ref_mv         reference MV forwarded to the search/cost helpers.
                           //   x_pos, y_pos   block position used by the hash search; presumably in
                           //                  luma pixels (they are divided by MI_SIZE to obtain
                           //                  mi units) -- TODO confirm against callers.
                           //   intra          non-zero: a hash candidate must also pass
                           //                  av1_is_dv_valid, and a lone hash hit is not searched.
                           //   run_mesh_search, var_max, rd
                           //                  unused (see UNUSED() below).
                           //
                           // Returns: always 0. The best cost is tracked in the local `var` but
                           // is not returned (see the CHKN note on the return statement).
    1121             :     UNUSED (run_mesh_search);
    1122             :     UNUSED (var_max);
    1123             :     UNUSED (rd);
    1124             : 
                           // ibc_shift is 1 when ibc_mode > 0; it is applied below as a left
                           // shift (i.e. a doubling) of the exhaustive-search threshold.
    1125           0 :     int32_t ibc_shift = 0;
    1126           0 :     if (pcs->parent_pcs_ptr->ibc_mode > 0)
    1127           0 :         ibc_shift = 1;
    1128             : 
                           // NOTE: this overwrites the threshold stored in pcs->sf on every call.
    1129           0 :     SpeedFeatures * sf = &pcs->sf;
    1130           0 :     sf->exhaustive_searches_thresh = (1 << 25);
    1131           0 :   const aom_variance_fn_ptr_t *fn_ptr = &mefn_ptr[bsize];
    1132           0 :   int var = 0;
    1133             : 
    1134           0 :   if (cost_list) {
    1135           0 :     cost_list[0] = INT_MAX;
    1136           0 :     cost_list[1] = INT_MAX;
    1137           0 :     cost_list[2] = INT_MAX;
    1138           0 :     cost_list[3] = INT_MAX;
    1139           0 :     cost_list[4] = INT_MAX;
    1140             :   }
    1141             : 
    1142             :   // Keep track of number of searches (this frame in this thread).
    1143             :   //if (x->m_search_count_ptr != NULL) ++(*x->m_search_count_ptr);
    1144             : 
                         // Only NSTEP runs a search. For the other recognised methods the call
                         // is commented out, so `var` keeps its initial value 0.
    1145           0 :   switch (method) {
    1146           0 :     case FAST_DIAMOND:
    1147             :       //var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
    1148             :       //                      cost_list, fn_ptr, 1, ref_mv);
    1149           0 :       break;
    1150           0 :     case FAST_HEX:
    1151             :       //var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
    1152             :       //                      cost_list, fn_ptr, 1, ref_mv);
    1153           0 :       break;
    1154           0 :     case HEX:
    1155             :       //var = av1_hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
    1156             :       //                     fn_ptr, 1, ref_mv);
    1157           0 :       break;
    1158           0 :     case SQUARE:
    1159             :       //var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
    1160             :       //                    fn_ptr, 1, ref_mv);
    1161           0 :       break;
    1162           0 :     case BIGDIA:
    1163             :       //var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
    1164             :       //                    fn_ptr, 1, ref_mv);
    1165           0 :       break;
    1166           0 :     case NSTEP:
    1167           0 :       var = full_pixel_diamond(pcs, x, mvp_full, step_param, error_per_bit,
    1168             :                                MAX_MVSEARCH_STEPS - 1 - step_param, 1,
    1169             :                                cost_list, fn_ptr, ref_mv);
    1170             : 
    1171           0 :       if (x->is_exhaustive_allowed)
    1172             :       {
                                 // Scale the threshold with block size: the right shift is 0
                                 // when the two log2 terms sum to 10 and grows as they shrink.
    1173           0 :           int exhuastive_thr = sf->exhaustive_searches_thresh;
    1174           0 :           exhuastive_thr >>=
    1175           0 :               10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]);
    1176             : 
    1177           0 :           exhuastive_thr = exhuastive_thr << ibc_shift;
    1178             : 
                                 // Diamond result is still above the threshold: run the
                                 // exhaustive search and adopt its MV only if strictly better.
    1179           0 :           if (var > exhuastive_thr)
    1180             :           {
    1181             :               int var_ex;
    1182             :               MV tmp_mv_ex;
    1183             :               var_ex =
    1184           0 :                   full_pixel_exhaustive(pcs, x, &x->best_mv.as_mv, error_per_bit,
    1185             :                       cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
    1186             : 
    1187           0 :               if (var_ex < var) {
    1188           0 :                   var = var_ex;
    1189           0 :                   x->best_mv.as_mv = tmp_mv_ex;
    1190             :               }
    1191             :           }
    1192             :       }
    1193           0 :       break;
                           // Unknown method: assert only; control then leaves the switch.
    1194           0 :     default: assert(0 && "Invalid search method.");
    1195             :   }
    1196             : 
                         // Hash-based candidate search. The do { } while (0) wrapper exists so
                         // that `break` can skip the remainder of this section.
    1197             :   do {
    1198             :     //CHKN if (!intra || !av1_use_hash_me(&cpi->common)) break;
    1199             : 
    1200             :     // already single ME
    1201             :     // get block size and original buffer of current block
    1202           0 :     const int block_height = block_size_high[bsize];
    1203           0 :     const int block_width = block_size_wide[bsize];
                           // Only square blocks of width 4, 8, 16, 32, 64 or 128 at
                           // non-negative positions are looked up in the hash table.
    1204           0 :     if (block_height == block_width && x_pos >= 0 && y_pos >= 0) {
    1205           0 :       if (block_width == 4 || block_width == 8 || block_width == 16 ||
    1206           0 :           block_width == 32 || block_width == 64 || block_width == 128) {
    1207           0 :         uint8_t *what = x->plane[0].src.buf;
    1208           0 :         const int what_stride = x->plane[0].src.stride;
    1209             :         uint32_t hash_value1, hash_value2;
    1210             :         MV best_hash_mv;
    1211           0 :         int best_hash_cost = INT_MAX;
    1212             : 
    1213             :         // for the hashMap
    1214           0 :         HashTable *ref_frame_hash = &pcs->hash_table;
    1215             : 
                               // hash_value1 is used to look up the candidate list;
                               // hash_value2 is compared against each candidate below.
    1216           0 :         av1_get_block_hash_value(what, what_stride, block_width, &hash_value1,
    1217             :                                  &hash_value2, 0, pcs, x);
    1218             : 
    1219           0 :         const int count = av1_hash_table_count(ref_frame_hash, hash_value1);
    1220             :         // for intra, at least one matching can be found, itself.
    1221           0 :         if (count <= (intra ? 1 : 0))
    1222           0 :           break;
    1223             :         Iterator iterator =
    1224           0 :             av1_hash_get_first_iterator(ref_frame_hash, hash_value1);
    1225           0 :         for (int i = 0; i < count; i++, iterator_increment(&iterator)) {
    1226           0 :           block_hash ref_block_hash = *(block_hash *)(iterator_get(&iterator));
    1227           0 :           if (hash_value2 == ref_block_hash.hash_value2) {
    1228             :             // For intra, make sure the prediction is from valid area.
    1229           0 :             if (intra) {
    1230           0 :               const int mi_col = x_pos / MI_SIZE;
    1231           0 :               const int mi_row = y_pos / MI_SIZE;
                                     // dv = {row, col} offsets scaled by 8 (presumably 1/8-pel
                                     // units -- confirm against av1_is_dv_valid).
    1232           0 :               const MV dv = { 8 * (ref_block_hash.y - y_pos),
    1233           0 :                               8 * (ref_block_hash.x - x_pos) };
    1234           0 :               if (!av1_is_dv_valid(dv, x->xd, mi_row, mi_col,
    1235           0 :                                    bsize, pcs->parent_pcs_ptr->sequence_control_set_ptr->seq_header.sb_size_log2))
    1236           0 :                 continue;
    1237             :             }
                                   // hash_mv is the unscaled offset from the current block to
                                   // the candidate; candidates outside x->mv_limits are skipped.
    1238             :             MV hash_mv;
    1239           0 :             hash_mv.col = ref_block_hash.x - x_pos;
    1240           0 :             hash_mv.row = ref_block_hash.y - y_pos;
    1241           0 :             if (!is_mv_in(&x->mv_limits, &hash_mv)) continue;
    1242             :             const int refCost =
    1243           0 :                 eb_av1_get_mvpred_var(x, &hash_mv, ref_mv, fn_ptr, 1);
    1244           0 :             if (refCost < best_hash_cost) {
    1245           0 :               best_hash_cost = refCost;
    1246           0 :               best_hash_mv = hash_mv;
    1247             :             }
    1248             :           }
    1249             :         }
    1250             : 
                               // best_hash_mv is read only when best_hash_cost < var, which
                               // implies best_hash_cost dropped below INT_MAX and therefore
                               // best_hash_mv was assigned. The previous best MV is kept in
                               // x->second_best_mv.
    1251           0 :         if (best_hash_cost < var) {
    1252           0 :           x->second_best_mv = x->best_mv;
    1253           0 :           x->best_mv.as_mv = best_hash_mv;
    1254           0 :           var = best_hash_cost;
    1255             :         }
    1256             :       }
    1257             :     }
    1258             :   } while (0);
    1259             : 
                         // The computed cost `var` is intentionally not returned here.
    1260           0 :   return 0;//CHKN  var;
    1261             : }

Generated by: LCOV version 1.14