LCOV - code coverage report
Current view: top level - Codec - EbRateDistortionCost.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 950 1219 77.9 %
Date: 2019-11-25 17:38:06 Functions: 32 37 86.5 %

          Line data    Source code
       1             : /*
       2             : * Copyright(c) 2019 Intel Corporation
       3             : * SPDX - License - Identifier: BSD - 2 - Clause - Patent
       4             : */
       5             : 
       6             : /*
       7             : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       8             : *
       9             : * This source code is subject to the terms of the BSD 2 Clause License and
      10             : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      11             : * was not distributed with this source code in the LICENSE file, you can
      12             : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      13             : * Media Patent License 1.0 was not distributed with this source code in the
      14             : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      15             : */
      16             : 
      17             : /***************************************
      18             : * Includes
      19             : ***************************************/
      20             : #include "EbRateDistortionCost.h"
      21             : #include "EbCommonUtils.h"
      22             : #include "aom_dsp_rtcd.h"
      23             : 
      24             : #include <assert.h>
      25             : #if TWO_PASS
      26             : #define FIRST_PASS_COST_PENALTY    20 // The penalty is added in cost calculation of the first pass.
      27             : #endif
      28             : #define AV1_COST_PRECISION          0
      29             : #define MV_COST_WEIGHT              108
      30             : int av1_get_reference_mode_context_new(const MacroBlockD *xd);
      31             : int eb_av1_get_pred_context_uni_comp_ref_p(const MacroBlockD *xd);
      32             : int eb_av1_get_pred_context_uni_comp_ref_p1(const MacroBlockD *xd);
      33             : int eb_av1_get_pred_context_uni_comp_ref_p2(const MacroBlockD *xd);
      34             : int av1_get_comp_reference_type_context_new(const MacroBlockD *xd);
      35             : 
      36             : #if PAL_SUP
      37             : int av1_get_palette_bsize_ctx(BlockSize bsize);
      38             : int av1_get_palette_mode_ctx(const MacroBlockD *xd);
      39             : int write_uniform_cost(int n, int v);
      40             : int eb_get_palette_cache(const MacroBlockD *const xd, int plane,uint16_t *cache);
      41             : int av1_palette_color_cost_y(const PaletteModeInfo *const pmi,
      42             :     uint16_t *color_cache, int n_cache,
      43             :     int bit_depth);
      44             : int av1_cost_color_map(PaletteInfo *palette_info, MdRateEstimationContext  *rate_table, CodingUnit*cu_ptr, int plane, BlockSize bsize,
      45             :      COLOR_MAP_TYPE type);
      46             : void av1_get_block_dimensions(BlockSize bsize, int plane,
      47             :     const MacroBlockD *xd, int *width,
      48             :     int *height,
      49             :     int *rows_within_bounds,
      50             :     int *cols_within_bounds);
      51             : int av1_allow_palette(int allow_screen_content_tools,
      52             :     BlockSize sb_type);
      53             : #endif
      54           0 : BlockSize GetBlockSize(uint8_t cu_size) {
      55           0 :     return (cu_size == 64 ? BLOCK_64X64 : cu_size == 32 ? BLOCK_32X32 : cu_size == 16 ? BLOCK_16X16 : cu_size == 8 ? BLOCK_8X8 : BLOCK_4X4);
      56             : }
      57             : 
      58             : int av1_allow_intrabc(const Av1Common *const cm);
      59             : 
      60    75456500 : uint8_t av1_drl_ctx(const CandidateMv *ref_mv_stack,
      61             :     int32_t ref_idx) {
      62    75456500 :     if (ref_mv_stack[ref_idx].weight >= REF_CAT_LEVEL &&
      63    16308700 :         ref_mv_stack[ref_idx + 1].weight >= REF_CAT_LEVEL)
      64     5240980 :         return 0;
      65             : 
      66    70215500 :     if (ref_mv_stack[ref_idx].weight >= REF_CAT_LEVEL &&
      67    11072900 :         ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL)
      68    11073100 :         return 1;
      69             : 
      70    59142400 :     if (ref_mv_stack[ref_idx].weight < REF_CAT_LEVEL &&
      71    59233300 :         ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL)
      72    59236000 :         return 2;
      73             : 
      74           0 :     return 0;
      75             : }
      76             : 
      77             : /* Symbols for coding which components are zero jointly */
      78             : //#define MV_JOINTS 4
      79             : //typedef enum {
      80             : //    MV_JOINT_ZERO = 0,   /* Zero vector */
      81             : //    MV_JOINT_HNZVZ = 1,  /* Vert zero, hor nonzero */
      82             : //    MV_JOINT_HZVNZ = 2,  /* Hor zero, vert nonzero */
      83             : //    MV_JOINT_HNZVNZ = 3, /* Both components nonzero */
      84             : //} MvJointType;
      85             : 
      86   248323000 : MvJointType av1_get_mv_joint(const MV *mv) {
      87   248323000 :     if (mv->row == 0)
      88    29801600 :         return mv->col == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ;
      89             :     else
      90   218521000 :         return mv->col == 0 ? MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ;
      91             : }
      92   201486000 : int32_t mv_cost(const MV *mv, const int32_t *joint_cost,
      93             :     int32_t *const comp_cost[2]) {
      94   201486000 :     int32_t jnC = av1_get_mv_joint(mv);
      95   201541000 :     int32_t res =
      96   201541000 :         joint_cost[jnC] + comp_cost[0][mv->row] +
      97   201541000 :         comp_cost[1][mv->col];
      98             : 
      99   201541000 :     return res;
     100             : }
     101             : 
     102   201503000 : int32_t eb_av1_mv_bit_cost(const MV *mv, const MV *ref, const int32_t *mvjcost,
     103             :     int32_t *mvcost[2], int32_t weight) {
     104   201503000 :     const MV diff = { mv->row - ref->row, mv->col - ref->col };
     105   201503000 :     return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
     106             : }
     107             : 
     108             : /////////////////////////////COEFFICIENT CALCULATION //////////////////////////////////////////////
     109     1751740 : static INLINE int32_t get_golomb_cost(int32_t abs_qc) {
     110     1751740 :     if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
     111     1751740 :         const int32_t r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
     112     1751740 :         const int32_t length = get_msb(r) + 1;
     113     1751740 :         return av1_cost_literal(2 * length - 1);
     114             :     }
     115           0 :     return 0;
     116             : }
     117             : 
     118           0 : void eb_av1_txb_init_levels_c(
     119             :     const TranLow *const coeff,
     120             :     const int32_t width,
     121             :     const int32_t height,
     122             :     uint8_t *const levels) {
     123           0 :     const int32_t stride = width + TX_PAD_HOR;
     124           0 :     uint8_t *ls = levels;
     125             : 
     126           0 :     memset(levels - TX_PAD_TOP * stride, 0,
     127             :         sizeof(*levels) * TX_PAD_TOP * stride);
     128           0 :     memset(levels + stride * height, 0,
     129           0 :         sizeof(*levels) * (TX_PAD_BOTTOM * stride + TX_PAD_END));
     130             : 
     131           0 :     for (int32_t i = 0; i < height; i++) {
     132           0 :         for (int32_t j = 0; j < width; j++)
     133           0 :             *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, INT8_MAX);
     134           0 :         for (int32_t j = 0; j < TX_PAD_HOR; j++)
     135           0 :             *ls++ = 0;
     136             :     }
     137           0 : }
     138             : 
     139             : // TODO(angiebird): use this function whenever it's possible
     140    34557800 : int32_t Av1TransformTypeRateEstimation(
     141             :     uint8_t        allow_update_cdf,
     142             :     FRAME_CONTEXT *fc,
     143             :     struct ModeDecisionCandidateBuffer    *candidate_buffer_ptr,
     144             :     EbBool                                  is_inter,
     145             : #if !FILTER_INTRA_FLAG
     146             :     EbBool                                  useFilterIntraFlag,
     147             : #endif
     148             :     TxSize                                  transform_size,
     149             :     TxType                                  transform_type,
     150             :     EbBool                                  reduced_tx_set_used)
     151             : {
     152             : #if !FILTER_INTRA_FLAG
     153             :     uint8_t filterIntraMode = 0; // AMIR to check// NM- hardcoded to zero for the moment until we support different intra filtering modes.
     154             : #endif
     155             :     //const MbModeInfo *mbmi = &xd->mi[0]->mbmi;
     156             :     //const int32_t is_inter = is_inter_block(mbmi);
     157             : 
     158    34557800 :     if (get_ext_tx_types(transform_size, is_inter, reduced_tx_set_used) > 1  /*&&    !xd->lossless[xd->mi[0]->mbmi.segment_id]  WE ARE NOT LOSSLESS*/) {
     159    32732300 :         const TxSize square_tx_size = txsize_sqr_map[transform_size];
     160    32732300 :         assert(square_tx_size < EXT_TX_SIZES);
     161             : 
     162    32732300 :         const int32_t ext_tx_set = get_ext_tx_set(transform_size, is_inter, reduced_tx_set_used);
     163    32729000 :         if (is_inter) {
     164    16644500 :             if (ext_tx_set > 0)
     165             :             {
     166    16644600 :                 if (allow_update_cdf) {
     167             :                     const TxSetType tx_set_type =
     168        2777 :                         get_ext_tx_set_type(transform_size, is_inter, reduced_tx_set_used);
     169             : 
     170        2777 :                     update_cdf(fc->inter_ext_tx_cdf[ext_tx_set][square_tx_size],
     171             :                         av1_ext_tx_ind[tx_set_type][transform_type],
     172             :                         av1_num_ext_tx_set[tx_set_type]);
     173             :                 }
     174    16645100 :                 return candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->inter_tx_type_fac_bits[ext_tx_set][square_tx_size][transform_type];
     175             :             }
     176             :         }
     177             :         else {
     178    16084500 :             if (ext_tx_set > 0) {
     179             :                 PredictionMode intra_dir;
     180             : #if FILTER_INTRA_FLAG
     181    16097400 :                 if (candidate_buffer_ptr->candidate_ptr->filter_intra_mode != FILTER_INTRA_MODES)
     182     4494820 :                     intra_dir = fimode_to_intradir[candidate_buffer_ptr->candidate_ptr->filter_intra_mode];
     183             : #else
     184             :                 if (useFilterIntraFlag)
     185             :                     intra_dir = fimode_to_intradir[filterIntraMode];
     186             : #endif
     187             :                 else
     188    11602600 :                     intra_dir = candidate_buffer_ptr->candidate_ptr->pred_mode;
     189    16097400 :                 assert(intra_dir < INTRA_MODES);
     190             :                 const TxSetType tx_set_type =
     191    16097400 :                     get_ext_tx_set_type(transform_size, is_inter, reduced_tx_set_used);
     192             : 
     193    16093500 :                 if (allow_update_cdf) {
     194        4282 :                     update_cdf(
     195        4282 :                         fc->intra_ext_tx_cdf[ext_tx_set][square_tx_size][intra_dir],
     196             :                         av1_ext_tx_ind[tx_set_type][transform_type],
     197             :                         av1_num_ext_tx_set[tx_set_type]);
     198             :                 }
     199    16095700 :                 return candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->intra_tx_type_fac_bits[ext_tx_set][square_tx_size][intra_dir][transform_type];
     200             :             }
     201             :         }
     202             :     }
     203     1815780 :     return 0;
     204             : }
     205             : 
     206             : static const int8_t eob_to_pos_small[33] = {
     207             :     0, 1, 2,                                        // 0-2
     208             :     3, 3,                                           // 3-4
     209             :     4, 4, 4, 4,                                     // 5-8
     210             :     5, 5, 5, 5, 5, 5, 5, 5,                         // 9-16
     211             :     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6  // 17-32
     212             : };
     213             : 
     214             : static const int8_t eob_to_pos_large[17] = {
     215             :     6,                               // place holder
     216             :     7,                               // 33-64
     217             :     8, 8,                           // 65-128
     218             :     9, 9, 9, 9,                   // 129-256
     219             :     10, 10, 10, 10, 10, 10, 10, 10,  // 257-512
     220             :     11                               // 513-
     221             : };
     222             : 
     223    43885800 : static INLINE int32_t get_eob_pos_token(const int32_t eob, int32_t *const extra) {
     224             :     int32_t t;
     225             : 
     226    43885800 :     if (eob < 33)
     227    30095100 :         t = eob_to_pos_small[eob];
     228             :     else {
     229    13790600 :         const int32_t e = AOMMIN((eob - 1) >> 5, 16);
     230    13790600 :         t = eob_to_pos_large[e];
     231             :     }
     232             : 
     233    43885800 :     *extra = eob - eb_k_eob_group_start[t];
     234             : 
     235    43885800 :     return t;
     236             : }
     237             : #define TX_SIZE TxSize
     238        9342 : static INLINE TX_SIZE get_txsize_entropy_ctx(TX_SIZE txsize) {
     239        9342 :     return (TX_SIZE)((txsize_sqr_map[txsize] + txsize_sqr_up_map[txsize] + 1) >>
     240             :         1);
     241             : }
     242        9342 : void eb_av1_update_eob_context(int eob, TX_SIZE tx_size, TxClass tx_class,
     243             :     PlaneType plane, FRAME_CONTEXT *ec_ctx,
     244             :     uint8_t allow_update_cdf) {
     245             :     int eob_extra;
     246        9342 :     const int eob_pt = get_eob_pos_token(eob, &eob_extra);
     247        9342 :     TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
     248        9342 :     assert(txs_ctx < TX_SIZES);
     249        9342 :     const int eob_multi_size = txsize_log2_minus4[tx_size];
     250        9342 :     const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
     251             : 
     252        9342 :     switch (eob_multi_size) {
     253        2974 :     case 0:
     254             : #if CONFIG_ENTROPY_STATS
     255             :         ++counts->eob_multi16[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
     256             : #endif
     257        2974 :         if (allow_update_cdf)
     258        2974 :             update_cdf(ec_ctx->eob_flag_cdf16[plane][eob_multi_ctx], eob_pt - 1, 5);
     259        2974 :         break;
     260        1668 :     case 1:
     261             : #if CONFIG_ENTROPY_STATS
     262             :         ++counts->eob_multi32[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
     263             : #endif
     264        1668 :         if (allow_update_cdf)
     265        1668 :             update_cdf(ec_ctx->eob_flag_cdf32[plane][eob_multi_ctx], eob_pt - 1, 6);
     266        1668 :         break;
     267        3068 :     case 2:
     268             : #if CONFIG_ENTROPY_STATS
     269             :         ++counts->eob_multi64[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
     270             : #endif
     271        3068 :         if (allow_update_cdf)
     272        3068 :             update_cdf(ec_ctx->eob_flag_cdf64[plane][eob_multi_ctx], eob_pt - 1, 7);
     273        3068 :         break;
     274         677 :     case 3:
     275             : #if CONFIG_ENTROPY_STATS
     276             :         ++counts->eob_multi128[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
     277             : #endif
     278         677 :         if (allow_update_cdf) {
     279         677 :             update_cdf(ec_ctx->eob_flag_cdf128[plane][eob_multi_ctx], eob_pt - 1,
     280             :                 8);
     281             :         }
     282         677 :         break;
     283         690 :     case 4:
     284             : #if CONFIG_ENTROPY_STATS
     285             :         ++counts->eob_multi256[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
     286             : #endif
     287         690 :         if (allow_update_cdf) {
     288         690 :             update_cdf(ec_ctx->eob_flag_cdf256[plane][eob_multi_ctx], eob_pt - 1,
     289             :                 9);
     290             :         }
     291         690 :         break;
     292          96 :     case 5:
     293             : #if CONFIG_ENTROPY_STATS
     294             :         ++counts->eob_multi512[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
     295             : #endif
     296          96 :         if (allow_update_cdf) {
     297          96 :             update_cdf(ec_ctx->eob_flag_cdf512[plane][eob_multi_ctx], eob_pt - 1,
     298             :                 10);
     299             :         }
     300          96 :         break;
     301         169 :     case 6:
     302             :     default:
     303             : #if CONFIG_ENTROPY_STATS
     304             :         ++counts->eob_multi1024[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
     305             : #endif
     306         169 :         if (allow_update_cdf) {
     307         169 :             update_cdf(ec_ctx->eob_flag_cdf1024[plane][eob_multi_ctx], eob_pt - 1,
     308             :                 11);
     309             :         }
     310         169 :         break;
     311             :     }
     312             : 
     313        9342 :     if (eb_k_eob_offset_bits[eob_pt] > 0) {
     314        6499 :         int eob_ctx = eob_pt - 3;
     315        6499 :         int eob_shift = eb_k_eob_offset_bits[eob_pt] - 1;
     316        6499 :         int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
     317             : #if CONFIG_ENTROPY_STATS
     318             :         counts->eob_extra[cdf_idx][txs_ctx][plane][eob_pt][bit]++;
     319             : #endif  // CONFIG_ENTROPY_STATS
     320        6499 :         if (allow_update_cdf)
     321        6499 :             update_cdf(ec_ctx->eob_extra_cdf[txs_ctx][plane][eob_ctx], bit, 2);
     322             :     }
     323        9342 : }
     324    43877700 : static int32_t get_eob_cost(int32_t eob, const LvMapEobCost *txb_eob_costs,
     325             :     const LvMapCoeffCost *txb_costs, TxType tx_type) {
     326             :     int32_t eob_extra;
     327    43877700 :     const int32_t eob_pt = get_eob_pos_token(eob, &eob_extra);
     328    43878700 :     int32_t eob_cost = 0;
     329    43878700 :     const int32_t eob_multi_ctx = (tx_type_to_class[tx_type] == TX_CLASS_2D) ? 0 : 1;
     330    43878700 :     eob_cost = txb_eob_costs->eob_cost[eob_multi_ctx][eob_pt - 1];
     331             : 
     332    43878700 :     if (eb_k_eob_offset_bits[eob_pt] > 0) {
     333    36413800 :         const int32_t eob_shift = eb_k_eob_offset_bits[eob_pt] - 1;
     334    36413800 :         const int32_t bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
     335    36413800 :         eob_cost += txb_costs->eob_extra_cost[eob_pt][bit];
     336    36413800 :         const int32_t offset_bits = eb_k_eob_offset_bits[eob_pt];
     337    36413800 :         if (offset_bits > 1) eob_cost += av1_cost_literal(offset_bits - 1);
     338             :     }
     339    43878700 :     return eob_cost;
     340             : }
     341             : 
     342             : #if ADD_MDC_FULL_COST
     343    45131600 : int32_t av1_cost_skip_txb(
     344             : #else
     345             : static INLINE int32_t av1_cost_skip_txb(
     346             : #endif
     347             :     uint8_t        allow_update_cdf,
     348             :     FRAME_CONTEXT *ec_ctx,
     349             :     struct ModeDecisionCandidateBuffer    *candidate_buffer_ptr,
     350             :     TxSize                                  transform_size,
     351             :     PlaneType                               plane_type,
     352             :     int16_t                                   txb_skip_ctx)
     353             : {
     354    45131600 :     const TxSize txs_ctx = (TxSize)((txsize_sqr_map[transform_size] + txsize_sqr_up_map[transform_size] + 1) >> 1);
     355    45131600 :     assert(txs_ctx < TX_SIZES);
     356    45131600 :     const LvMapCoeffCost *const coeff_costs = &candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->coeff_fac_bits[txs_ctx][plane_type];
     357    45131600 :     if (allow_update_cdf)
     358       31060 :         update_cdf(ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], 1, 2);
     359    45138800 :     return coeff_costs->txb_skip_cost[txb_skip_ctx][1];
     360             : }
     361             : 
     362    43871700 : static INLINE int32_t av1_cost_coeffs_txb_loop_cost_eob(uint16_t eob,
     363             :     const int16_t *const scan, const TranLow *const qcoeff,
     364             :     int8_t *const coeff_contexts, const LvMapCoeffCost *coeff_costs,
     365             :     int16_t dc_sign_ctx, uint8_t *const levels,
     366             :     const int32_t bwl,
     367             :     TxType transform_type) {
     368    43871700 :     const uint32_t cost_literal = av1_cost_literal(1);
     369    43871700 :     int32_t cost = 0;
     370             :     int32_t c;
     371             : 
     372             :     /* Loop reduced to touch only first (eob - 1) and last (0) index */
     373    43871700 :     int32_t decr = eob - 1;
     374    43871700 :     if (decr < 1)
     375     6455920 :         decr = 1;
     376   125210000 :     for (c = eob - 1; c >= 0; c -= decr) {
     377    81337900 :         const int32_t pos = scan[c];
     378    81337900 :         const TranLow v = qcoeff[pos];
     379    81337900 :          const int32_t is_nz = (v != 0);
     380    81337900 :         const int32_t level = abs(v);
     381    81337900 :         const int32_t coeff_ctx = coeff_contexts[pos];
     382             : 
     383    81337900 :         if (c == eob - 1) {
     384    43871000 :             assert((AOMMIN(level, 3) - 1) >= 0);
     385    43871000 :             cost += coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1];
     386             :         }
     387             :         else {
     388    37466900 :             cost += coeff_costs->base_cost[coeff_ctx][AOMMIN(level, 3)];
     389             :         }
     390             : 
     391    81337900 :         if (is_nz) {
     392    70331200 :             if (c == 0) {
     393    32922300 :                 const int32_t sign = (v < 0) ? 1 : 0;
     394             :                 // sign bit cost
     395             : 
     396    32922300 :                 cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign];
     397             :             }
     398             :             else {
     399    37408900 :                 cost += cost_literal;
     400             :             }
     401             : 
     402    70331200 :             if (level > NUM_BASE_LEVELS) {
     403             :                 int32_t ctx;
     404    11454500 :                 ctx = get_br_ctx(levels, pos, bwl, transform_type);
     405             : 
     406    11454800 :                 const int32_t base_range = level - 1 - NUM_BASE_LEVELS;
     407             : 
     408    11454800 :                 if (base_range < COEFF_BASE_RANGE)
     409    10355100 :                     cost += coeff_costs->lps_cost[ctx][base_range];
     410             :                 else
     411     1099690 :                     cost += coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE];
     412             : 
     413             : 
     414    11454800 :                 if (level >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE)
     415     1100080 :                     cost += get_golomb_cost(level);
     416             :             }
     417             :         }
     418             :     }
     419             : 
     420             :     /* Optimized Loop, omitted first (eob - 1) and last (0) index */
     421  1819190000 :     for (c = eob - 2; c >= 1; --c) {
     422  1775290000 :         const int32_t pos = scan[c];
     423  1775290000 :         const int32_t level = abs(qcoeff[pos]);
     424  1775290000 :         if (level > NUM_BASE_LEVELS) {
     425    72960400 :             const int32_t ctx = get_br_ctx(levels, pos, bwl, transform_type);
     426    72963600 :             const int32_t base_range = level - 1 - NUM_BASE_LEVELS;
     427             : 
     428    72963600 :             if (base_range < COEFF_BASE_RANGE) {
     429    72334700 :                 cost += cost_literal + coeff_costs->lps_cost[ctx][base_range]
     430    72334700 :                     + coeff_costs->base_cost[coeff_contexts[pos]][3];
     431             :             }
     432             :             else {
     433      628890 :                 cost += get_golomb_cost(level) + cost_literal
     434      651756 :                     + coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE]
     435      651756 :                     + coeff_costs->base_cost[coeff_contexts[pos]][3];
     436             :             }
     437             :         }
     438  1702330000 :         else if (level) {
     439   482349000 :             cost += cost_literal
     440   482349000 :                 + coeff_costs->base_cost[coeff_contexts[pos]][level];
     441             :         }
     442             :         else {
     443  1219980000 :             cost += coeff_costs->base_cost[coeff_contexts[pos]][0];
     444             :         }
     445             :     }
     446    43898100 :     return cost;
     447             : }
     448             : 
     449             : // Note: don't call this function when eob is 0.
     450    43889300 : uint64_t eb_av1_cost_coeffs_txb(
     451             :     uint8_t                             allow_update_cdf,
     452             :     FRAME_CONTEXT                      *ec_ctx,
     453             :     struct ModeDecisionCandidateBuffer *candidate_buffer_ptr,
     454             :     const TranLow                      *const qcoeff,
     455             :     uint16_t                            eob,
     456             :     PlaneType                           plane_type,
     457             :     TxSize                              transform_size,
     458             :     TxType                              transform_type,
     459             :     int16_t                             txb_skip_ctx,
     460             :     int16_t                             dc_sign_ctx,
     461             :     EbBool                              reducedTransformSetFlag)
     462             : 
     463             : {
     464             :     //Note: there is a different version of this function in AOM that seems to be efficient as its name is:
     465             :     //warehouse_efficients_txb
     466             : 
     467    43889300 :     const TxSize txs_ctx = (TxSize)((txsize_sqr_map[transform_size] + txsize_sqr_up_map[transform_size] + 1) >> 1);
     468    43889300 :     const TxClass tx_class = tx_type_to_class[transform_type];
     469             :     int32_t cost;
     470    43889300 :     const int32_t bwl = get_txb_bwl(transform_size);
     471    43879400 :     const int32_t width = get_txb_wide(transform_size);
     472    43856400 :     const int32_t height = get_txb_high(transform_size);
     473    43835900 :     const ScanOrder *const scan_order = &av1_scan_orders[transform_size][transform_type]; // get_scan(tx_size, tx_type);
     474    43835900 :     const int16_t *const scan = scan_order->scan;
     475             :     uint8_t levels_buf[TX_PAD_2D];
     476    43835900 :     uint8_t *const levels = set_levels(levels_buf, width);
     477             :     DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
     478    43839400 :     assert(txs_ctx < TX_SIZES);
     479    43839400 :     const LvMapCoeffCost *const coeff_costs = &candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->coeff_fac_bits[txs_ctx][plane_type];
     480             : 
     481    43839400 :     const int32_t eob_multi_size = txsize_log2_minus4[transform_size];
     482    43839400 :     const LvMapEobCost *const eobBits = &candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->eob_frac_bits[eob_multi_size][plane_type];
     483             :     // eob must be greater than 0 here.
     484    43839400 :     assert(eob > 0);
     485    43839400 :     cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0];
     486             : 
     487    43839400 :     if (allow_update_cdf)
     488        9342 :         update_cdf(ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], eob == 0, 2);
     489    43839400 :     eb_av1_txb_init_levels(qcoeff, width, height, levels); // NM - Needs to be optimized - to be combined with the quantisation.
     490             : 
     491             :     // Transform type bit estimation
     492    43864100 :     cost += plane_type > PLANE_TYPE_Y ? 0 :
     493    34520200 :         Av1TransformTypeRateEstimation(
     494             :             allow_update_cdf,
     495             :             ec_ctx,
     496             :             candidate_buffer_ptr,
     497    34520200 :             candidate_buffer_ptr->candidate_ptr->type == INTER_MODE ? EB_TRUE : EB_FALSE,
     498             : #if !FILTER_INTRA_FLAG
     499             :             EB_FALSE, // NM - Hardcoded to false for the moment until we support the intra filtering
     500             : #endif
     501             :             transform_size,
     502             :             transform_type,
     503             :             reducedTransformSetFlag);
     504             : 
     505             :     // Transform ebo bit estimation
     506    43900500 :     int32_t eob_cost = get_eob_cost(eob, eobBits, coeff_costs, transform_type);
     507    43883900 :     cost += eob_cost;
     508    43883900 :     if (allow_update_cdf)
     509        9342 :         eb_av1_update_eob_context(eob, transform_size, tx_class,
     510             :             plane_type, ec_ctx, allow_update_cdf);
     511             :     // Transform non-zero coeff bit estimation
     512    43883900 :     eb_av1_get_nz_map_contexts(
     513             :         levels,
     514             :         scan,
     515             :         eob,
     516             :         transform_size,
     517             :         tx_class,
     518             :         coeff_contexts); // NM - Assembly version is available in AOM
     519             : 
     520    43885000 :     if (allow_update_cdf)
     521             :     {
     522      164958 :         for (int c = eob - 1; c >= 0; --c) {
     523      155616 :             const int pos = scan[c];
     524      155616 :             const int coeff_ctx = coeff_contexts[pos];
     525      155616 :             const TranLow v = qcoeff[pos];
     526      155616 :             const TranLow level = abs(v);
     527             : 
     528      155616 :             if (allow_update_cdf) {
     529      155616 :                 if (c == eob - 1) {
     530        9342 :                     assert(coeff_ctx < 4);
     531        9342 :                     update_cdf(
     532        9342 :                         ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx],
     533        9342 :                         AOMMIN(level, 3) - 1, 3);
     534             :                 }
     535             :                 else {
     536      146274 :                     update_cdf(ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx],
     537      146274 :                         AOMMIN(level, 3), 4);
     538             :                 }
     539             :             }
     540             : 
     541             :             {
     542      155616 :                 if (c == eob - 1) {
     543        9342 :                     assert(coeff_ctx < 4);
     544             : #if CONFIG_ENTROPY_STATS
     545             :                     ++td->counts->coeff_base_eob_multi[cdf_idx][txsize_ctx][plane_type]
     546             :                         [coeff_ctx][AOMMIN(level, 3) - 1];
     547             :                 }
     548             :                 else {
     549             :                     ++td->counts->coeff_base_multi[cdf_idx][txsize_ctx][plane_type]
     550             :                         [coeff_ctx][AOMMIN(level, 3)];
     551             : #endif
     552             :                 }
     553             :             }
     554             : 
     555      155616 :             if (level > NUM_BASE_LEVELS) {
     556        6994 :                 const int base_range = level - 1 - NUM_BASE_LEVELS;
     557        6994 :                 const int br_ctx = get_br_ctx(levels, pos, bwl, (const TxType)tx_class);
     558             : 
     559        9457 :                 for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
     560        9315 :                     const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1);
     561        9315 :                     if (allow_update_cdf) {
     562        9315 :                         update_cdf(ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)]
     563        9315 :                             [plane_type][br_ctx],
     564             :                             k, BR_CDF_SIZE);
     565             :                     }
     566       20851 :                     for (int lps = 0; lps < BR_CDF_SIZE - 1; lps++) {
     567             : #if CONFIG_ENTROPY_STATS
     568             :                         ++td->counts->coeff_lps[AOMMIN(txsize_ctx, TX_32X32)][plane_type][lps]
     569             :                             [br_ctx][lps == k];
     570             : #endif  // CONFIG_ENTROPY_STATS
     571       18388 :                         if (lps == k) break;
     572             :                     }
     573             : #if CONFIG_ENTROPY_STATS
     574             :                     ++td->counts->coeff_lps_multi[cdf_idx][AOMMIN(txsize_ctx, TX_32X32)]
     575             :                         [plane_type][br_ctx][k];
     576             : #endif
     577        9315 :                     if (k < BR_CDF_SIZE - 1) break;
     578             :                 }
     579             :             }
     580             :         }
     581             : 
     582        9342 :         if (qcoeff[0] != 0) {
     583        6978 :             const int dc_sign = (qcoeff[0] < 0) ? 1 : 0;
     584        6978 :             if (allow_update_cdf)
     585        6978 :                 update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], dc_sign, 2);
     586             :         }
     587             : 
     588             :         //TODO: CHKN  for 128x128 where we need more than one TXb, we need to update the txb_context(dc_sign+skip_ctx) in a Txb basis.
     589             : 
     590        9342 :         return 0;
     591             :     }
     592             : 
     593    43875700 :     cost += av1_cost_coeffs_txb_loop_cost_eob(eob, scan, qcoeff,
     594             :         coeff_contexts, coeff_costs, dc_sign_ctx, levels, bwl, transform_type);
     595             : 
     596    43835200 :     return cost;
     597             : }
     598             : #if FILTER_INTRA_FLAG // forward declarations of the filter-intra eligibility helpers (definitions are not in this part of the file)
     599             :  int av1_filter_intra_allowed_bsize(uint8_t enable_filter_intra, BlockSize bs); // block-size-only variant; not called in the code visible here
     600             : #if PAL_SUP
     601             :  int av1_filter_intra_allowed( // palette-aware signature; av1_intra_fast_cost uses the result as a condition for adding filter-intra bits
     602             :      uint8_t   enable_filter_intra, // caller passes seq_header.enable_filter_intra
     603             :      BlockSize bsize, // caller passes blk_geom->bsize
     604             :      uint8_t   palette_size, // caller passes the candidate's palette_size[0]
     605             :      uint32_t  mode); // caller passes the candidate's luma prediction mode
     606             : #else // !PAL_SUP: same helper without the palette size argument
     607             :  int av1_filter_intra_allowed(uint8_t   enable_filter_intra, BlockSize bsize, uint32_t  mode);
     608             : #endif // PAL_SUP
     609             : #endif // FILTER_INTRA_FLAG
     610             : /*static*/ void model_rd_from_sse( // forward declaration; used by the use_ssd path of av1_intra_fast_cost below
     611             :     BlockSize bsize, // callers here pass blk_geom->bsize (luma) or blk_geom->bsize_uv (chroma)
     612             :     int16_t quantizer, // callers here pass dequants->y_dequant_Q3[base_q_idx][1]
     613             :     //const Av1Comp *const cpi, // commented-out parameters below: presumably left over from an earlier signature -- TODO confirm
     614             :     //const MacroBlockD *const xd,
     615             :     //BlockSize bsize,
     616             :     //int32_t plane,
     617             :     uint64_t sse, // callers here pass the luma or chroma distortion
     618             :     uint32_t *rate, // output pointer; presumably receives a modelled rate (callers zero a scratch variable and add it afterwards)
     619             :     uint64_t *dist); // output pointer; callers here use the value written as the distortion term
     620             : 
     621   241050000 : uint64_t av1_intra_fast_cost( // Fast-loop RD cost of an intra (or IntraBC) candidate; also caches the fast luma/chroma rates on candidate_ptr.
     622             :     CodingUnit            *cu_ptr, // source of skip_flag_context, is_inter_ctx and av1xd
     623             :     ModeDecisionCandidate *candidate_ptr, // candidate being costed; fast_luma_rate / fast_chroma_rate are written
     624             :     uint32_t                 qp, // unused
     625             :     uint64_t                 luma_distortion, // luma distortion; fed to model_rd_from_sse when use_ssd, otherwise scaled by LUMA_WEIGHT
     626             :     uint64_t                 chroma_distortion, // chroma distortion; fed to model_rd_from_sse when use_ssd, otherwise used as is
     627             :     uint64_t                 lambda, // multiplier passed to RDCOST
     628             :     EbBool                   use_ssd, // selects the model_rd_from_sse path
     629             :     PictureControlSet     *picture_control_set_ptr, // slice type, frame header, sequence header and dequant tables are read from here
     630             :     CandidateMv             *ref_mv_stack, // unused
     631             :     const BlockGeom         *blk_geom, // block geometry: bsize, bsize_uv, bwidth, bheight, has_uv
     632             :     uint32_t                 miRow, // passed to is_chroma_reference
     633             :     uint32_t                 miCol, // passed to is_chroma_reference
     634             :     uint8_t                 md_pass, // unused
     635             :     uint32_t                 left_neighbor_mode, // index into intra_mode_context for the left context
     636             :     uint32_t                 top_neighbor_mode) // index into intra_mode_context for the above context
     637             : // Returns RDCOST(lambda, rate, totalDistortion) for whichever path (IntraBC, SSD model, or weighted distortion) is taken.
     638             : {
     639             :     UNUSED(qp);
     640             :     UNUSED(ref_mv_stack);
     641             :     UNUSED(miRow); // note: miRow/miCol and the two neighbor modes are in fact read further down
     642             :     UNUSED(miCol);
     643             :     UNUSED(left_neighbor_mode);
     644             :     UNUSED(top_neighbor_mode);
     645             :     UNUSED(md_pass);
     646             : 
     647   241050000 :     FrameHeader *frm_hdr = &picture_control_set_ptr->parent_pcs_ptr->frm_hdr;
     648   241050000 :     if (av1_allow_intrabc(picture_control_set_ptr->parent_pcs_ptr->av1_cm) && candidate_ptr->use_intrabc) {
     649           0 :         uint64_t lumaSad = (LUMA_WEIGHT * luma_distortion) << AV1_COST_PRECISION;
     650           0 :         uint64_t chromaSad = chroma_distortion << AV1_COST_PRECISION;
     651           0 :         uint64_t totalDistortion = lumaSad + chromaSad;
     652             : 
     653           0 :         uint64_t rate = 0;
     654             : 
     655           0 :         EbReflist refListIdx = 0;
     656           0 :         int16_t predRefX = candidate_ptr->motion_vector_pred_x[refListIdx];
     657           0 :         int16_t predRefY = candidate_ptr->motion_vector_pred_y[refListIdx];
     658           0 :         int16_t mvRefX = candidate_ptr->motion_vector_xl0;
     659           0 :         int16_t mvRefY = candidate_ptr->motion_vector_yl0;
     660             :         MV mv;
     661           0 :         mv.row = mvRefY;
     662           0 :         mv.col = mvRefX;
     663             :         MV ref_mv;
     664           0 :         ref_mv.row = predRefY;
     665           0 :         ref_mv.col = predRefX;
     666             : // Cost tables are addressed from their MV_MAX entry.
     667           0 :         int *dvcost[2] = { (int *)&candidate_ptr->md_rate_estimation_ptr->dv_cost[0][MV_MAX],
     668           0 :                            (int *)&candidate_ptr->md_rate_estimation_ptr->dv_cost[1][MV_MAX] };
     669             : 
     670           0 :         int32_t mvRate = eb_av1_mv_bit_cost(
     671             :             &mv,
     672             :             &ref_mv,
     673           0 :             candidate_ptr->md_rate_estimation_ptr->dv_joint_cost,
     674             :             dvcost, MV_COST_WEIGHT_SUB);
     675             : 
     676           0 :         rate = mvRate + candidate_ptr->md_rate_estimation_ptr->intrabc_fac_bits[candidate_ptr->use_intrabc];
     677             : 
     678           0 :         candidate_ptr->fast_luma_rate = rate; // the whole IntraBC rate is stored as luma rate
     679           0 :         candidate_ptr->fast_chroma_rate = 0;
     680             : // The three assignments below repeat the computation made at the top of this branch with unchanged inputs.
     681           0 :         lumaSad = (LUMA_WEIGHT * luma_distortion) << AV1_COST_PRECISION;
     682           0 :         chromaSad = chroma_distortion << AV1_COST_PRECISION;
     683           0 :         totalDistortion = lumaSad + chromaSad;
     684             : 
     685           0 :         return(RDCOST(lambda, rate, totalDistortion));
     686             :     }
     687             :     else {
     688   241021000 :     EbBool isMonochromeFlag = EB_FALSE; // NM - isMonochromeFlag is hardcoded to false.
     689   241021000 :     EbBool isCflAllowed = (blk_geom->bwidth <= 32 && blk_geom->bheight <= 32) ? 1 : 0; // first index of intra_uv_mode_fac_bits below
     690             : 
     691   241021000 :     uint8_t   subSamplingX = 1; // NM - subsampling_x is hardcoded to 1 for 420 chroma sampling.
     692   241021000 :     uint8_t   subSamplingY = 1; // NM - subsampling_y is hardcoded to 1 for 420 chroma sampling.
     693             :     // In the fast loop the CFL alphas are not known yet, so the chroma mode bits are estimated as for DC mode; presumably they are updated later if CFL wins.
     694   241021000 :     uint32_t chroma_mode = candidate_ptr->intra_chroma_mode == UV_CFL_PRED ? UV_DC_PRED : candidate_ptr->intra_chroma_mode;
     695             : 
     696             :     // Number of bits for each syntax element
     697   241021000 :     uint64_t intraModeBitsNum = 0;
     698   241021000 :     uint64_t intraLumaModeBitsNum = 0;
     699   241021000 :     uint64_t intraLumaAngModeBitsNum = 0;
     700             : #if FILTER_INTRA_FLAG
     701   241021000 :     uint64_t intra_filter_mode_bits_num = 0;
     702             : #endif
     703   241021000 :     uint64_t intraChromaModeBitsNum = 0;
     704   241021000 :     uint64_t intraChromaAngModeBitsNum = 0;
     705   241021000 :     uint64_t skipModeRate = 0;
     706   241021000 :     uint8_t  skipModeCtx = cu_ptr->skip_flag_context; // taken from the CU's skip flag context (the older "hardcoded to 1" note no longer matched the code)
     707   241021000 :     PredictionMode intra_mode = (PredictionMode)candidate_ptr->pred_mode;
     708             :     // Luma and chroma rate
     709             :     uint32_t rate;
     710   241021000 :     uint32_t lumaRate = 0;
     711   241021000 :     uint32_t chromaRate = 0;
     712             :     uint64_t lumaSad, chromaSad;
     713             : 
     714             :     // Luma and chroma distortion
     715             :     uint64_t totalDistortion;
     716   241021000 :     const int32_t AboveCtx = intra_mode_context[top_neighbor_mode];
     717   241021000 :     const int32_t LeftCtx = intra_mode_context[left_neighbor_mode];
     718   241021000 :     intraModeBitsNum = picture_control_set_ptr->slice_type != I_SLICE ? (uint64_t)candidate_ptr->md_rate_estimation_ptr->mb_mode_fac_bits[size_group_lookup[blk_geom->bsize]][intra_mode] : ZERO_COST;
     719   241021000 :     skipModeRate = picture_control_set_ptr->slice_type != I_SLICE ? (uint64_t)candidate_ptr->md_rate_estimation_ptr->skip_mode_fac_bits[skipModeCtx][0] : ZERO_COST;
     720             : 
     721             :     // Estimate luma nominal intra mode bits (context-based table, I slices only)
     722   241021000 :     intraLumaModeBitsNum = picture_control_set_ptr->slice_type == I_SLICE ? (uint64_t)candidate_ptr->md_rate_estimation_ptr->y_mode_fac_bits[AboveCtx][LeftCtx][intra_mode] : ZERO_COST;
     723             :     // Estimate luma angular mode bits
     724   241021000 :     if (blk_geom->bsize >= BLOCK_8X8 && candidate_ptr->is_directional_mode_flag) {
     725   217096000 :         assert((intra_mode - V_PRED) < 8);
     726   217096000 :         assert((intra_mode - V_PRED) >= 0);
     727   217096000 :         intraLumaAngModeBitsNum = candidate_ptr->md_rate_estimation_ptr->angle_delta_fac_bits[intra_mode - V_PRED][MAX_ANGLE_DELTA + candidate_ptr->angle_delta[PLANE_TYPE_Y]];
     728             :     }
     729             : #if PAL_SUP
     730   241021000 :     if (av1_allow_palette(picture_control_set_ptr->parent_pcs_ptr->frm_hdr.allow_screen_content_tools, blk_geom->bsize) && intra_mode == DC_PRED) {
     731           0 :         const int use_palette = candidate_ptr->palette_info.pmi.palette_size[0] > 0;
     732           0 :         const int bsize_ctx = av1_get_palette_bsize_ctx(blk_geom->bsize);
     733           0 :         const int mode_ctx = av1_get_palette_mode_ctx(cu_ptr->av1xd);
     734           0 :         intraLumaModeBitsNum += candidate_ptr->md_rate_estimation_ptr->palette_ymode_fac_bits[bsize_ctx][mode_ctx][use_palette];
     735           0 :         if (use_palette) { // add palette size, palette colors and color-map costs
     736           0 :             const uint8_t *const color_map = candidate_ptr->palette_info.color_idx_map;
     737             :             int block_width, block_height, rows, cols;
     738           0 :             av1_get_block_dimensions(blk_geom->bsize, 0, cu_ptr->av1xd, &block_width, &block_height, &rows,
     739             :                 &cols);
     740           0 :             const int plt_size = candidate_ptr->palette_info.pmi.palette_size[0];
     741           0 :             int palette_mode_cost =
     742           0 :                 candidate_ptr->md_rate_estimation_ptr->palette_ysize_fac_bits[bsize_ctx][plt_size - PALETTE_MIN_SIZE] +
     743           0 :                 write_uniform_cost(plt_size, color_map[0]);
     744             :             uint16_t color_cache[2 * PALETTE_MAX_SIZE];
     745           0 :             const int n_cache = eb_get_palette_cache(cu_ptr->av1xd, 0, color_cache);
     746           0 :             palette_mode_cost +=
     747           0 :                 av1_palette_color_cost_y(&candidate_ptr->palette_info.pmi, color_cache,
     748             :                     n_cache, EB_8BIT);
     749           0 :             palette_mode_cost +=
     750           0 :                 av1_cost_color_map(&candidate_ptr->palette_info, candidate_ptr->md_rate_estimation_ptr, cu_ptr, 0, blk_geom->bsize, PALETTE_MAP);
     751           0 :             intraLumaModeBitsNum += palette_mode_cost;
     752             :         }
     753             :     }
     754             : #endif
     755             : #if FILTER_INTRA_FLAG
     756             : #if PAL_SUP
     757   241364000 :     if (av1_filter_intra_allowed(picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.enable_filter_intra, blk_geom->bsize, candidate_ptr->palette_info.pmi.palette_size[0], intra_mode)) {
     758             : #else
     759             :     if (av1_filter_intra_allowed(picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.enable_filter_intra, blk_geom->bsize, intra_mode)) {
     760             : #endif
     761     4632700 :        intra_filter_mode_bits_num = candidate_ptr->md_rate_estimation_ptr->filter_intra_fac_bits[blk_geom->bsize][candidate_ptr->filter_intra_mode != FILTER_INTRA_MODES];
     762     4632700 :         if (candidate_ptr->filter_intra_mode != FILTER_INTRA_MODES) { // FILTER_INTRA_MODES is the "no filter-intra" value here; add the bits of the selected filter mode
     763     1048440 :             intra_filter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->filter_intra_mode_fac_bits[candidate_ptr->filter_intra_mode];
     764             :         }
     765             :     }
     766             : #endif
     767             : 
     768   241338000 :     if (blk_geom->has_uv) {
     769   239762000 :         if (!isMonochromeFlag && is_chroma_reference(miRow, miCol, blk_geom->bsize, subSamplingX, subSamplingY)) {
     770             :             // Estimate chroma nominal intra mode bits
     771   239416000 :             intraChromaModeBitsNum = (uint64_t)candidate_ptr->md_rate_estimation_ptr->intra_uv_mode_fac_bits[isCflAllowed][intra_mode][chroma_mode];
     772             :             // Estimate chroma angular mode bits
     773   239416000 :             if (blk_geom->bsize >= BLOCK_8X8 && candidate_ptr->is_directional_chroma_mode_flag) {
     774   210981000 :                 intraChromaAngModeBitsNum = candidate_ptr->md_rate_estimation_ptr->angle_delta_fac_bits[chroma_mode - V_PRED][MAX_ANGLE_DELTA + candidate_ptr->angle_delta[PLANE_TYPE_UV]];
     775             :             }
     776             : #if PAL_SUP
     777   239416000 :             if (av1_allow_palette(picture_control_set_ptr->parent_pcs_ptr->frm_hdr.allow_screen_content_tools, blk_geom->bsize) && chroma_mode == UV_DC_PRED) {
     778           0 :                 const PaletteModeInfo *pmi = &candidate_ptr->palette_info.pmi;
     779           0 :                 const int use_palette = pmi->palette_size[1] > 0;
     780           0 :                 intraChromaAngModeBitsNum += // palette UV flag bits are accumulated in the same counter as the angular bits
     781           0 :                     candidate_ptr->md_rate_estimation_ptr->palette_uv_mode_fac_bits[pmi->palette_size[0] > 0][use_palette];
     782             :             }
     783             : #endif
     784             :         }
     785             :     }
     786             : 
     787   241202000 :     uint32_t isInterRate = picture_control_set_ptr->slice_type != I_SLICE ? candidate_ptr->md_rate_estimation_ptr->intra_inter_fac_bits[cu_ptr->is_inter_ctx][0] : 0;
     788             : #if FILTER_INTRA_FLAG
     789   241202000 :     lumaRate = (uint32_t)(intraModeBitsNum + skipModeRate + intraLumaModeBitsNum + intraLumaAngModeBitsNum + isInterRate + intra_filter_mode_bits_num);
     790             : #else
     791             :     lumaRate = (uint32_t)(intraModeBitsNum + skipModeRate + intraLumaModeBitsNum + intraLumaAngModeBitsNum + isInterRate);
     792             : #endif
     793   241202000 :     if (av1_allow_intrabc(picture_control_set_ptr->parent_pcs_ptr->av1_cm))
     794           0 :         lumaRate += candidate_ptr->md_rate_estimation_ptr->intrabc_fac_bits[candidate_ptr->use_intrabc];
     795             : 
     796   240986000 :     chromaRate = (uint32_t)(intraChromaModeBitsNum + intraChromaAngModeBitsNum);
     797             : 
     798             :     // Keep the Fast Luma and Chroma rate for future use
     799   240986000 :     candidate_ptr->fast_luma_rate = lumaRate;
     800   240986000 :     candidate_ptr->fast_chroma_rate = chromaRate;
     801   240986000 :     if (use_ssd) { // model rate and distortion from the supplied distortions via model_rd_from_sse
     802           0 :         int32_t current_q_index = frm_hdr->quantization_params.base_q_idx;
     803           0 :         Dequants *const dequants = &picture_control_set_ptr->parent_pcs_ptr->deq;
     804             : 
     805           0 :         int16_t quantizer = dequants->y_dequant_Q3[current_q_index][1]; // the same luma dequant entry is used for both planes
     806           0 :         rate = 0;
     807           0 :         model_rd_from_sse(
     808           0 :             blk_geom->bsize,
     809             :             quantizer,
     810             :             luma_distortion,
     811             :             &rate,
     812             :             &lumaSad);
     813           0 :         lumaRate += rate;
     814           0 :         totalDistortion = lumaSad;
     815             : 
     816           0 :         rate = 0;
     817           0 :         model_rd_from_sse(
     818           0 :             blk_geom->bsize_uv,
     819             :             quantizer,
     820             :             chroma_distortion,
     821             :             &rate, // modelled rate goes to the scratch variable (as for luma) so the chroma mode bits already in chromaRate are kept
     822             :             &chromaSad);
     823           0 :         chromaRate += rate;
     824           0 :         totalDistortion += chromaSad;
     825             : 
     826           0 :         rate = lumaRate + chromaRate;
     827             : 
     828           0 :         return(RDCOST(lambda, rate, totalDistortion));
     829             :     }
     830             :     else {
     831   240986000 :         lumaSad = (LUMA_WEIGHT * luma_distortion) << AV1_COST_PRECISION;
     832   240986000 :         chromaSad = chroma_distortion << AV1_COST_PRECISION;
     833   240986000 :         totalDistortion = lumaSad + chromaSad;
     834             : 
     835   240986000 :         rate = lumaRate + chromaRate;
     836             : 
     837             :         // Assign fast cost
     838   240986000 :         return(RDCOST(lambda, rate, totalDistortion));
     839             :     }
     840             :     }
     841             : }
     842             : 
     843             : //extern INLINE int32_t have_newmv_in_inter_mode(PredictionMode mode);
     844   106130000 : static INLINE int32_t have_newmv_in_inter_mode(PredictionMode mode) { // Tests whether mode is NEWMV or one of the listed compound modes with NEW in its name.
     845    98157700 :     return (mode == NEWMV || mode == NEW_NEWMV || mode == NEAREST_NEWMV || // yields 1 on a match with any of the six modes, 0 otherwise
     846   204288000 :         mode == NEW_NEARESTMV || mode == NEAR_NEWMV || mode == NEW_NEARMV);
     847             : }
     848             : 
     849             : extern void av1_set_ref_frame(MvReferenceFrame *rf, // defined elsewhere; EstimateRefFramesNumBits passes a two-entry array and reads rf[0] and rf[1] afterwards
     850             :     int8_t ref_frame_type); // reference frame type supplied by the caller (a uint8_t at the call site in this file)
     851             : 
     852   173456000 : static INLINE int has_second_ref(const MbModeInfo *mbmi) { // Tests whether the block's second reference slot holds a real reference frame.
     853   173456000 :     return mbmi->block_mi.ref_frame[1] > INTRA_FRAME; // nonzero when ref_frame[1] is above INTRA_FRAME
     854             : }
     855             : 
     856    71921200 : static INLINE int has_uni_comp_refs(const MbModeInfo *mbmi) { // Tests for a second reference whose side of BWDREF_FRAME matches the first reference's.
     857   143841000 :     return has_second_ref(mbmi) && (!((mbmi->block_mi.ref_frame[0] >= BWDREF_FRAME) ^ // XOR is 0 when both comparisons agree, so the negation is 1 for same-side pairs
     858    71919700 :         (mbmi->block_mi.ref_frame[1] >= BWDREF_FRAME)));
     859             : }
     860             : 
     861             : // This function encodes the reference frame
     862   106443000 : uint64_t EstimateRefFramesNumBits(
     863             :     PictureControlSet                    *picture_control_set_ptr,
     864             :     ModeDecisionCandidate                *candidate_ptr,
     865             :     CodingUnit                           *cu_ptr,
     866             :     uint32_t                                 bwidth,
     867             :     uint32_t                                 bheight,
     868             :     uint8_t                                  ref_frame_type,
     869             :     uint8_t                                   md_pass,
     870             :     EbBool                                is_compound)
     871             : {
     872             : 
     873   106443000 :     FrameHeader *frm_hdr = &picture_control_set_ptr->parent_pcs_ptr->frm_hdr;
     874   106443000 :     uint64_t refRateBits = 0;
     875             : 
     876   106443000 :     if (md_pass == 1) {
     877   102656000 :         uint64_t refRateA = 0;
     878   102656000 :         uint64_t refRateB = 0;
     879   102656000 :         uint64_t refRateC = 0;
     880   102656000 :         uint64_t refRateD = 0;
     881   102656000 :         uint64_t refRateE = 0;
     882   102656000 :         uint64_t refRateF = 0;
     883   102656000 :         uint64_t refRateG = 0;
     884   102656000 :         uint64_t refRateH = 0;
     885   102656000 :         uint64_t refRateI = 0;
     886   102656000 :         uint64_t refRateJ = 0;
     887   102656000 :         uint64_t refRateK = 0;
     888   102656000 :         uint64_t refRateL = 0;
     889   102656000 :         uint64_t refRateM = 0;
     890   102656000 :         uint64_t refRateN = 0;
     891   102656000 :         uint64_t refRateO = 0;
     892   102656000 :         uint64_t refRateP = 0;
     893             :         // const MbModeInfo *const mbmi = &cu_ptr->av1xd->mi[0]->mbmi;
     894   102656000 :         MbModeInfo *const mbmi = &cu_ptr->av1xd->mi[0]->mbmi;
     895             :         MvReferenceFrame refType[2];
     896   102656000 :         av1_set_ref_frame(refType, ref_frame_type);
     897   102683000 :         mbmi->block_mi.ref_frame[0] = refType[0];
     898   102683000 :         mbmi->block_mi.ref_frame[1] = refType[1];
     899             :         //const int is_compound = has_second_ref(mbmi);
     900             :         {
     901             :             // does the feature use compound prediction or not
     902             :             // (if not specified at the frame/segment level)
     903   102683000 :             if (frm_hdr->reference_mode == REFERENCE_MODE_SELECT) {
     904   101555000 :                 if (MIN(bwidth, bheight) >= 8) {
     905             :                     //aom_write_symbol(w, is_compound, av1_get_reference_mode_cdf(cu_ptr->av1xd), 2);
     906    96824400 :                     int32_t context = av1_get_reference_mode_context_new(cu_ptr->av1xd);
     907    96684300 :                     refRateA = candidate_ptr->md_rate_estimation_ptr->comp_inter_fac_bits[context][is_compound];
     908             :                 }
     909             :             }
     910             :             else {
     911     1128420 :                 assert((!is_compound) ==
     912             :                     (frm_hdr->reference_mode == SINGLE_REFERENCE));
     913             :             }
     914             : 
     915   102543000 :             if (is_compound) {
     916    71940200 :                 const CompReferenceType comp_ref_type = has_uni_comp_refs(mbmi)
     917             :                     ? UNIDIR_COMP_REFERENCE
     918    71929600 :                     : BIDIR_COMP_REFERENCE;
     919             : 
     920    71929600 :                 const int pred_context = av1_get_comp_reference_type_context_new(cu_ptr->av1xd);
     921    71931400 :                 refRateB = candidate_ptr->md_rate_estimation_ptr->comp_ref_type_fac_bits[pred_context][comp_ref_type];
     922             :                 /*aom_write_symbol(w, comp_ref_type, av1_get_comp_reference_type_cdf(cu_ptr->av1xd),
     923             :                     2);*/
     924             : 
     925    71931400 :                 if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
     926             :                     //printf("ERROR[AN]: UNIDIR_COMP_REFERENCE not supported\n");
     927    15853800 :                     const int bit = mbmi->block_mi.ref_frame[0] == BWDREF_FRAME;
     928             : 
     929    15853800 :                     const int pred_context = eb_av1_get_pred_context_uni_comp_ref_p(cu_ptr->av1xd);
     930    15854100 :                     refRateC = candidate_ptr->md_rate_estimation_ptr->uni_comp_ref_fac_bits[pred_context][0][bit];
     931             :                     //cu_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][0];
     932             :                     //WRITE_REF_BIT(bit, uni_comp_ref_p);
     933             : 
     934    15854100 :                     if (!bit) {
     935    13946500 :                         assert(mbmi->block_mi.ref_frame[0] == LAST_FRAME);
     936    22326100 :                         const int bit1 = mbmi->block_mi.ref_frame[1] == LAST3_FRAME ||
     937     8379630 :                             mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME;
     938    13946500 :                         const int pred_context = eb_av1_get_pred_context_uni_comp_ref_p1(cu_ptr->av1xd);
     939    13946800 :                         refRateD = candidate_ptr->md_rate_estimation_ptr->uni_comp_ref_fac_bits[pred_context][1][bit1];
     940             :                         //refRateD = cu_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][1];
     941             :                         //WRITE_REF_BIT(bit1, uni_comp_ref_p1);
     942    13946800 :                         if (bit1) {
     943     8167340 :                             const int bit2 = mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME;
     944     8167340 :                             const int pred_context = eb_av1_get_pred_context_uni_comp_ref_p2(cu_ptr->av1xd);
     945     8167660 :                             refRateE = candidate_ptr->md_rate_estimation_ptr->uni_comp_ref_fac_bits[pred_context][2][bit2];
     946             : 
     947             :                             // refRateE = cu_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][2];
     948             :                              //WRITE_REF_BIT(bit2, uni_comp_ref_p2);
     949             :                         }
     950             :                     }
     951             :                     //else {
     952             :                     //    assert(mbmi->block_mi.ref_frame[1] == ALTREF_FRAME);
     953             :                     //}
     954    15854700 :                     refRateBits = refRateA + refRateB + refRateC + refRateD + refRateE + refRateF + refRateG + refRateH + refRateI + refRateJ + refRateK + refRateL + refRateM;
     955    15854700 :                     return refRateBits;
     956             :                     //return;
     957             :                 }
     958             : 
     959    56077600 :                 assert(comp_ref_type == BIDIR_COMP_REFERENCE);
     960             : 
     961   106458000 :                 const int bit = (mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME ||
     962    50380600 :                     mbmi->block_mi.ref_frame[0] == LAST3_FRAME);
     963    56077600 :                 const int pred_ctx = eb_av1_get_pred_context_comp_ref_p(cu_ptr->av1xd);
     964    56060000 :                 refRateF = candidate_ptr->md_rate_estimation_ptr->comp_ref_fac_bits[pred_ctx][0][bit];
     965             :                 //refRateF = cu_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_ctx][0];
     966             :                 //WRITE_REF_BIT(bit, comp_ref_p);
     967             : 
     968    56060000 :                 if (!bit) {
     969    38210500 :                     const int bit1 = mbmi->block_mi.ref_frame[0] == LAST2_FRAME;
     970    38210500 :                     const int pred_context = eb_av1_get_pred_context_comp_ref_p1(cu_ptr->av1xd);
     971    38206300 :                     refRateG = candidate_ptr->md_rate_estimation_ptr->comp_ref_fac_bits[pred_context][1][bit1];
     972             :                     //refRateG = cu_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_context][1];
     973             :                     //WRITE_REF_BIT(bit1, comp_ref_p1);
     974             :                 }
     975             :                 else {
     976    17849500 :                     const int bit2 = mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME;
     977    17849500 :                     const int pred_context = eb_av1_get_pred_context_comp_ref_p2(cu_ptr->av1xd);
     978    17935400 :                     refRateH = candidate_ptr->md_rate_estimation_ptr->comp_ref_fac_bits[pred_context][2][bit2];
     979             :                     //refRateH = cu_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_context][2];
     980             :                     //WRITE_REF_BIT(bit2, comp_ref_p2);
     981             :                 }
     982             : 
     983    56141700 :                 const int bit_bwd = mbmi->block_mi.ref_frame[1] == ALTREF_FRAME;
     984    56141700 :                 const int pred_ctx_2 = eb_av1_get_pred_context_comp_bwdref_p(cu_ptr->av1xd);
     985    56045300 :                 refRateI = candidate_ptr->md_rate_estimation_ptr->comp_bwd_ref_fac_bits[pred_ctx_2][0][bit_bwd];
     986             :                 //refRateI = cu_ptr->av1xd->tile_ctx->comp_bwdref_cdf[pred_ctx_2][0];
     987             :                 //WRITE_REF_BIT(bit_bwd, comp_bwdref_p);
     988             : 
     989    56045300 :                 if (!bit_bwd) {
     990    50368400 :                     const int pred_context = eb_av1_get_pred_context_comp_bwdref_p1(cu_ptr->av1xd);
     991    50380600 :                     refRateJ = candidate_ptr->md_rate_estimation_ptr->comp_bwd_ref_fac_bits[pred_context][1][refType[1] == ALTREF2_FRAME];
     992             :                     //refRateJ = cu_ptr->av1xd->tile_ctx->comp_bwdref_cdf[pred_context][1];
     993             :                     //WRITE_REF_BIT(mbmi->block_mi.ref_frame[1] == ALTREF2_FRAME, comp_bwdref_p1);
     994             :                 }
     995             :             }
     996             :             else {
     997    61384800 :                 const int bit0 = (mbmi->block_mi.ref_frame[0] <= ALTREF_FRAME &&
     998    30781700 :                     mbmi->block_mi.ref_frame[0] >= BWDREF_FRAME);
     999    30603200 :                 refRateK = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[eb_av1_get_pred_context_single_ref_p1(cu_ptr->av1xd)][0][bit0];
    1000             :                 //refRateK = cu_ptr->av1xd->tile_ctx->single_ref_cdf[eb_av1_get_pred_context_single_ref_p1(cu_ptr->av1xd)][0];
    1001             :                 //WRITE_REF_BIT(bit0, single_ref_p1);
    1002             : 
    1003    30780400 :                 if (bit0) {
    1004    11650800 :                     const int bit1 = mbmi->block_mi.ref_frame[0] == ALTREF_FRAME;
    1005    11650800 :                     refRateL = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[eb_av1_get_pred_context_single_ref_p2(cu_ptr->av1xd)][1][bit1];
    1006             :                     //refRateL = cu_ptr->av1xd->tile_ctx->single_ref_cdf[eb_av1_get_pred_context_single_ref_p2(cu_ptr->av1xd)][1];
    1007             :                     //WRITE_REF_BIT(bit1, single_ref_p2);
    1008    11675700 :                     if (!bit1) {
    1009    10611000 :                         refRateM = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[eb_av1_get_pred_context_single_ref_p6(cu_ptr->av1xd)][5][ref_frame_type == ALTREF2_FRAME];
    1010             :                         //refRateM = cu_ptr->av1xd->tile_ctx->single_ref_cdf[eb_av1_get_pred_context_single_ref_p6(cu_ptr->av1xd)][5];
    1011             :                         //WRITE_REF_BIT(mbmi->block_mi.ref_frame[0] == ALTREF2_FRAME, single_ref_p6);
    1012             :                     }
    1013             :                 }
    1014             :                 else {
    1015    34371200 :                     const int bit2 = (mbmi->block_mi.ref_frame[0] == LAST3_FRAME ||
    1016    15241500 :                         mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME);
    1017    19129600 :                     refRateN = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[eb_av1_get_pred_context_single_ref_p3(cu_ptr->av1xd)][2][bit2];
    1018             :                     //refRateN = cu_ptr->av1xd->tile_ctx->single_ref_cdf[eb_av1_get_pred_context_single_ref_p3(cu_ptr->av1xd)][2];
    1019             :                     //WRITE_REF_BIT(bit2, single_ref_p3);
    1020    19121500 :                     if (!bit2) {
    1021    12968000 :                         const int bit3 = mbmi->block_mi.ref_frame[0] != LAST_FRAME;
    1022    12968000 :                         refRateO = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[eb_av1_get_pred_context_single_ref_p4(cu_ptr->av1xd)][3][bit3];
    1023             :                         //refRateO = cu_ptr->av1xd->tile_ctx->single_ref_cdf[eb_av1_get_pred_context_single_ref_p4(cu_ptr->av1xd)][3];
    1024             :                         //WRITE_REF_BIT(bit3, single_ref_p4);
    1025             :                     }
    1026             :                     else {
    1027     6153560 :                         const int bit4 = mbmi->block_mi.ref_frame[0] != LAST3_FRAME;
    1028     6153560 :                         refRateP = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[eb_av1_get_pred_context_single_ref_p5(cu_ptr->av1xd)][4][bit4];
    1029             :                         //refRateP = cu_ptr->av1xd->tile_ctx->single_ref_cdf[eb_av1_get_pred_context_single_ref_p5(cu_ptr->av1xd)][4];
    1030             :                         //WRITE_REF_BIT(bit4, single_ref_p5);
    1031             :                     }
    1032             :                 }
    1033             :             }
    1034             :         }
    1035    86605600 :         refRateBits = refRateA + refRateB + refRateC + refRateD + refRateE + refRateF + refRateG + refRateH + refRateI +
    1036    86605600 :             refRateJ + refRateK + refRateL + refRateM + refRateN + refRateO + refRateP;
    1037             :     }
    1038             :     else {
    1039     3787850 :     uint64_t refRateA = 0;
    1040     3787850 :     uint64_t refRateB = 0;
    1041     3787850 :     uint64_t refRateC = 0;
    1042     3787850 :     uint64_t refRateD = 0;
    1043     3787850 :     uint64_t refRateE = 0;
    1044     3787850 :     uint64_t refRateF = 0;
    1045     3787850 :     uint64_t refRateG = 0;
    1046     3787850 :     uint64_t refRateH = 0;
    1047     3787850 :     uint64_t refRateI = 0;
    1048     3787850 :     uint64_t refRateJ = 0;
    1049     3787850 :     uint64_t refRateK = 0;
    1050     3787850 :     uint64_t refRateL = 0;
    1051     3787850 :     uint64_t refRateM = 0;
    1052             : 
    1053             :     // If segment level coding of this signal is disabled...
    1054             :     // or the segment allows multiple reference frame options
    1055             :     /*if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
    1056             :     assert(!is_compound);
    1057             :     assert(mbmi->block_mi.ref_frame[0] ==
    1058             :     get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME));
    1059             :     }
    1060             :     else if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP) ||
    1061             :     segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) {
    1062             :     assert(!is_compound);
    1063             :     assert(mbmi->block_mi.ref_frame[0] == LAST_FRAME);
    1064             :     }
    1065             :     else*/ {
    1066             :     // does the feature use compound prediction or not
    1067             :     // (if not specified at the frame/segment level)
    1068     3787850 :         if (frm_hdr->reference_mode == REFERENCE_MODE_SELECT) {
    1069     3622570 :             if (MIN(bwidth, bheight) >= 8) {
    1070     1639040 :                 int32_t context = 0;
    1071     1639040 :                 context = cu_ptr->reference_mode_context;
    1072     1639040 :                 assert(context >= 0 && context < 5);
    1073     1639070 :                 refRateA = candidate_ptr->md_rate_estimation_ptr->comp_inter_fac_bits[context][is_compound];
    1074             :             }
    1075             :         }
    1076             :         else
    1077      165285 :             assert((!is_compound) == (frm_hdr->reference_mode == SINGLE_REFERENCE));
    1078     3787870 :         int32_t context = 0;
    1079     3787870 :         if (is_compound) {
    1080      191394 :             const CompReferenceType comp_ref_type = /*has_uni_comp_refs(mbmi)
    1081             :                                                       ? UNIDIR_COMP_REFERENCE
    1082             :                                                       : */BIDIR_COMP_REFERENCE;
    1083             :             MvReferenceFrame refType[2];
    1084      191394 :             av1_set_ref_frame(refType, ref_frame_type);
    1085             : 
    1086      191277 :             context = cu_ptr->compoud_reference_type_context;
    1087      191277 :             assert(context >= 0 && context < 5);
    1088      191275 :             refRateB = candidate_ptr->md_rate_estimation_ptr->comp_ref_type_fac_bits[context][comp_ref_type];
    1089             : 
    1090      191275 :             if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
    1091           0 :                 printf("ERROR[AN]: UNIDIR_COMP_REFERENCE not supported\n");
    1092             :                 //const int32_t bit = mbmi->block_mi.ref_frame[0] == BWDREF_FRAME;
    1093             :                 //WRITE_REF_BIT(bit, uni_comp_ref_p);
    1094             : 
    1095             :                 //if (!bit) {
    1096             :                 //    assert(mbmi->block_mi.ref_frame[0] == LAST_FRAME);
    1097             :                 //    const int32_t bit1 = mbmi->block_mi.ref_frame[1] == LAST3_FRAME ||
    1098             :                 //        mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME;
    1099             :                 //    WRITE_REF_BIT(bit1, uni_comp_ref_p1);
    1100             :                 //    if (bit1) {
    1101             :                 //        const int32_t bit2 = mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME;
    1102             :                 //        WRITE_REF_BIT(bit2, uni_comp_ref_p2);
    1103             :                 //    }
    1104             :                 //}
    1105             :                 //else {
    1106             :                 //    assert(mbmi->block_mi.ref_frame[1] == ALTREF_FRAME);
    1107             :                 //}
    1108             : 
    1109             :                 //return;
    1110             :             }
    1111             : 
    1112      191273 :             assert(comp_ref_type == BIDIR_COMP_REFERENCE);
    1113             : 
    1114      382538 :             const int32_t bit = (refType[0] == GOLDEN_FRAME ||
    1115      191265 :                 refType[0] == LAST3_FRAME);
    1116             : 
    1117      191273 :             context = eb_av1_get_pred_context_comp_ref_p(cu_ptr->av1xd);
    1118      191177 :             assert(context >= 0 && context < 3);
    1119      191179 :             refRateC = candidate_ptr->md_rate_estimation_ptr->comp_ref_fac_bits[context][0][bit];
    1120             :             //            WRITE_REF_BIT(bit, comp_ref_p);
    1121             : 
    1122      191179 :             if (!bit) {
    1123      191188 :                 const int32_t bit1 = (refType[0] == LAST2_FRAME);
    1124      191188 :                 context = eb_av1_get_pred_context_comp_ref_p1(cu_ptr->av1xd);
    1125             :                 /*aom_write_symbol(ec_writer, bit1, frameContext->comp_ref_cdf[context][1],
    1126             :                     2);*/
    1127      191082 :                 assert(context >= 0 && context < 3);
    1128      191083 :                 refRateD = candidate_ptr->md_rate_estimation_ptr->comp_ref_fac_bits[context][1][bit1];
    1129             : 
    1130             :                 //WRITE_REF_BIT(bit1, comp_ref_p1);
    1131             :             }
    1132             :             else {
    1133           0 :                 const int32_t bit2 = (refType[0] == GOLDEN_FRAME);
    1134           0 :                 context = eb_av1_get_pred_context_comp_ref_p2(cu_ptr->av1xd);
    1135             :                 /*aom_write_symbol(ec_writer, bit2, frameContext->comp_ref_cdf[context][2],
    1136             :                     2);*/
    1137           0 :                 assert(context >= 0 && context < 3);
    1138           0 :                 refRateE = candidate_ptr->md_rate_estimation_ptr->comp_ref_fac_bits[context][2][bit2];
    1139             : 
    1140             :                 //WRITE_REF_BIT(bit2, comp_ref_p2);
    1141             :             }
    1142             : 
    1143      191083 :             const int32_t bit_bwd = (refType[1] == ALTREF_FRAME);
    1144      191083 :             context = eb_av1_get_pred_context_comp_bwdref_p(cu_ptr->av1xd);
    1145             :             /*aom_write_symbol(ec_writer, bit_bwd, frameContext->comp_bwdref_cdf[context][0],
    1146             :                 2);*/
    1147      191051 :             assert(context >= 0 && context < 3);
    1148      191054 :             refRateF = candidate_ptr->md_rate_estimation_ptr->comp_bwd_ref_fac_bits[context][0][bit_bwd];
    1149             :             //WRITE_REF_BIT(bit_bwd, comp_bwdref_p);
    1150             : 
    1151      191054 :             if (!bit_bwd) {
    1152      191065 :                 context = eb_av1_get_pred_context_comp_bwdref_p1(cu_ptr->av1xd);
    1153             :                 /*aom_write_symbol(ec_writer, refType[1] == ALTREF2_FRAME, frameContext->comp_bwdref_cdf[context][1],
    1154             :                     2);*/
    1155      191077 :                 assert(context >= 0 && context < 3);
    1156      191080 :                 refRateG = candidate_ptr->md_rate_estimation_ptr->comp_bwd_ref_fac_bits[context][1][refType[1] == ALTREF2_FRAME];
    1157             :                 //WRITE_REF_BIT(mbmi->block_mi.ref_frame[1] == ALTREF2_FRAME, comp_bwdref_p1);
    1158             :             }
    1159             :         }
    1160             :         else {
    1161     3596480 :             const int32_t bit0 = (ref_frame_type <= ALTREF_FRAME &&
    1162             :                 ref_frame_type >= BWDREF_FRAME);//0
    1163             : 
    1164     3596480 :             context = eb_av1_get_pred_context_single_ref_p1(cu_ptr->av1xd);
    1165             :             /*aom_write_symbol(ec_writer, bit0, frameContext->single_ref_cdf[context][0],
    1166             :                 2);*/
    1167     3628660 :             assert(context >= 0 && context < 3);
    1168     3628670 :             refRateH = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[context][0][bit0];
    1169             :             //WRITE_REF_BIT(bit0, single_ref_p1);
    1170             : 
    1171     3628670 :             if (bit0) {
    1172           0 :                 const int32_t bit1 = (ref_frame_type == ALTREF_FRAME);
    1173           0 :                 context = eb_av1_get_pred_context_single_ref_p2(cu_ptr->av1xd);
    1174           0 :                 assert(context >= 0 && context < 3);
    1175             :                 /*aom_write_symbol(ec_writer, bit1, frameContext->single_ref_cdf[context][1],
    1176             :                     2);*/
    1177           0 :                 refRateI = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[context][1][bit1];
    1178             :                 //WRITE_REF_BIT(bit1, single_ref_p2);
    1179             : 
    1180           0 :                 if (!bit1) {
    1181           0 :                     context = eb_av1_get_pred_context_single_ref_p6(cu_ptr->av1xd);
    1182             :                     /*aom_write_symbol(ec_writer, cu_ptr->prediction_unit_array[0].ref_frame_type == ALTREF2_FRAME, frameContext->single_ref_cdf[context][5],
    1183             :                         2);*/
    1184           0 :                     assert(context >= 0 && context < 3);
    1185           0 :                     refRateJ = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[context][5][ref_frame_type == ALTREF2_FRAME];
    1186             :                     //WRITE_REF_BIT(mbmi->block_mi.ref_frame[0] == ALTREF2_FRAME, single_ref_p6);
    1187             :                 }
    1188             :             }
    1189             :             else {
    1190     3628670 :                 const int32_t bit2 = (ref_frame_type == LAST3_FRAME ||
    1191             :                     ref_frame_type == GOLDEN_FRAME); //0
    1192     3628670 :                 context = eb_av1_get_pred_context_single_ref_p3(cu_ptr->av1xd);
    1193             :                 /*aom_write_symbol(ec_writer, bit2, frameContext->single_ref_cdf[context][2],
    1194             :                     2);*/
    1195     3628350 :                 assert(context >= 0 && context < 3);
    1196     3628360 :                 refRateK = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[context][2][bit2];
    1197             :                 //WRITE_REF_BIT(bit2, single_ref_p3);
    1198             : 
    1199     3628360 :                 if (!bit2) {
    1200     3101030 :                     const int32_t bit3 = (ref_frame_type != LAST_FRAME); //0;
    1201     3101030 :                     context = eb_av1_get_pred_context_single_ref_p4(cu_ptr->av1xd);
    1202     3100990 :                     assert(context >= 0 && context < 3);
    1203             :                     /*aom_write_symbol(ec_writer, bit3, frameContext->single_ref_cdf[context][3],
    1204             :                         2);*/
    1205     3100990 :                     refRateL = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[context][3][bit3];
    1206             :                     //WRITE_REF_BIT(bit3, single_ref_p4);
    1207             :                 }
    1208             :                 else {
    1209      527333 :                     const int32_t bit4 = (ref_frame_type != LAST3_FRAME);
    1210      527333 :                     context = eb_av1_get_pred_context_single_ref_p5(cu_ptr->av1xd);
    1211             :                     /*aom_write_symbol(ec_writer, bit4, frameContext->single_ref_cdf[context][4],
    1212             :                         2);*/
    1213      527892 :                     assert(context >= 0 && context < 3);
    1214      527892 :                     refRateM = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[context][4][bit4];
    1215             :                     //WRITE_REF_BIT(bit4, single_ref_p5);
    1216             :                 }
    1217             :             }
    1218             :         }
    1219             :     }
    1220             : 
    1221     3819950 :     refRateBits = refRateA + refRateB + refRateC + refRateD + refRateE + refRateF + refRateG + refRateH + refRateI + refRateJ + refRateK + refRateL + refRateM;
    1222             : 
    1223             :     }
    1224    90425500 :     return refRateBits;
    1225             : }
    1226             : //extern INLINE int16_t Av1ModeContextAnalyzer(const int16_t *const mode_context, const MvReferenceFrame *const rf);
    1227             : 
    1228             : extern  int8_t av1_ref_frame_type(const MvReferenceFrame *const rf); // defined outside this listing; Av1ModeContextAnalyzer uses its result to index mode_context
    1229             : uint16_t compound_mode_ctx_map_2[3][COMP_NEWMV_CTXS] = { // lookup used by Av1ModeContextAnalyzer: row = refmv_ctx >> 1, column = min(newmv_ctx, COMP_NEWMV_CTXS - 1)
    1230             :    { 0, 1, 1, 1, 1 },
    1231             :    { 1, 2, 3, 4, 4 },
    1232             :    { 4, 4, 5, 6, 7 },
    1233             : };
    1234   106499000 : static INLINE int16_t Av1ModeContextAnalyzer( // Derives the inter-mode context for reference pair rf from the per-reference mode_context table.
    1235             :     const int16_t *const mode_context, const MvReferenceFrame *const rf) { // mode_context is indexed by av1_ref_frame_type(rf); rf[0] and rf[1] are both read
    1236   106499000 :     const int8_t ref_frame = av1_ref_frame_type(rf);
    1237             :     // rf[1] <= INTRA_FRAME (presumably single-reference prediction - confirm): return the stored context word unchanged.
    1238   106411000 :     if (rf[1] <= INTRA_FRAME) return mode_context[ref_frame];
    1239             :     // Otherwise unpack the NEWMV and REFMV sub-contexts from the stored word and remap them through compound_mode_ctx_map_2.
    1240    71995700 :     const int16_t newmv_ctx = mode_context[ref_frame] & NEWMV_CTX_MASK;
    1241    71995700 :     const int16_t refmv_ctx =
    1242    71995700 :         (mode_context[ref_frame] >> REFMV_OFFSET) & REFMV_CTX_MASK;
    1243    71995700 :     assert((refmv_ctx >> 1) < 3); // compound_mode_ctx_map_2 has exactly 3 rows
    1244    71995700 :     const int16_t comp_ctx = compound_mode_ctx_map_2[refmv_ctx >> 1][AOMMIN( // column index is clamped to the last column
    1245             :         newmv_ctx, COMP_NEWMV_CTXS - 1)];
    1246    71995700 :     return comp_ctx;
    1247             : }
    1248             : 
    1249             : int get_comp_index_context_enc( // forward declaration (defined outside this listing); get_compound_mode_rate uses the result as the first index into comp_idx_fac_bits
    1250             :     PictureParentControlSet   *pcs_ptr,
    1251             :     int cur_frame_index, // get_compound_mode_rate passes parent_pcs_ptr->cur_order_hint
    1252             :     int bck_frame_index, // get_compound_mode_rate passes ref_order_hint[rf[0] - 1]
    1253             :     int fwd_frame_index, // get_compound_mode_rate passes ref_order_hint[rf[1] - 1]
    1254             :     const MacroBlockD *xd);
    1255             : int get_comp_group_idx_context_enc(const MacroBlockD *xd); // forward declaration; result is the first index into comp_group_idx_fac_bits in get_compound_mode_rate
    1256             : int is_any_masked_compound_used(BlockSize sb_type); // forward declaration; combined with seq_header.enable_masked_compound in get_compound_mode_rate
    1257   102642000 : uint32_t get_compound_mode_rate( // Sums the md_rate_estimation_ptr table costs for a candidate's compound signalling (comp_group_idx, compound_idx, compound type, wedge index); returns 0 when md_pass == 0 or has_second_ref() is false.
    1258             :     uint8_t                 md_pass, // 0 => return 0 immediately, before cu_ptr is touched
    1259             :     ModeDecisionCandidate *candidate_ptr, // read here: comp_group_idx, compound_idx, interinter_comp, pred_mode (assert only), md_rate_estimation_ptr
    1260             :     CodingUnit            *cu_ptr, // side effect: av1xd->mi[0]->mbmi.block_mi.ref_frame[0..1] is overwritten from ref_frame_type
    1261             :     uint8_t                ref_frame_type, // expanded into the pair rf[0], rf[1] by av1_set_ref_frame
    1262             :     BlockSize              bsize, // indexes compound_type_fac_bits and wedge_idx_fac_bits
    1263             :     SequenceControlSet    *sequence_control_set_ptr, // supplies seq_header.enable_masked_compound and order_hint_info.enable_jnt_comp
    1264             :     PictureControlSet     *picture_control_set_ptr // parent_pcs_ptr supplies cur_order_hint and ref_order_hint[]; frm_hdr is read only inside an assert
    1265             : )
    1266             : {
    1267   102642000 :     uint32_t comp_rate = 0;
    1268   102642000 :     if (md_pass == 0)
    1269      202938 :         return 0;
    1270             :     // Write the candidate's reference pair into the block's mode info so has_second_ref() and the context helpers below see it.
    1271   102439000 :     MbModeInfo *const mbmi = &cu_ptr->av1xd->mi[0]->mbmi;
    1272             :     MvReferenceFrame rf[2];
    1273   102439000 :     av1_set_ref_frame(rf, ref_frame_type);
    1274   102412000 :     mbmi->block_mi.ref_frame[0] = rf[0];
    1275   102412000 :     mbmi->block_mi.ref_frame[1] = rf[1];
    1276             : 
    1277             :     // NOTE: make sure any cu_ptr data read below is already set before this function is called
    1278             : 
    1279   102412000 :     if (has_second_ref(mbmi)) {
    1280             :         // Masked compound needs both a block size that allows it and the sequence-level enable flag.
    1281   143646000 :         const int masked_compound_used = is_any_masked_compound_used(bsize) &&
    1282    71769300 :             sequence_control_set_ptr->seq_header.enable_masked_compound;
    1283             :         // comp_group_idx is only costed when masked compound is available; otherwise it is expected to be 0.
    1284    71767400 :         if (masked_compound_used) {
    1285    71592300 :             const int ctx_comp_group_idx = get_comp_group_idx_context_enc(cu_ptr->av1xd);
    1286    71598400 :             comp_rate = candidate_ptr->md_rate_estimation_ptr->comp_group_idx_fac_bits[ctx_comp_group_idx][candidate_ptr->comp_group_idx];
    1287             :         }
    1288             :         else {
    1289      175043 :             assert(candidate_ptr->comp_group_idx == 0);
    1290             :         }
    1291             :         // comp_group_idx == 0: only compound_idx is costed, and only when enable_jnt_comp is set.
    1292    71773500 :         if (candidate_ptr->comp_group_idx == 0) {
    1293    40600900 :             if (candidate_ptr->compound_idx) // debug-only check: with asserts compiled out this if has an empty body
    1294    20418500 :                 assert(candidate_ptr->interinter_comp.type == COMPOUND_AVERAGE);
    1295             : 
    1296    40600900 :             if (sequence_control_set_ptr->seq_header.order_hint_info.enable_jnt_comp) {
    1297    40421100 :                 const int comp_index_ctx = get_comp_index_context_enc(
    1298    40421100 :                     picture_control_set_ptr->parent_pcs_ptr,
    1299    40421100 :                     picture_control_set_ptr->parent_pcs_ptr->cur_order_hint,
    1300    40421100 :                     picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[0] - 1], // ref_order_hint is indexed by reference frame minus one
    1301    40421100 :                     picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[1] - 1],
    1302    40421100 :                     cu_ptr->av1xd);
    1303    40418600 :                 comp_rate += candidate_ptr->md_rate_estimation_ptr->comp_idx_fac_bits[comp_index_ctx][candidate_ptr->compound_idx];
    1304             :             }
    1305             :             else {
    1306      179787 :                 assert(candidate_ptr->compound_idx == 1);
    1307             :             }
    1308             :         }
    1309             :         else {
    1310             :             // comp_group_idx != 0: the candidate must use a masked compound type; the asserts below check that precondition.
    1311    31172500 :             assert(picture_control_set_ptr->parent_pcs_ptr->frm_hdr.reference_mode != SINGLE_REFERENCE &&
    1312             :                 is_inter_compound_mode(candidate_ptr->pred_mode ));
    1313    31353600 :             assert(masked_compound_used);
    1314             :             // the masked compound type is either COMPOUND_WEDGE or COMPOUND_DIFFWTD
    1315    31353600 :             assert(candidate_ptr->interinter_comp.type == COMPOUND_WEDGE ||
    1316             :                 candidate_ptr->interinter_comp.type == COMPOUND_DIFFWTD);
    1317             :             // The compound type is costed only when wedge is usable for this block size; the table index is the type's offset from COMPOUND_WEDGE.
    1318    31353600 :             if (is_interinter_compound_used(COMPOUND_WEDGE, bsize))
    1319    29081800 :                 comp_rate += candidate_ptr->md_rate_estimation_ptr->compound_type_fac_bits[bsize][candidate_ptr->interinter_comp.type - COMPOUND_WEDGE];
    1320             :             // Type-specific side information.
    1321    31352400 :             if (candidate_ptr->interinter_comp.type == COMPOUND_WEDGE) {
    1322    11524400 :                 assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
    1323    11523300 :                 comp_rate += candidate_ptr->md_rate_estimation_ptr->wedge_idx_fac_bits[bsize][candidate_ptr->interinter_comp.wedge_index];
    1324    11523300 :                 comp_rate += av1_cost_literal(1); // NOTE(review): presumably the wedge sign bit - confirm
    1325             :             }
    1326             :             else {
    1327    19827900 :                 assert(candidate_ptr->interinter_comp.type == COMPOUND_DIFFWTD);
    1328    19827900 :                 comp_rate += av1_cost_literal(1); // NOTE(review): presumably the diff-weighted mask type bit - confirm
    1329             :             }
    1330             :         }
    1331             :     }
    1332             : 
    1333   102407000 :     return comp_rate;
    1334             : }
    1335             :     #if II_COMP_FLAG
    1336             : int is_interintra_wedge_used(BlockSize sb_type);
    1337             : int svt_is_interintra_allowed(
    1338             :     uint8_t enable_inter_intra,
    1339             :     BlockSize sb_type,
    1340             :     PredictionMode mode,
    1341             :     MvReferenceFrame ref_frame[2]);
    1342             : #endif
    1343             : 
    1344             : #if ADD_MDC_FULL_COST
    1345     3617500 : uint64_t mdc_av1_inter_fast_cost(
    1346             :     CodingUnit                  *cu_ptr,
    1347             :     ModeDecisionCandidate       *candidate_ptr,
    1348             :     uint64_t                    luma_distortion,
    1349             :     uint64_t                    lambda,
    1350             :     EbBool                      use_ssd,
    1351             :     PictureControlSet           *picture_control_set_ptr,
    1352             :     CandidateMv                 *ref_mv_stack,
    1353             :     const BlockGeom             *blk_geom)
    1354             : 
    1355             : {
    1356             :     // Luma rate
    1357     3617500 :     uint32_t           luma_rate = 0;
    1358     3617500 :     uint32_t           chroma_rate = 0;
    1359     3617500 :     uint64_t           mv_rate = 0;
    1360             :     uint64_t           skip_mode_rate;
    1361             :     // Luma and chroma distortion
    1362             :     uint64_t           luma_sad;
    1363             :     uint64_t           total_distortion;
    1364             : 
    1365             :     uint32_t           rate;
    1366             : 
    1367             :     int16_t           pred_ref_x;
    1368             :     int16_t           pred_ref_y;
    1369             :     int16_t           mv_ref_x;
    1370             :     int16_t           mv_ref_y;
    1371             : 
    1372             :     EbReflist       ref_list_idx;
    1373             : 
    1374     3617500 :     candidate_ptr->fast_luma_rate = 0;
    1375             : 
    1376     3617500 :     PredictionMode inter_mode = (PredictionMode)candidate_ptr->pred_mode;
    1377             : 
    1378     3617500 :     uint64_t inter_mode_bits_num = 0;
    1379             : 
    1380     3617500 :     uint8_t skip_mode_ctx = 0;// cu_ptr->skip_flag_context;
    1381             :     MvReferenceFrame rf[2];
    1382     3617500 :     av1_set_ref_frame(rf, candidate_ptr->ref_frame_type);
    1383     3617420 :     const int8_t ref_frame = av1_ref_frame_type(rf);
    1384     3617350 :     cu_ptr->inter_mode_ctx[ref_frame] = 0;
    1385     3617350 :     uint32_t mode_ctx = Av1ModeContextAnalyzer(cu_ptr->inter_mode_ctx, rf);
    1386     3617090 :     skip_mode_rate = candidate_ptr->md_rate_estimation_ptr->skip_mode_fac_bits[skip_mode_ctx][0];
    1387     3617090 :     uint64_t reference_picture_bits_num = 0;
    1388             : 
    1389             :     //Reference Type and Mode Bit estimation
    1390             : 
    1391     3617090 :     reference_picture_bits_num = EstimateRefFramesNumBits(
    1392             :         picture_control_set_ptr,
    1393             :         candidate_ptr,
    1394             :         cu_ptr,
    1395     3617090 :         blk_geom->bwidth,
    1396     3617090 :         blk_geom->bheight,
    1397     3617090 :         candidate_ptr->ref_frame_type,
    1398             :         0,
    1399     3617090 :         candidate_ptr->is_compound);
    1400             : 
    1401     3616340 :     if (candidate_ptr->is_compound)
    1402           0 :         inter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->inter_compound_mode_fac_bits[mode_ctx][INTER_COMPOUND_OFFSET(inter_mode)];
    1403             :     else {
    1404             :         //uint32_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
    1405             :         //inter_mode_bits_num = candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->new_mv_mode_fac_bits[mode_ctx][0];
    1406             : 
    1407     3616340 :         int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
    1408             :         //aom_write_symbol(ec_writer, mode != NEWMV, frameContext->newmv_cdf[newmv_ctx], 2);
    1409     3616340 :         inter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->new_mv_mode_fac_bits[newmv_ctx][inter_mode != NEWMV];
    1410     3616340 :         if (inter_mode != NEWMV) {
    1411     3616360 :             const int16_t zeromvCtx = (mode_ctx >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
    1412             :             //aom_write_symbol(ec_writer, mode != GLOBALMV, frameContext->zeromv_cdf[zeromvCtx], 2);
    1413     3616360 :             inter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->zero_mv_mode_fac_bits[zeromvCtx][inter_mode != GLOBALMV];
    1414     3616360 :             if (inter_mode != GLOBALMV) {
    1415     3616340 :                 int16_t refmvCtx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK;
    1416             :                 /*aom_write_symbol(ec_writer, mode != NEARESTMV, frameContext->refmv_cdf[refmv_ctx], 2);*/
    1417     3616340 :                 inter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->ref_mv_mode_fac_bits[refmvCtx][inter_mode != NEARESTMV];
    1418             :             }
    1419             :         }
    1420             :     }
    1421     3616340 :     if (inter_mode == NEWMV || inter_mode == NEW_NEWMV || have_nearmv_in_inter_mode(inter_mode)) {
    1422             :         //drLIdex cost estimation
    1423           0 :         const int32_t new_mv = inter_mode == NEWMV || inter_mode == NEW_NEWMV;
    1424           0 :         if (new_mv) {
    1425             :             int32_t idx;
    1426           0 :             for (idx = 0; idx < 2; ++idx) {
    1427           0 :                 if (cu_ptr->av1xd->ref_mv_count[candidate_ptr->ref_frame_type] > idx + 1) {
    1428             :                     uint8_t drl1Ctx =
    1429           0 :                         av1_drl_ctx(ref_mv_stack, idx);
    1430           0 :                     inter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->drl_mode_fac_bits[drl1Ctx][candidate_ptr->drl_index != idx];
    1431           0 :                     if (candidate_ptr->drl_index == idx) break;
    1432             :                 }
    1433             :             }
    1434             :         }
    1435             : 
    1436           0 :         if (have_nearmv_in_inter_mode(inter_mode)) {
    1437             :             int32_t idx;
    1438             :             // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
    1439           0 :             for (idx = 1; idx < 3; ++idx) {
    1440           0 :                 if (cu_ptr->av1xd->ref_mv_count[candidate_ptr->ref_frame_type] > idx + 1) {
    1441             :                     uint8_t drl_ctx =
    1442           0 :                         av1_drl_ctx(ref_mv_stack, idx);
    1443           0 :                     inter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->drl_mode_fac_bits[drl_ctx][candidate_ptr->drl_index != (idx - 1)];
    1444             : 
    1445           0 :                     if (candidate_ptr->drl_index == (idx - 1)) break;
    1446             :                 }
    1447             :             }
    1448             :         }
    1449             :     }
    1450             : 
    1451     3616460 :     if (have_newmv_in_inter_mode(inter_mode)) {
    1452           0 :         if (candidate_ptr->is_compound) {
    1453           0 :             mv_rate = 0;
    1454             : 
    1455           0 :             if (inter_mode == NEW_NEWMV) {
    1456           0 :                 for (ref_list_idx = 0; ref_list_idx < 2; ++ref_list_idx) {
    1457           0 :                     pred_ref_x = candidate_ptr->motion_vector_pred_x[ref_list_idx];
    1458           0 :                     pred_ref_y = candidate_ptr->motion_vector_pred_y[ref_list_idx];
    1459           0 :                     mv_ref_x = ref_list_idx == REF_LIST_1 ? candidate_ptr->motion_vector_xl1 : candidate_ptr->motion_vector_xl0;
    1460           0 :                     mv_ref_y = ref_list_idx == REF_LIST_1 ? candidate_ptr->motion_vector_yl1 : candidate_ptr->motion_vector_yl0;
    1461             : 
    1462             :                     MV mv;
    1463           0 :                     mv.row = mv_ref_y;
    1464           0 :                     mv.col = mv_ref_x;
    1465             : 
    1466             :                     MV ref_mv;
    1467           0 :                     ref_mv.row = pred_ref_y;
    1468           0 :                     ref_mv.col = pred_ref_x;
    1469             : 
    1470           0 :                     mv_rate += eb_av1_mv_bit_cost(
    1471             :                         &mv,
    1472             :                         &ref_mv,
    1473           0 :                         candidate_ptr->md_rate_estimation_ptr->nmv_vec_cost,
    1474           0 :                         candidate_ptr->md_rate_estimation_ptr->nmvcoststack,
    1475             :                         MV_COST_WEIGHT);
    1476             :                 }
    1477             :             }
    1478           0 :             else if (inter_mode == NEAREST_NEWMV || inter_mode == NEAR_NEWMV) {
    1479           0 :                 pred_ref_x = candidate_ptr->motion_vector_pred_x[REF_LIST_1];
    1480           0 :                 pred_ref_y = candidate_ptr->motion_vector_pred_y[REF_LIST_1];
    1481           0 :                 mv_ref_x = candidate_ptr->motion_vector_xl1;
    1482           0 :                 mv_ref_y = candidate_ptr->motion_vector_yl1;
    1483             : 
    1484             :                 MV mv;
    1485           0 :                 mv.row = mv_ref_y;
    1486           0 :                 mv.col = mv_ref_x;
    1487             : 
    1488             :                 MV ref_mv;
    1489           0 :                 ref_mv.row = pred_ref_y;
    1490           0 :                 ref_mv.col = pred_ref_x;
    1491             : 
    1492           0 :                 mv_rate += eb_av1_mv_bit_cost(
    1493             :                     &mv,
    1494             :                     &ref_mv,
    1495           0 :                     candidate_ptr->md_rate_estimation_ptr->nmv_vec_cost,
    1496           0 :                     candidate_ptr->md_rate_estimation_ptr->nmvcoststack,
    1497             :                     MV_COST_WEIGHT);
    1498             :             }
    1499             :             else {
    1500           0 :                 assert(inter_mode == NEW_NEARESTMV || inter_mode == NEW_NEARMV);
    1501             : 
    1502           0 :                 pred_ref_x = candidate_ptr->motion_vector_pred_x[REF_LIST_0];
    1503           0 :                 pred_ref_y = candidate_ptr->motion_vector_pred_y[REF_LIST_0];
    1504           0 :                 mv_ref_x = candidate_ptr->motion_vector_xl0;
    1505           0 :                 mv_ref_y = candidate_ptr->motion_vector_yl0;
    1506             : 
    1507             :                 MV mv;
    1508           0 :                 mv.row = mv_ref_y;
    1509           0 :                 mv.col = mv_ref_x;
    1510             : 
    1511             :                 MV ref_mv;
    1512           0 :                 ref_mv.row = pred_ref_y;
    1513           0 :                 ref_mv.col = pred_ref_x;
    1514             : 
    1515           0 :                 mv_rate += eb_av1_mv_bit_cost(
    1516             :                     &mv,
    1517             :                     &ref_mv,
    1518           0 :                     candidate_ptr->md_rate_estimation_ptr->nmv_vec_cost,
    1519           0 :                     candidate_ptr->md_rate_estimation_ptr->nmvcoststack,
    1520             :                     MV_COST_WEIGHT);
    1521             :             }
    1522             :         }
    1523             :         else {
    1524           0 :             ref_list_idx = candidate_ptr->prediction_direction[0] == 0 ? 0 : 1;
    1525             : 
    1526           0 :             pred_ref_x = candidate_ptr->motion_vector_pred_x[ref_list_idx];
    1527           0 :             pred_ref_y = candidate_ptr->motion_vector_pred_y[ref_list_idx];
    1528             : 
    1529           0 :             mv_ref_x = ref_list_idx == 0 ? candidate_ptr->motion_vector_xl0 : candidate_ptr->motion_vector_xl1;
    1530           0 :             mv_ref_y = ref_list_idx == 0 ? candidate_ptr->motion_vector_yl0 : candidate_ptr->motion_vector_yl1;
    1531             : 
    1532             :             MV mv;
    1533           0 :             mv.row = mv_ref_y;
    1534           0 :             mv.col = mv_ref_x;
    1535             : 
    1536             :             MV ref_mv;
    1537           0 :             ref_mv.row = pred_ref_y;
    1538           0 :             ref_mv.col = pred_ref_x;
    1539             : 
    1540           0 :             mv_rate = eb_av1_mv_bit_cost(
    1541             :                 &mv,
    1542             :                 &ref_mv,
    1543           0 :                 candidate_ptr->md_rate_estimation_ptr->nmv_vec_cost,
    1544           0 :                 candidate_ptr->md_rate_estimation_ptr->nmvcoststack,
    1545             :                 MV_COST_WEIGHT);
    1546             :         }
    1547             :     }
    1548     3616480 :     EbBool is_inter = inter_mode >= SINGLE_INTER_MODE_START && inter_mode < SINGLE_INTER_MODE_END;
    1549     3616480 :     if (is_inter
    1550             :         //&& picture_control_set_ptr->parent_pcs_ptr->switchable_motion_mode
    1551     3616580 :         && rf[1] != INTRA_FRAME)
    1552             :     {
    1553     3616580 :         MotionMode motion_mode_rd = candidate_ptr->motion_mode;
    1554     3616580 :         BlockSize bsize = blk_geom->bsize;
    1555     3616580 :         cu_ptr->prediction_unit_array[0].num_proj_ref = candidate_ptr->num_proj_ref;
    1556     3616580 :         MotionMode last_motion_mode_allowed = motion_mode_allowed(
    1557             :             picture_control_set_ptr,
    1558             :             cu_ptr,
    1559             :             bsize,
    1560     3616580 :             rf[0],
    1561     3616580 :             rf[1],
    1562             :             inter_mode);
    1563             : 
    1564     3616590 :         switch (last_motion_mode_allowed) {
    1565     3616600 :         case SIMPLE_TRANSLATION: break;
    1566           0 :         case OBMC_CAUSAL:
    1567           0 :             assert(motion_mode_rd == SIMPLE_TRANSLATION); // TODO: remove when OBMC added
    1568           0 :             inter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->motion_mode_fac_bits1[bsize][motion_mode_rd];
    1569           0 :             break;
    1570           0 :         default:
    1571           0 :             inter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->motion_mode_fac_bits[bsize][motion_mode_rd];
    1572             :         }
    1573           0 :     }
    1574             : 
    1575     3616500 :     uint32_t is_inter_rate = candidate_ptr->md_rate_estimation_ptr->intra_inter_fac_bits[cu_ptr->is_inter_ctx][1];
    1576     3616500 :     luma_rate = (uint32_t)(reference_picture_bits_num + skip_mode_rate + inter_mode_bits_num + mv_rate + is_inter_rate);
    1577             :     // Keep the Fast Luma and Chroma rate for future use
    1578     3616500 :     candidate_ptr->fast_luma_rate = luma_rate;
    1579     3616500 :     candidate_ptr->fast_chroma_rate = chroma_rate;
    1580             : 
    1581     3616500 :     if (use_ssd) {
    1582           0 :         int32_t current_q_index = MAX(0, MIN(QINDEX_RANGE - 1, picture_control_set_ptr->parent_pcs_ptr->base_qindex));
    1583           0 :         Dequants *const dequants = &picture_control_set_ptr->parent_pcs_ptr->deq;
    1584             : 
    1585           0 :         int16_t quantizer = dequants->y_dequant_Q3[current_q_index][1];
    1586           0 :         rate = 0;
    1587           0 :         model_rd_from_sse(
    1588           0 :             blk_geom->bsize,
    1589             :             quantizer,
    1590             :             luma_distortion,
    1591             :             &rate,
    1592             :             &luma_sad);
    1593           0 :         luma_rate += rate;
    1594           0 :         total_distortion = luma_sad;
    1595           0 :         rate = luma_rate;
    1596             : 
    1597           0 :         if (candidate_ptr->merge_flag) {
    1598           0 :             uint64_t skip_mode_rate = candidate_ptr->md_rate_estimation_ptr->skip_mode_fac_bits[skip_mode_ctx][1];
    1599           0 :             if (skip_mode_rate < rate) {
    1600           0 :                 candidate_ptr->fast_luma_rate = skip_mode_rate;
    1601           0 :                 return(RDCOST(lambda, skip_mode_rate, total_distortion));
    1602             :             }
    1603             :         }
    1604           0 :         candidate_ptr->fast_luma_rate = rate;
    1605           0 :         return(RDCOST(lambda, rate, total_distortion));
    1606             :     }
    1607             :     else {
    1608     3616500 :         luma_sad = (LUMA_WEIGHT * luma_distortion) << AV1_COST_PRECISION;
    1609     3616500 :         total_distortion = luma_sad;
    1610     3616500 :         rate = luma_rate;
    1611             : 
    1612             :         // Assign fast cost
    1613     3616500 :         if (candidate_ptr->merge_flag) {
    1614           0 :             uint64_t skip_mode_rate = candidate_ptr->md_rate_estimation_ptr->skip_mode_fac_bits[skip_mode_ctx][1];
    1615           0 :             if (skip_mode_rate < rate) {
    1616           0 :                 candidate_ptr->fast_luma_rate = skip_mode_rate;
    1617           0 :                 return(RDCOST(lambda, skip_mode_rate, total_distortion));
    1618             :             }
    1619             :         }
    1620     3616500 :         candidate_ptr->fast_luma_rate = rate;
    1621     3616500 :         return(RDCOST(lambda, rate, total_distortion));
    1622             :     }
    1623             : }
    1624             : #endif
    1625             : #if TWO_PASS_IMPROVEMENT
    1626             : /* two_pass_cost_update
    1627             :  * Biases, in place, the rate (*rate, 32-bit) and distortion (*distortion) of one mode-decision candidate according to its reference frames.
    1628             :  * Per the original note: used in the first pass only, for the purpose of data collection. Returns nothing; results are written through the pointers. */
    1629           0 : void two_pass_cost_update(
    1630             :     PictureControlSet     *picture_control_set_ptr, // read: slice_type, picture_number, ref_pic_ptr_array[REF_LIST_1][0]
    1631             :     ModeDecisionCandidate *candidate_ptr, // read: ref_frame_type, is_compound
    1632             :     uint32_t              *rate, // in/out: candidate rate, scaled in place
    1633             :     uint64_t              *distortion) { // in/out: candidate distortion, scaled in place
    1634             : 
    1635             :     MvReferenceFrame ref_type[2];
    1636           0 :     av1_set_ref_frame(ref_type, candidate_ptr->ref_frame_type); // presumably expands ref_frame_type into the pair ref_type[0]/ref_type[1] - confirm against helper
    1637           0 :     if ((candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME || ref_type[1] != BWDREF_FRAME)) || // compound: any pair other than (LAST_FRAME, BWDREF_FRAME)
    1638           0 :         (!candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME && ref_type[0] != BWDREF_FRAME))) { // single: reference is neither LAST_FRAME nor BWDREF_FRAME
    1639           0 :         *rate += (*rate) * FIRST_PASS_COST_PENALTY / 100; // add FIRST_PASS_COST_PENALTY percent (integer division truncates)
    1640           0 :         *distortion += (*distortion) * FIRST_PASS_COST_PENALTY / 100; // same percentage applied to distortion
    1641             :     }
    1642           0 :     EbReferenceObject  *refObjL1 = (EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[REF_LIST_1][0]->object_ptr; // NOTE(review): dereferenced before the B_SLICE check below - confirm this entry is never NULL for non-B pictures
    1643           0 :     if (picture_control_set_ptr->slice_type == B_SLICE &&
    1644           0 :         (candidate_ptr->is_compound || ref_type[0] == BWDREF_FRAME) && // candidate is compound or its single reference is BWDREF_FRAME
    1645           0 :         (refObjL1->slice_type == I_SLICE && refObjL1->ref_poc > picture_control_set_ptr->picture_number)) { // list-1 reference is an I slice whose ref_poc exceeds this picture's number
    1646           0 :         *rate += (*rate * 2); // rate becomes 3x its current value (computed in uint32_t; applied after any penalty above)
    1647           0 :         *distortion += (*distortion) * 2; // distortion becomes 3x its current value
    1648             :     }
    1649           0 : }
    1650           0 : void two_pass_cost_update_64bit( // Applies the same in-place rate/distortion bias as two_pass_cost_update, but *rate is uint64_t here. Returns nothing.
    1651             :     PictureControlSet     *picture_control_set_ptr, // read: slice_type, picture_number, ref_pic_ptr_array[REF_LIST_1][0]
    1652             :     ModeDecisionCandidate *candidate_ptr, // read: ref_frame_type, is_compound
    1653             :     uint64_t              *rate, // in/out: candidate rate, scaled in place
    1654             :     uint64_t              *distortion) { // in/out: candidate distortion, scaled in place
    1655             : 
    1656             :     MvReferenceFrame ref_type[2];
    1657           0 :     av1_set_ref_frame(ref_type, candidate_ptr->ref_frame_type); // presumably expands ref_frame_type into the pair ref_type[0]/ref_type[1] - confirm against helper
    1658           0 :     if ((candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME || ref_type[1] != BWDREF_FRAME)) || // compound: any pair other than (LAST_FRAME, BWDREF_FRAME)
    1659           0 :         (!candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME && ref_type[0] != BWDREF_FRAME))) { // single: reference is neither LAST_FRAME nor BWDREF_FRAME
    1660           0 :         *rate += (*rate) * FIRST_PASS_COST_PENALTY / 100; // add FIRST_PASS_COST_PENALTY percent (integer division truncates)
    1661           0 :         *distortion += (*distortion) * FIRST_PASS_COST_PENALTY / 100; // same percentage applied to distortion
    1662             :     }
    1663           0 :     EbReferenceObject  *refObjL1 = (EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[REF_LIST_1][0]->object_ptr; // NOTE(review): dereferenced before the B_SLICE check below - confirm this entry is never NULL for non-B pictures
    1664           0 :     if (picture_control_set_ptr->slice_type == B_SLICE &&
    1665           0 :         (candidate_ptr->is_compound || ref_type[0] == BWDREF_FRAME) && // candidate is compound or its single reference is BWDREF_FRAME
    1666           0 :         (refObjL1->slice_type == I_SLICE && refObjL1->ref_poc > picture_control_set_ptr->picture_number)) { // list-1 reference is an I slice whose ref_poc exceeds this picture's number
    1667           0 :         *rate += (*rate * 2); // rate becomes 3x its current value (applied after any penalty above)
    1668           0 :         *distortion += (*distortion) * 2; // distortion becomes 3x its current value
    1669             :     }
    1670           0 : }
    1671             : #endif
    1672             : 
    1673   102988000 : uint64_t av1_inter_fast_cost(
    1674             :     CodingUnit            *cu_ptr,
    1675             :     ModeDecisionCandidate *candidate_ptr,
    1676             :     uint32_t                 qp,
    1677             :     uint64_t                 luma_distortion,
    1678             :     uint64_t                 chroma_distortion,
    1679             :     uint64_t                 lambda,
    1680             :     EbBool                   use_ssd,
    1681             :     PictureControlSet     *picture_control_set_ptr,
    1682             :     CandidateMv             *ref_mv_stack,
    1683             :     const BlockGeom         *blk_geom,
    1684             :     uint32_t                 miRow,
    1685             :     uint32_t                 miCol,
    1686             :     uint8_t                 md_pass,
    1687             :     uint32_t                 left_neighbor_mode,
    1688             :     uint32_t                 top_neighbor_mode)
    1689             : 
    1690             : {
    1691             :     UNUSED(top_neighbor_mode);
    1692             :     UNUSED(left_neighbor_mode);
    1693             :     UNUSED(miCol);
    1694             :     UNUSED(miRow);
    1695             : 
    1696   102988000 :     FrameHeader *frm_hdr = &picture_control_set_ptr->parent_pcs_ptr->frm_hdr;
    1697             : 
    1698             :     // Luma rate
    1699   102988000 :     uint32_t           lumaRate = 0;
    1700   102988000 :     uint32_t           chromaRate = 0;
    1701   102988000 :     uint64_t           mvRate = 0;
    1702             :     uint64_t           skipModeRate;
    1703             :     // Luma and chroma distortion
    1704             :     uint64_t           lumaSad;
    1705             :     uint64_t             chromaSad;
    1706             :     uint64_t           totalDistortion;
    1707             : 
    1708             :     uint32_t           rate;
    1709             : 
    1710             :     int16_t           predRefX;
    1711             :     int16_t           predRefY;
    1712             :     int16_t           mvRefX;
    1713             :     int16_t           mvRefY;
    1714             : 
    1715             :     EbReflist       refListIdx;
    1716             : 
    1717             :     (void)qp;
    1718             : 
    1719   102988000 :     PredictionMode inter_mode = (PredictionMode)candidate_ptr->pred_mode;
    1720             : 
    1721   102988000 :     uint64_t interModeBitsNum = 0;
    1722             : 
    1723   102988000 :     uint8_t skipModeCtx = cu_ptr->skip_flag_context;
    1724             :     MvReferenceFrame rf[2];
    1725   102988000 :     av1_set_ref_frame(rf, candidate_ptr->ref_frame_type);
    1726   102931000 :     uint32_t modeCtx = Av1ModeContextAnalyzer(cu_ptr->inter_mode_ctx, rf);
    1727   102843000 :     skipModeRate = candidate_ptr->md_rate_estimation_ptr->skip_mode_fac_bits[skipModeCtx][0];
    1728   102843000 :     uint64_t referencePictureBitsNum = 0;
    1729             : 
    1730             :     //Reference Type and Mode Bit estimation
    1731             : 
    1732   102843000 :     referencePictureBitsNum = EstimateRefFramesNumBits(
    1733             :         picture_control_set_ptr,
    1734             :         candidate_ptr,
    1735             :         cu_ptr,
    1736   102843000 :         blk_geom->bwidth,
    1737   102843000 :         blk_geom->bheight,
    1738   102843000 :         candidate_ptr->ref_frame_type,
    1739             :         md_pass,
    1740   102843000 :         candidate_ptr->is_compound);
    1741             : 
    1742   102822000 :     if (candidate_ptr->is_compound)
    1743    72041800 :         interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->inter_compound_mode_fac_bits[modeCtx][INTER_COMPOUND_OFFSET(inter_mode)];
    1744             :     else {
    1745             :         //uint32_t newmv_ctx = modeCtx & NEWMV_CTX_MASK;
    1746             :         //interModeBitsNum = candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->new_mv_mode_fac_bits[mode_ctx][0];
    1747             : 
    1748    30780000 :         int16_t newmv_ctx = modeCtx & NEWMV_CTX_MASK;
    1749             :         //aom_write_symbol(ec_writer, mode != NEWMV, frameContext->newmv_cdf[newmv_ctx], 2);
    1750    30780000 :         interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->new_mv_mode_fac_bits[newmv_ctx][inter_mode != NEWMV];
    1751    30780000 :         if (inter_mode != NEWMV) {
    1752    22733500 :             const int16_t zeromvCtx = (modeCtx >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
    1753             :             //aom_write_symbol(ec_writer, mode != GLOBALMV, frameContext->zeromv_cdf[zeromvCtx], 2);
    1754    22733500 :             interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->zero_mv_mode_fac_bits[zeromvCtx][inter_mode != GLOBALMV];
    1755    22733500 :             if (inter_mode != GLOBALMV) {
    1756    21742300 :                 int16_t refmvCtx = (modeCtx >> REFMV_OFFSET) & REFMV_CTX_MASK;
    1757             :                 /*aom_write_symbol(ec_writer, mode != NEARESTMV, frameContext->refmv_cdf[refmv_ctx], 2);*/
    1758    21742300 :                 interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->ref_mv_mode_fac_bits[refmvCtx][inter_mode != NEARESTMV];
    1759             :             }
    1760             :         }
    1761             :     }
    1762   102822000 :     if (inter_mode == NEWMV || inter_mode == NEW_NEWMV || have_nearmv_in_inter_mode(inter_mode)) {
    1763             :         //drLIdex cost estimation
    1764    72612300 :         const int32_t new_mv = inter_mode == NEWMV || inter_mode == NEW_NEWMV;
    1765    72612300 :         if (new_mv) {
    1766             :             int32_t idx;
    1767    47772600 :             for (idx = 0; idx < 2; ++idx) {
    1768    39149300 :                 if (cu_ptr->av1xd->ref_mv_count[candidate_ptr->ref_frame_type] > idx + 1) {
    1769             :                     uint8_t drl1Ctx =
    1770    33373700 :                         av1_drl_ctx(ref_mv_stack, idx);
    1771    33375500 :                     interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->drl_mode_fac_bits[drl1Ctx][candidate_ptr->drl_index != idx];
    1772    33375500 :                     if (candidate_ptr->drl_index == idx) break;
    1773             :                 }
    1774             :             }
    1775             :         }
    1776             : 
    1777    72614100 :         if (have_nearmv_in_inter_mode(inter_mode)) {
    1778             :             int32_t idx;
    1779             :             // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
    1780   107615000 :             for (idx = 1; idx < 3; ++idx) {
    1781    79862800 :                 if (cu_ptr->av1xd->ref_mv_count[candidate_ptr->ref_frame_type] > idx + 1) {
    1782             :                     uint8_t drl_ctx =
    1783    42192300 :                         av1_drl_ctx(ref_mv_stack, idx);
    1784    42198900 :                     interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->drl_mode_fac_bits[drl_ctx][candidate_ptr->drl_index != (idx - 1)];
    1785             : 
    1786    42198900 :                     if (candidate_ptr->drl_index == (idx - 1)) break;
    1787             :                 }
    1788             :             }
    1789             :         }
    1790             :     }
    1791             : 
    1792   102659000 :     if (have_newmv_in_inter_mode(inter_mode)) {
    1793    52873900 :         if (candidate_ptr->is_compound) {
    1794    44807500 :             mvRate = 0;
    1795             : 
    1796    44807500 :             if (inter_mode == NEW_NEWMV) {
    1797    56469100 :                 for (refListIdx = 0; refListIdx < 2; ++refListIdx) {
    1798    37656000 :                     predRefX = candidate_ptr->motion_vector_pred_x[refListIdx];
    1799    37656000 :                     predRefY = candidate_ptr->motion_vector_pred_y[refListIdx];
    1800    37656000 :                     mvRefX = refListIdx == REF_LIST_1 ? candidate_ptr->motion_vector_xl1 : candidate_ptr->motion_vector_xl0;
    1801    37656000 :                     mvRefY = refListIdx == REF_LIST_1 ? candidate_ptr->motion_vector_yl1 : candidate_ptr->motion_vector_yl0;
    1802             : 
    1803             :                     MV mv;
    1804    37656000 :                     mv.row = mvRefY;
    1805    37656000 :                     mv.col = mvRefX;
    1806             : 
    1807             :                     MV ref_mv;
    1808    37656000 :                     ref_mv.row = predRefY;
    1809    37656000 :                     ref_mv.col = predRefX;
    1810             : 
    1811    37643700 :                     mvRate += eb_av1_mv_bit_cost(
    1812             :                         &mv,
    1813             :                         &ref_mv,
    1814    37656000 :                         candidate_ptr->md_rate_estimation_ptr->nmv_vec_cost,
    1815    37656000 :                         candidate_ptr->md_rate_estimation_ptr->nmvcoststack,
    1816             :                         MV_COST_WEIGHT);
    1817             :                 }
    1818             :             }
    1819    25982200 :             else if (inter_mode == NEAREST_NEWMV || inter_mode == NEAR_NEWMV) {
    1820    12958400 :                 predRefX = candidate_ptr->motion_vector_pred_x[REF_LIST_1];
    1821    12958400 :                 predRefY = candidate_ptr->motion_vector_pred_y[REF_LIST_1];
    1822    12958400 :                 mvRefX = candidate_ptr->motion_vector_xl1;
    1823    12958400 :                 mvRefY = candidate_ptr->motion_vector_yl1;
    1824             : 
    1825             :                 MV mv;
    1826    12958400 :                 mv.row = mvRefY;
    1827    12958400 :                 mv.col = mvRefX;
    1828             : 
    1829             :                 MV ref_mv;
    1830    12958400 :                 ref_mv.row = predRefY;
    1831    12958400 :                 ref_mv.col = predRefX;
    1832             : 
    1833    13036200 :                 mvRate += eb_av1_mv_bit_cost(
    1834             :                     &mv,
    1835             :                     &ref_mv,
    1836    12958400 :                     candidate_ptr->md_rate_estimation_ptr->nmv_vec_cost,
    1837    12958400 :                     candidate_ptr->md_rate_estimation_ptr->nmvcoststack,
    1838             :                     MV_COST_WEIGHT);
    1839             :             }
    1840             :             else {
    1841    13023700 :                 assert(inter_mode == NEW_NEARESTMV || inter_mode == NEW_NEARMV);
    1842             : 
    1843    13023700 :                 predRefX = candidate_ptr->motion_vector_pred_x[REF_LIST_0];
    1844    13023700 :                 predRefY = candidate_ptr->motion_vector_pred_y[REF_LIST_0];
    1845    13023700 :                 mvRefX = candidate_ptr->motion_vector_xl0;
    1846    13023700 :                 mvRefY = candidate_ptr->motion_vector_yl0;
    1847             : 
    1848             :                 MV mv;
    1849    13023700 :                 mv.row = mvRefY;
    1850    13023700 :                 mv.col = mvRefX;
    1851             : 
    1852             :                 MV ref_mv;
    1853    13023700 :                 ref_mv.row = predRefY;
    1854    13023700 :                 ref_mv.col = predRefX;
    1855             : 
    1856    13026900 :                 mvRate += eb_av1_mv_bit_cost(
    1857             :                     &mv,
    1858             :                     &ref_mv,
    1859    13023700 :                     candidate_ptr->md_rate_estimation_ptr->nmv_vec_cost,
    1860    13023700 :                     candidate_ptr->md_rate_estimation_ptr->nmvcoststack,
    1861             :                     MV_COST_WEIGHT);
    1862             :             }
    1863             :         }
    1864             :         else {
    1865     8066440 :             refListIdx = candidate_ptr->prediction_direction[0] == 0 ? 0 : 1;
    1866             : 
    1867     8066440 :             predRefX = candidate_ptr->motion_vector_pred_x[refListIdx];
    1868     8066440 :             predRefY = candidate_ptr->motion_vector_pred_y[refListIdx];
    1869             : 
    1870     8066440 :             mvRefX = refListIdx == 0 ? candidate_ptr->motion_vector_xl0 : candidate_ptr->motion_vector_xl1;
    1871     8066440 :             mvRefY = refListIdx == 0 ? candidate_ptr->motion_vector_yl0 : candidate_ptr->motion_vector_yl1;
    1872             : 
    1873             :             MV mv;
    1874     8066440 :             mv.row = mvRefY;
    1875     8066440 :             mv.col = mvRefX;
    1876             : 
    1877             :             MV ref_mv;
    1878     8066440 :             ref_mv.row = predRefY;
    1879     8066440 :             ref_mv.col = predRefX;
    1880             : 
    1881     8067480 :             mvRate = eb_av1_mv_bit_cost(
    1882             :                 &mv,
    1883             :                 &ref_mv,
    1884     8066440 :                 candidate_ptr->md_rate_estimation_ptr->nmv_vec_cost,
    1885     8066440 :                 candidate_ptr->md_rate_estimation_ptr->nmvcoststack,
    1886             :                 MV_COST_WEIGHT);
    1887             :         }
    1888             :     }
    1889             : 
    1890             : #if II_COMP_FLAG
    1891   102637000 :     if (md_pass > 0) {
    1892             : 
    1893             :         // inter intra mode rate
    1894   102493000 :         if (picture_control_set_ptr->parent_pcs_ptr->frm_hdr.reference_mode != COMPOUND_REFERENCE &&
    1895   204397000 :             picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.enable_interintra_compound &&
    1896   101907000 :             svt_is_interintra_allowed(picture_control_set_ptr->parent_pcs_ptr->enable_inter_intra,blk_geom->bsize, candidate_ptr->inter_mode, rf)) {
    1897    21044800 :             const int interintra = candidate_ptr->is_interintra_used;
    1898    21044800 :             const int bsize_group = size_group_lookup[blk_geom->bsize];
    1899             : 
    1900    21044800 :             interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->inter_intra_fac_bits[bsize_group][candidate_ptr->is_interintra_used];
    1901             : 
    1902    21044800 :             if (interintra) {
    1903     8905100 :                 interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->inter_intra_mode_fac_bits[bsize_group][candidate_ptr->interintra_mode];
    1904             : 
    1905     8905100 :                 if (is_interintra_wedge_used(blk_geom->bsize)) {
    1906     8905080 :                     interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->wedge_inter_intra_fac_bits[blk_geom->bsize][candidate_ptr->use_wedge_interintra];
    1907             : 
    1908     8905080 :                     if (candidate_ptr->use_wedge_interintra) {
    1909     4453350 :                         interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->wedge_idx_fac_bits[blk_geom->bsize][candidate_ptr->interintra_wedge_index];
    1910             :                     }
    1911             :                 }
    1912             :             }
    1913             :         }
    1914             :     }
    1915             : #endif
    1916   102623000 :     EbBool is_inter = inter_mode >= SINGLE_INTER_MODE_START && inter_mode < SINGLE_INTER_MODE_END;
    1917   102623000 :     if (is_inter
    1918    30976600 :         && frm_hdr->is_motion_mode_switchable
    1919    30339200 :         && rf[1] != INTRA_FRAME)
    1920             :     {
    1921    30339200 :         MotionMode motion_mode_rd = candidate_ptr->motion_mode;
    1922    30339200 :         BlockSize bsize = blk_geom->bsize;
    1923    30339200 :         cu_ptr->prediction_unit_array[0].num_proj_ref = candidate_ptr->num_proj_ref;
    1924    30339200 :         MotionMode last_motion_mode_allowed = motion_mode_allowed(
    1925             :             picture_control_set_ptr,
    1926             :             cu_ptr,
    1927             :             bsize,
    1928    30339200 :             rf[0],
    1929    30339200 :             rf[1],
    1930             :             inter_mode);
    1931             : 
    1932    30340300 :         switch (last_motion_mode_allowed) {
    1933     6039190 :         case SIMPLE_TRANSLATION: break;
    1934    21923900 :         case OBMC_CAUSAL:
    1935             : #if OBMC_FLAG
    1936    21923900 :             interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->motion_mode_fac_bits1[bsize][motion_mode_rd==OBMC_CAUSAL];
    1937             : #else
    1938             :             assert(motion_mode_rd == SIMPLE_TRANSLATION); // TODO: remove when OBMC added
    1939             :             interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->motion_mode_fac_bits1[bsize][motion_mode_rd];
    1940             : #endif
    1941    21923900 :             break;
    1942     2377220 :         default:
    1943     2377220 :             interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->motion_mode_fac_bits[bsize][motion_mode_rd];
    1944             :         }
    1945    72284100 :     }
    1946             :     //this func return 0 if masked=0 and distance=0
    1947   205227000 :     interModeBitsNum += get_compound_mode_rate(
    1948             :         md_pass,
    1949             :         candidate_ptr,
    1950             :         cu_ptr,
    1951   102624000 :         candidate_ptr->ref_frame_type,
    1952   102624000 :         blk_geom->bsize,
    1953   102624000 :         picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr,
    1954             :         picture_control_set_ptr
    1955             :     );
    1956             :     // NM - To be added when the overlappable mode is adopted
    1957             :     //    read_compound_type(is_compound)
    1958             :     // NM - To be added when switchable filter is adopted
    1959             :     //    if (interpolation_filter == SWITCHABLE) {
    1960             :     //        for (dir = 0; dir < (enable_dual_filter ? 2 : 1); dir++) {
    1961             :     //            if (needs_interp_filter()) {
    1962             :     //                interp_filter[dir]    S()
    1963             :     //            }
    1964             :     //            else {
    1965             :     //                interp_filter[dir] = EIGHTTAP
    1966             :     //            }
    1967             :     //        }
    1968             :     //        if (!enable_dual_filter)
    1969             :     //            interp_filter[1] = interp_filter[0]
    1970             :     //    }
    1971             :     //    else {
    1972             :     //        for (dir = 0; dir < 2; dir++)
    1973             :     //            interp_filter[dir] = interpolation_filter
    1974             :     //    }
    1975   102603000 :     uint32_t isInterRate = candidate_ptr->md_rate_estimation_ptr->intra_inter_fac_bits[cu_ptr->is_inter_ctx][1];
    1976   102603000 :     lumaRate = (uint32_t)(referencePictureBitsNum + skipModeRate + interModeBitsNum + mvRate + isInterRate);
    1977             : 
    1978             :     //chromaRate = intraChromaModeBitsNum + intraChromaAngModeBitsNum;
    1979             : 
    1980             :     // Keep the Fast Luma and Chroma rate for future use
    1981   102603000 :     candidate_ptr->fast_luma_rate = lumaRate;
    1982   102603000 :     candidate_ptr->fast_chroma_rate = chromaRate;
    1983             : 
    1984   102603000 :     if (use_ssd) {
    1985           0 :         int32_t current_q_index = frm_hdr->quantization_params.base_q_idx;
    1986           0 :         Dequants *const dequants = &picture_control_set_ptr->parent_pcs_ptr->deq;
    1987             : 
    1988           0 :         int16_t quantizer = dequants->y_dequant_Q3[current_q_index][1];
    1989           0 :         rate = 0;
    1990           0 :         model_rd_from_sse(
    1991           0 :             blk_geom->bsize,
    1992             :             quantizer,
    1993             :             luma_distortion,
    1994             :             &rate,
    1995             :             &lumaSad);
    1996           0 :         lumaRate += rate;
    1997           0 :         totalDistortion = lumaSad;
    1998             : 
    1999           0 :         rate = 0;
    2000           0 :         model_rd_from_sse(
    2001           0 :             blk_geom->bsize_uv,
    2002             :             quantizer,
    2003             :             chroma_distortion,
    2004             :             &chromaRate,
    2005             :             &chromaSad);
    2006           0 :         chromaRate += rate;
    2007           0 :         totalDistortion += chromaSad;
    2008             : 
    2009           0 :         rate = lumaRate + chromaRate;
    2010             : 
    2011             : #if TWO_PASS
    2012           0 :         if (picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->use_output_stat_file) {
    2013             : #if TWO_PASS_IMPROVEMENT
    2014           0 :             two_pass_cost_update(
    2015             :                 picture_control_set_ptr,
    2016             :                 candidate_ptr,
    2017             :                 &rate,
    2018             :                 &totalDistortion);
    2019             : #else
    2020             :             MvReferenceFrame ref_type[2];
    2021             :             av1_set_ref_frame(ref_type, candidate_ptr->ref_frame_type);
    2022             :             if ((candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME || ref_type[1] != BWDREF_FRAME)) ||
    2023             :                 (!candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME && ref_type[0] != BWDREF_FRAME))) {
    2024             :                 rate += rate * FIRST_PASS_COST_PENALTY / 100;
    2025             :                 totalDistortion += totalDistortion * FIRST_PASS_COST_PENALTY / 100;
    2026             :             }
    2027             : #endif
    2028             :         }
    2029             : #endif
    2030           0 :         if (candidate_ptr->merge_flag) {
    2031           0 :             uint64_t skipModeRate = candidate_ptr->md_rate_estimation_ptr->skip_mode_fac_bits[skipModeCtx][1];
    2032           0 :             if (skipModeRate < rate)
    2033           0 :                 return(RDCOST(lambda, skipModeRate, totalDistortion));
    2034             :         }
    2035           0 :         return(RDCOST(lambda, rate, totalDistortion));
    2036             :     }
    2037             :     else {
    2038   102603000 :         lumaSad = (LUMA_WEIGHT * luma_distortion) << AV1_COST_PRECISION;
    2039   102603000 :         chromaSad = chroma_distortion << AV1_COST_PRECISION;
    2040   102603000 :         totalDistortion = lumaSad + chromaSad;
    2041   102603000 :         if (blk_geom->has_uv == 0 && chromaSad != 0)
    2042           0 :             printf("av1_inter_fast_cost: Chroma error");
    2043   102603000 :         rate = lumaRate + chromaRate;
    2044             : #if TWO_PASS
    2045   102603000 :         if (picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->use_output_stat_file) {
    2046             : #if TWO_PASS_IMPROVEMENT
    2047           0 :             two_pass_cost_update(
    2048             :                 picture_control_set_ptr,
    2049             :                 candidate_ptr,
    2050             :                 &rate,
    2051             :                 &totalDistortion);
    2052             : #else
    2053             :             MvReferenceFrame ref_type[2];
    2054             :             av1_set_ref_frame(ref_type, candidate_ptr->ref_frame_type);
    2055             :             if ((candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME || ref_type[1] != BWDREF_FRAME)) ||
    2056             :                 (!candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME && ref_type[0] != BWDREF_FRAME))) {
    2057             :                 rate += rate * FIRST_PASS_COST_PENALTY / 100;
    2058             :                 totalDistortion += totalDistortion * FIRST_PASS_COST_PENALTY / 100;
    2059             :             }
    2060             : #endif
    2061             :         }
    2062             : #endif
    2063             :         // Assign fast cost
    2064   102622000 :         if (candidate_ptr->merge_flag) {
    2065      410064 :             uint64_t skipModeRate = candidate_ptr->md_rate_estimation_ptr->skip_mode_fac_bits[skipModeCtx][1];
    2066      410064 :             if (skipModeRate < rate)
    2067      349767 :                 return(RDCOST(lambda, skipModeRate, totalDistortion));
    2068             :         }
    2069   102272000 :         return(RDCOST(lambda, rate, totalDistortion));
    2070             :     }
    2071             : }
    2072             : 
    2073             : // Estimates the coefficient rate of one transform unit for the plane(s) selected by component_type.
    2074    77874300 : EbErrorType av1_tu_estimate_coeff_bits(
    2075             :     struct ModeDecisionContext         *md_context,
    2076             :     uint8_t                             allow_update_cdf,
    2077             :     FRAME_CONTEXT                      *ec_ctx,
    2078             :     PictureControlSet                  *picture_control_set_ptr,
    2079             :     struct ModeDecisionCandidateBuffer *candidate_buffer_ptr,
    2080             :     uint32_t                            tu_origin_index,
    2081             :     uint32_t                            tu_chroma_origin_index,
    2082             :     EntropyCoder                       *entropy_coder_ptr,
    2083             :     EbPictureBufferDesc                *coeff_buffer_sb,
    2084             :     uint32_t                            y_eob,
    2085             :     uint32_t                            cb_eob,
    2086             :     uint32_t                            cr_eob,
    2087             :     uint64_t                           *y_tu_coeff_bits,
    2088             :     uint64_t                           *cb_tu_coeff_bits,
    2089             :     uint64_t                           *cr_tu_coeff_bits,
    2090             :     TxSize                              txsize,
    2091             :     TxSize                              txsize_uv,
    2092             :     TxType                              tx_type,
    2093             :     TxType                              tx_type_uv,
    2094             :     COMPONENT_TYPE                      component_type)
    2095             : {
    2096             :     (void)entropy_coder_ptr;
    2097    77874300 :     EbErrorType return_error = EB_ErrorNone;
    2098             :     // return_error is never modified below, so this function always returns EB_ErrorNone.
    2099    77874300 :     FrameHeader *frm_hdr = &picture_control_set_ptr->parent_pcs_ptr->frm_hdr;
    2100             :     // The txb-skip and dc-sign contexts of each plane are read from md_context.
    2101             :     int32_t *coeff_buffer;
    2102    77874300 :     int16_t  luma_txb_skip_context = md_context->luma_txb_skip_context;
    2103    77874300 :     int16_t  luma_dc_sign_context = md_context->luma_dc_sign_context;
    2104    77874300 :     int16_t  cb_txb_skip_context = md_context->cb_txb_skip_context;
    2105    77874300 :     int16_t  cb_dc_sign_context = md_context->cb_dc_sign_context;
    2106    77874300 :     int16_t  cr_txb_skip_context = md_context->cr_txb_skip_context;
    2107    77874300 :     int16_t  cr_dc_sign_context = md_context->cr_dc_sign_context;
    2108             : 
    2109    77874300 :     EbBool reducedTransformSetFlag = frm_hdr->reduced_tx_set ? EB_TRUE : EB_FALSE;
    2110             : 
    2111             :     // Luma: call eb_av1_cost_coeffs_txb when y_eob is non-zero, otherwise av1_cost_skip_txb.
    2112             :     // An output pointer is only written when its plane is selected by component_type.
    2113    77874300 :     if (component_type == COMPONENT_LUMA || component_type == COMPONENT_ALL) {
    2114    60760500 :         if (y_eob) {
    2115    34052100 :             coeff_buffer = (int32_t*)&coeff_buffer_sb->buffer_y[tu_origin_index * sizeof(int32_t)];
    2116             : 
    2117    34019600 :             *y_tu_coeff_bits = eb_av1_cost_coeffs_txb(
    2118             :                 allow_update_cdf,
    2119             :                 ec_ctx,
    2120             :                 candidate_buffer_ptr,
    2121             :                 coeff_buffer,
    2122    34052100 :                 (uint16_t)y_eob,
    2123             :                 PLANE_TYPE_Y,
    2124             :                 txsize,
    2125             :                 tx_type,
    2126             :                 luma_txb_skip_context,
    2127             :                 luma_dc_sign_context,
    2128             :                 reducedTransformSetFlag);
    2129             :         }
    2130             :         else {
    2131    26708400 :             *y_tu_coeff_bits = av1_cost_skip_txb(
    2132             :                 allow_update_cdf,
    2133             :                 ec_ctx,
    2134             :                 candidate_buffer_ptr,
    2135             :                 txsize,
    2136             :                 PLANE_TYPE_Y,
    2137             :                 luma_txb_skip_context);
    2138             :         }
    2139             :     }
    2140             :     // Cb: same scheme as luma, driven by cb_eob and using txsize_uv, tx_type_uv and the Cb contexts.
    2141             : 
    2142    77843500 :     if (component_type == COMPONENT_CHROMA_CB || component_type == COMPONENT_CHROMA || component_type == COMPONENT_ALL) {
    2143    12531800 :         if (cb_eob) {
    2144     5622360 :             coeff_buffer = (int32_t*)&coeff_buffer_sb->buffer_cb[tu_chroma_origin_index * sizeof(int32_t)];
    2145             : 
    2146     5621170 :             *cb_tu_coeff_bits = eb_av1_cost_coeffs_txb(
    2147             :                 allow_update_cdf,
    2148             :                 ec_ctx,
    2149             :                 candidate_buffer_ptr,
    2150             :                 coeff_buffer,
    2151     5622360 :                 (uint16_t)cb_eob,
    2152             :                 PLANE_TYPE_UV,
    2153             :                 txsize_uv,
    2154             :                 tx_type_uv,
    2155             :                 cb_txb_skip_context,
    2156             :                 cb_dc_sign_context,
    2157             :                 reducedTransformSetFlag);
    2158             :         }
    2159             :         else {
    2160     6909470 :             *cb_tu_coeff_bits = av1_cost_skip_txb(
    2161             :                 allow_update_cdf,
    2162             :                 ec_ctx,
    2163             :                 candidate_buffer_ptr,
    2164             :                 txsize_uv,
    2165             :                 PLANE_TYPE_UV,
    2166             :                 cb_txb_skip_context);
    2167             :         }
    2168             :     }
    2169             : 
    2170    77842300 :     if (component_type == COMPONENT_CHROMA_CR || component_type == COMPONENT_CHROMA || component_type == COMPONENT_ALL) {
    2171             :         // Cr: same scheme as luma, driven by cr_eob and using txsize_uv, tx_type_uv and the Cr contexts.
    2172    12165600 :         if (cr_eob) {
    2173     3725450 :             coeff_buffer = (int32_t*)&coeff_buffer_sb->buffer_cr[tu_chroma_origin_index * sizeof(int32_t)];
    2174             : 
    2175     3725330 :             *cr_tu_coeff_bits = eb_av1_cost_coeffs_txb(
    2176             :                 allow_update_cdf,
    2177             :                 ec_ctx,
    2178             :                 candidate_buffer_ptr,
    2179             :                 coeff_buffer,
    2180     3725450 :                 (uint16_t)cr_eob,
    2181             :                 PLANE_TYPE_UV,
    2182             :                 txsize_uv,
    2183             :                 tx_type_uv,
    2184             :                 cr_txb_skip_context,
    2185             :                 cr_dc_sign_context,
    2186             :                 reducedTransformSetFlag);
    2187             :         }
    2188             :         else {
    2189     8440150 :             *cr_tu_coeff_bits = av1_cost_skip_txb(
    2190             :                 allow_update_cdf,
    2191             :                 ec_ctx,
    2192             :                 candidate_buffer_ptr,
    2193             :                 txsize_uv,
    2194             :                 PLANE_TYPE_UV,
    2195             :                 cr_txb_skip_context);
    2196             :         }
    2197             :     }
    2198             : 
    2199    77842000 :     return return_error;
    2200             : }
    2201             : 
    2202             : /*********************************************************************************
    2203             : * Av1FullCost computes the full mode decision cost of a candidate from its fast-loop
    2204             : * rates, its coefficient bits and its distortion, and stores it in the candidate buffer.
    2205             : *
    2206             : *   @param *candidate_buffer_ptr(input/output)
    2207             : *       full_cost_ptr, full_lambda_rate, full_cost_luma (and possibly block_has_coeff) are written.
    2208             : *   @param *y_distortion, *cb_distortion, *cr_distortion(input/output)
    2209             : *       two-entry arrays: [0] is used for the cost; [1] is used by the skip decision and may be copied to [0].
    2210             : *   @param lambda(input)
    2211             : *       lambda is the Lagrange multiplier
    2212             : *   @param *y_coeff_bits, *cb_coeff_bits, *cr_coeff_bits(input)
    2213             : *       coefficient bits of each plane, added to the rate.
    2214             : *   @return EB_ErrorNone in all cases (return_error is never modified).
    2215             : *   bsize is not used.
    2216             : **********************************************************************************/
    2217    36879900 : EbErrorType Av1FullCost(
    2218             :     PictureControlSet                    *picture_control_set_ptr,
    2219             :     ModeDecisionContext                  *context_ptr,
    2220             :     struct ModeDecisionCandidateBuffer   *candidate_buffer_ptr,
    2221             :     CodingUnit                           *cu_ptr,
    2222             :     uint64_t                               *y_distortion,
    2223             :     uint64_t                               *cb_distortion,
    2224             :     uint64_t                               *cr_distortion,
    2225             :     uint64_t                                lambda,
    2226             :     uint64_t                               *y_coeff_bits,
    2227             :     uint64_t                               *cb_coeff_bits,
    2228             :     uint64_t                               *cr_coeff_bits,
    2229             :     BlockSize                               bsize)
    2230             : {
    2231             :     UNUSED(picture_control_set_ptr);
    2232             :     UNUSED(bsize);
    2233             :     UNUSED(cu_ptr);
    2234    36879900 :     EbErrorType return_error = EB_ErrorNone;
    2235             :     // NOTE(review): picture_control_set_ptr (under ENHANCE_ATB / TWO_PASS) and cu_ptr are referenced below despite UNUSED().
    2236             :     // Rate accumulators: mode rate for luma and chroma, plus the coefficient rate
    2237    36879900 :     uint64_t lumaRate = 0;
    2238    36879900 :     uint64_t chromaRate = 0;
    2239    36879900 :     uint64_t coeffRate = 0;
    2240             : 
    2241             :     // Distortion terms and the total rate used in the final cost
    2242             :     uint64_t luma_sse;
    2243             :     uint64_t chromaSse;
    2244             :     uint64_t totalDistortion;
    2245             :     uint64_t rate;
    2246             : 
    2247             :     // Start from the fast-loop rates stored on the candidate;
    2248             :     // the coefficient rate is added further down to get the total rate of the mode.
    2249    36879900 :     lumaRate += candidate_buffer_ptr->candidate_ptr->fast_luma_rate;
    2250    36879900 :     chromaRate += candidate_buffer_ptr->candidate_ptr->fast_chroma_rate;
    2251             : 
    2252             :     // For CFL, the costs of the alphas are not computed in the fast loop because the alphas are only known in the full loop, so they are added here.
    2253             :     // In the fast loop the chroma mode bits are calculated based on DC mode; if the candidate uses CFL, the chroma bits are updated here.
    2254    36879900 :     if (context_ptr->blk_geom->has_uv) {
    2255    32548800 :         if (candidate_buffer_ptr->candidate_ptr->type == INTRA_MODE && candidate_buffer_ptr->candidate_ptr->intra_chroma_mode == UV_CFL_PRED) {
    2256     7634110 :             EbBool isCflAllowed = (context_ptr->blk_geom->bwidth <= 32 &&
    2257     3817060 :                 context_ptr->blk_geom->bheight <= 32) ? 1 : 0;
    2258             :             // Add the bits of the U and V CFL alphas.
    2259     3817060 :             chromaRate += candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->cfl_alpha_fac_bits[candidate_buffer_ptr->candidate_ptr->cfl_alpha_signs][CFL_PRED_U][CFL_IDX_U(candidate_buffer_ptr->candidate_ptr->cfl_alpha_idx)] +
    2260     3817060 :                 candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->cfl_alpha_fac_bits[candidate_buffer_ptr->candidate_ptr->cfl_alpha_signs][CFL_PRED_V][CFL_IDX_V(candidate_buffer_ptr->candidate_ptr->cfl_alpha_idx)];
    2261             :             // Swap the UV_DC_PRED chroma-mode bits for the UV_CFL_PRED bits.
    2262     3817060 :             chromaRate += (uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->intra_uv_mode_fac_bits[isCflAllowed][candidate_buffer_ptr->candidate_ptr->intra_luma_mode][UV_CFL_PRED];
    2263     3817060 :             chromaRate -= (uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->intra_uv_mode_fac_bits[isCflAllowed][candidate_buffer_ptr->candidate_ptr->intra_luma_mode][UV_DC_PRED];
    2264             :         }
    2265             :     }
    2266             :     // Transform-size bits (ENHANCE_ATB only): fetched only when the frame header tx_mode is TX_MODE_SELECT.
    2267             : #if ENHANCE_ATB
    2268    36879900 :     uint64_t tx_size_bits = 0;
    2269    36879900 :     if (picture_control_set_ptr->parent_pcs_ptr->frm_hdr.tx_mode == TX_MODE_SELECT)
    2270     7567970 :         tx_size_bits = get_tx_size_bits(
    2271             :             candidate_buffer_ptr,
    2272             :             context_ptr,
    2273             :             picture_control_set_ptr,
    2274     7567970 :             candidate_buffer_ptr->candidate_ptr->tx_depth,
    2275     7567970 :             candidate_buffer_ptr->candidate_ptr->block_has_coeff);
    2276             : #endif
    2277             : 
    2278             :     // Coefficient rate. When blk_skip_decision is set and the candidate is not intra, the RD cost of coding
    2279             :     // the coefficients ([0] distortions) is compared with the cost of skipping them ([1] distortions).
    2280    67876900 :     if (context_ptr->blk_skip_decision && candidate_buffer_ptr->candidate_ptr->type != INTRA_MODE) {
    2281             : #if ENHANCE_ATB
    2282    30980500 :         uint64_t non_skip_cost = RDCOST(lambda, (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + tx_size_bits + (uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->skip_fac_bits[cu_ptr->skip_coeff_context][0]), (y_distortion[0] + cb_distortion[0] + cr_distortion[0]));
    2283             : #else
    2284             :         uint64_t non_skip_cost = RDCOST(lambda, (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + (uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->skip_fac_bits[cu_ptr->skip_coeff_context][0]), (y_distortion[0] + cb_distortion[0] + cr_distortion[0]));
    2285             : #endif
    2286    30980500 :         uint64_t skip_cost = RDCOST(lambda, ((uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->skip_fac_bits[cu_ptr->skip_coeff_context][1]), (y_distortion[1] + cb_distortion[1] + cr_distortion[1]));
    2287    30980500 :         if ((candidate_buffer_ptr->candidate_ptr->block_has_coeff == 0) || (skip_cost < non_skip_cost)) {
    2288    24338500 :             y_distortion[0] = y_distortion[1];
    2289    24338500 :             cb_distortion[0] = cb_distortion[1];
    2290    24338500 :             cr_distortion[0] = cr_distortion[1];
    2291    24338500 :             candidate_buffer_ptr->candidate_ptr->block_has_coeff = 0;
    2292             :         }
    2293    30980500 :         if (candidate_buffer_ptr->candidate_ptr->block_has_coeff)
    2294     6652490 :             coeffRate = (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + (uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->skip_fac_bits[cu_ptr->skip_coeff_context][0]);
    2295             :         else
    2296    24328000 :             coeffRate = MIN((uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->skip_fac_bits[cu_ptr->skip_coeff_context][1],
    2297             :             (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + (uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->skip_fac_bits[cu_ptr->skip_coeff_context][0]));
    2298             :     }
    2299             :     else
    2300     5915980 :         coeffRate = (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + (uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->skip_fac_bits[cu_ptr->skip_coeff_context][0]);
    2301    36896400 :     luma_sse = y_distortion[0];
    2302    36896400 :     chromaSse = cb_distortion[0] + cr_distortion[0];
    2303    36896400 :     totalDistortion = luma_sse + chromaSse;
    2304             : 
    2305    36896400 :     rate = lumaRate + chromaRate + coeffRate;
    2306             : #if ENHANCE_ATB
    2307    36896400 :     if (candidate_buffer_ptr->candidate_ptr->block_has_coeff)
    2308    11598200 :         rate += tx_size_bits;
    2309             : #endif
    2310             :     // Two-pass (stat file output, non-intra candidates only): rate and totalDistortion may be modified here.
    2311             : #if TWO_PASS
    2312    36896400 :     if (picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->use_output_stat_file && candidate_buffer_ptr->candidate_ptr->type != INTRA_MODE) {
    2313             : #if TWO_PASS_IMPROVEMENT
    2314           0 :         two_pass_cost_update_64bit(
    2315             :             picture_control_set_ptr,
    2316             :             candidate_buffer_ptr->candidate_ptr,
    2317             :             &rate,
    2318             :             &totalDistortion);
    2319             : #else
    2320             :         MvReferenceFrame ref_type[2];
    2321             :         av1_set_ref_frame(ref_type, candidate_buffer_ptr->candidate_ptr->ref_frame_type);
    2322             :         if ((candidate_buffer_ptr->candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME || ref_type[1] != BWDREF_FRAME)) ||
    2323             :             (!candidate_buffer_ptr->candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME && ref_type[0] != BWDREF_FRAME))) {
    2324             :             rate += rate * FIRST_PASS_COST_PENALTY / 100;
    2325             :             totalDistortion += totalDistortion * FIRST_PASS_COST_PENALTY / 100;
    2326             :         }
    2327             : #endif
    2328             :     }
    2329             : #endif
    2330             :     // Assign the full cost from the total rate and the total distortion
    2331    36895400 :     *(candidate_buffer_ptr->full_cost_ptr) = RDCOST(lambda, rate, totalDistortion);
    2332             :     // full_lambda_rate is the full cost minus the total distortion.
    2333    36895400 :     candidate_buffer_ptr->full_lambda_rate = *candidate_buffer_ptr->full_cost_ptr - totalDistortion;
    2334    36895400 :     coeffRate = *y_coeff_bits;
    2335    36895400 :     candidate_buffer_ptr->full_cost_luma = RDCOST(lambda, lumaRate + *y_coeff_bits, luma_sse);
    2336             :     // NOTE(review): the coeffRate assignment above is not read again before returning.
    2337    36895400 :     return return_error;
    2338             : }
    2339             : 
    2340             : /*********************************************************************************
    2341             : * merge_skip_full_cost function is used to estimate the cost of an AMVPSkip candidate
    2342             : * mode for the full mode decision module.
    2343             : *
    2344             : *   @param *cu_ptr(input)
    2345             : *       cu_ptr is the pointer of the target CU.
    2346             : *   @param *candidate_buffer_ptr(input)
    2347             : *       chromaBufferPtr is the buffer pointer of the candidate luma mode.
    2348             : *   @param qp(input)
    2349             : *       qp is the quantizer parameter.
    2350             : *   @param luma_distortion (input)
    2351             : *       luma_distortion is the inter candidate luma distortion.
    2352             : *   @param lambda(input)
    2353             : *       lambda is the Lagrange multiplier
    2354             : **********************************************************************************/
    2355      652229 : EbErrorType  Av1MergeSkipFullCost(
    2356             :     PictureControlSet                    *picture_control_set_ptr,
    2357             :     ModeDecisionContext                  *context_ptr,
    2358             :     struct ModeDecisionCandidateBuffer   *candidate_buffer_ptr,
    2359             :     CodingUnit                           *cu_ptr,
    2360             :     uint64_t                               *y_distortion,
    2361             :     uint64_t                               *cb_distortion,
    2362             :     uint64_t                               *cr_distortion,
    2363             :     uint64_t                                lambda,
    2364             :     uint64_t                               *y_coeff_bits,
    2365             :     uint64_t                               *cb_coeff_bits,
    2366             :     uint64_t                               *cr_coeff_bits,
    2367             :     BlockSize                               bsize)
    2368             : {
    2369             :     UNUSED(bsize);
    2370             :     UNUSED(context_ptr);
    2371             :     UNUSED(picture_control_set_ptr);
    2372             : 
    2373      652229 :     EbErrorType  return_error = EB_ErrorNone;
    2374      652229 :     uint64_t skipModeCtx = cu_ptr->skip_flag_context;
    2375      652229 :     uint64_t mergeRate = 0;
    2376      652229 :     uint64_t skipRate = 0;
    2377             :     // Merge
    2378             :     //uint64_t mergeChromaRate;
    2379             :     uint64_t mergeDistortion;
    2380             :     uint64_t merge_cost;
    2381             :     //uint64_t mergeLumaCost;
    2382             :     uint64_t mergeLumaSse;
    2383             :     uint64_t mergeChromaSse;
    2384             :     uint64_t coeffRate;
    2385             :     //uint64_t lumaCoeffRate;
    2386             : 
    2387             :     // SKIP
    2388             :     uint64_t skipDistortion;
    2389             :     uint64_t skip_cost;
    2390             :     //uint64_t skipLumaCost;
    2391             : 
    2392             :     // Luma and chroma transform size shift for the distortion
    2393             :     uint64_t skipLumaSse;
    2394             :     uint64_t skipChromaSse;
    2395             : 
    2396      652229 :     uint64_t skipModeRate = candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->skip_mode_fac_bits[skipModeCtx][1];
    2397             : 
    2398             :     // Coeff rate
    2399      652229 :     coeffRate = (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits);
    2400             : 
    2401             :     // Compute Merge Cost
    2402      652229 :     mergeLumaSse = y_distortion[0] << AV1_COST_PRECISION;
    2403      652229 :     mergeChromaSse = (cb_distortion[0] + cr_distortion[0]) << AV1_COST_PRECISION;
    2404             : 
    2405      652229 :     skipLumaSse = y_distortion[1] << AV1_COST_PRECISION;
    2406      652229 :     skipChromaSse = (cb_distortion[1] + cr_distortion[1]) << AV1_COST_PRECISION;
    2407             : 
    2408             :     // *Note - As in JCTVC-G1102, the JCT-VC uses the Mode Decision formula where the chromaSse has been weighted
    2409             :     //  CostMode = (luma_sse + wchroma * chromaSse) + lambdaSse * rateMode
    2410             : 
    2411             :     //if (picture_control_set_ptr->parent_pcs_ptr->pred_structure == EB_PRED_RANDOM_ACCESS) {
    2412             :     //    // Random Access
    2413             :     //    if (picture_control_set_ptr->temporal_layer_index == 0) {
    2414             :     //        mergeChromaSse = (((mergeChromaSse * chroma_weight_factor_ra[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
    2415             :     //    }
    2416             :     //    else if (picture_control_set_ptr->temporal_layer_index < 3) {
    2417             :     //        mergeChromaSse = (((mergeChromaSse * chroma_weight_factor_ra_qp_scaling_l1[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
    2418             :     //    }
    2419             :     //    else {
    2420             :     //        mergeChromaSse = (((mergeChromaSse * chroma_weight_factor_ra_qp_scaling_l3[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
    2421             :     //    }
    2422             :     //}
    2423             :     //else {
    2424             :     //    // Low delay
    2425             :     //    if (picture_control_set_ptr->temporal_layer_index == 0) {
    2426             :     //        mergeChromaSse = (((mergeChromaSse * chroma_weight_factor_ld[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
    2427             :     //    }
    2428             :     //    else {
    2429             :     //        mergeChromaSse = (((mergeChromaSse * chroma_weight_factor_ld_qp_scaling[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
    2430             :     //    }
    2431             :     //}
    2432             : 
    2433             :     // Add fast rate to get the total rate of the subject mode
    2434      652229 :     mergeRate += candidate_buffer_ptr->candidate_ptr->fast_luma_rate;
    2435      652229 :     mergeRate += candidate_buffer_ptr->candidate_ptr->fast_chroma_rate;
    2436             : 
    2437      652229 :     mergeRate += coeffRate;
    2438             : #if ENHANCE_ATB
    2439      652229 :     uint64_t tx_size_bits = 0;
    2440      652229 :     if (picture_control_set_ptr->parent_pcs_ptr->frm_hdr.tx_mode == TX_MODE_SELECT)
    2441           0 :         tx_size_bits = get_tx_size_bits(
    2442             :             candidate_buffer_ptr,
    2443             :             context_ptr,
    2444             :             picture_control_set_ptr,
    2445           0 :             candidate_buffer_ptr->candidate_ptr->tx_depth,
    2446           0 :             candidate_buffer_ptr->candidate_ptr->block_has_coeff);
    2447      652285 :     mergeRate += tx_size_bits;
    2448             : #endif
    2449             : 
    2450      652285 :     mergeDistortion = (mergeLumaSse + mergeChromaSse);
    2451             : 
    2452             :     //merge_cost = mergeDistortion + (((lambda * coeffRate + lambda * mergeLumaRate + lambda_chroma * mergeChromaRate) + MD_OFFSET) >> MD_SHIFT);
    2453             : 
    2454      652285 :     merge_cost = RDCOST(lambda, mergeRate, mergeDistortion);
    2455             :     // mergeLumaCost = mergeLumaSse    + (((lambda * lumaCoeffRate + lambda * mergeLumaRate) + MD_OFFSET) >> MD_SHIFT);
    2456             : 
    2457             :     // *Note - As in JCTVC-G1102, the JCT-VC uses the Mode Decision formula where the chromaSse has been weighted
    2458             :     //  CostMode = (luma_sse + wchroma * chromaSse) + lambdaSse * rateMode
    2459             : 
    2460             :     //if (picture_control_set_ptr->parent_pcs_ptr->pred_structure == EB_PRED_RANDOM_ACCESS) {
    2461             :     //    if (picture_control_set_ptr->temporal_layer_index == 0) {
    2462             :     //        skipChromaSse = (((skipChromaSse * chroma_weight_factor_ra[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
    2463             :     //    }
    2464             :     //    else if (picture_control_set_ptr->temporal_layer_index < 3) {
    2465             :     //        skipChromaSse = (((skipChromaSse * chroma_weight_factor_ra_qp_scaling_l1[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
    2466             :     //    }
    2467             :     //    else {
    2468             :     //        skipChromaSse = (((skipChromaSse * chroma_weight_factor_ra_qp_scaling_l3[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
    2469             :     //    }
    2470             :     //}
    2471             :     //else {
    2472             :     //    // Low Delay
    2473             :     //    if (picture_control_set_ptr->temporal_layer_index == 0) {
    2474             :     //        skipChromaSse = (((skipChromaSse * chroma_weight_factor_ld[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
    2475             :     //    }
    2476             :     //    else {
    2477             :     //        skipChromaSse = (((skipChromaSse * chroma_weight_factor_ld_qp_scaling[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
    2478             :     //    }
    2479             :     //}
    2480             : 
    2481      652285 :     skipDistortion = skipLumaSse + skipChromaSse;
    2482      652285 :     skipRate = skipModeRate;
    2483      652285 :     skip_cost = RDCOST(lambda, skipRate, skipDistortion);
    2484             : #if TWO_PASS
    2485      652285 :     if (picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->use_output_stat_file) {
    2486             :         MvReferenceFrame ref_type[2];
    2487           0 :         av1_set_ref_frame(ref_type, candidate_buffer_ptr->candidate_ptr->ref_frame_type);
    2488           0 :         if ((candidate_buffer_ptr->candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME || ref_type[1] != BWDREF_FRAME)) ||
    2489           0 :             (!candidate_buffer_ptr->candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME && ref_type[0] != BWDREF_FRAME))) {
    2490           0 :             skip_cost += skip_cost * FIRST_PASS_COST_PENALTY / 100;
    2491           0 :             merge_cost += merge_cost * FIRST_PASS_COST_PENALTY / 100;
    2492             :         }
    2493             : #if TWO_PASS_IMPROVEMENT
    2494           0 :         EbReferenceObject  *refObjL1 = (EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[REF_LIST_1][0]->object_ptr;
    2495           0 :         if (picture_control_set_ptr->slice_type == B_SLICE &&
    2496           0 :             (candidate_buffer_ptr->candidate_ptr->is_compound || ref_type[0] == BWDREF_FRAME)
    2497           0 :             && refObjL1->slice_type == I_SLICE && refObjL1->ref_poc > picture_control_set_ptr->picture_number) {
    2498           0 :             skip_cost += skip_cost * 2;
    2499           0 :             merge_cost += merge_cost * 2;
    2500             :         }
    2501             : #endif
    2502             :     }
    2503             : #endif
    2504             :     // Assign full cost: the smaller of the skip and merge costs (skip wins ties)
    2505      652285 :     *candidate_buffer_ptr->full_cost_ptr = (skip_cost <= merge_cost) ? skip_cost : merge_cost;
    2506             : 
    2507             :     uint64_t tempDistortion;
    2508      652285 :     tempDistortion = (skip_cost <= merge_cost) ? skipDistortion : mergeDistortion;
    2509      652285 :     candidate_buffer_ptr->full_lambda_rate = *candidate_buffer_ptr->full_cost_ptr - tempDistortion;
    2510      652285 :     *candidate_buffer_ptr->full_cost_merge_ptr = merge_cost;
    2511      652285 :     *candidate_buffer_ptr->full_cost_skip_ptr = skip_cost;
    2512             :     // Assign merge flag
    2513      652285 :     candidate_buffer_ptr->candidate_ptr->merge_flag = EB_TRUE;
    2514             :     // Assign skip flag: skip is selected when its cost does not exceed the merge cost
    2515             : 
    2516      652285 :     candidate_buffer_ptr->candidate_ptr->skip_flag = (skip_cost <= merge_cost) ? EB_TRUE : EB_FALSE;
    2517             : 
    2518             :     //CHKN:  skip_flag context is not accurate as MD does not keep skip info in sync with EncDec.
    2519             : 
    2520      652285 :     return return_error;
    2521             : }
    2522             : /*********************************************************************************
    2523             : * av1_intra_full_cost estimates the cost of an intra candidate mode
    2524             : * for the full mode decision module by delegating to Av1FullCost().
    2525             : *
    2526             : *   @param *picture_control_set_ptr, *context_ptr, *cu_ptr, bsize
    2527             : *       forwarded unchanged to Av1FullCost(); not read by this wrapper.
    2528             : *   @param *candidate_buffer_ptr
    2529             : *       buffer of the candidate being costed; forwarded to Av1FullCost().
    2530             : *   @param *y_distortion, *cb_distortion, *cr_distortion, *y_coeff_bits, *cb_coeff_bits, *cr_coeff_bits
    2531             : *       distortion and coefficient-bit arguments, forwarded unchanged.
    2532             : *   @param lambda(input)
    2533             : *       lambda is the Lagrange multiplier
    2534             : *   @return
    2535             : *       always EB_ErrorNone; no status from Av1FullCost() is propagated.
    2536             : **********************************************************************************/
    2537     5853270 : EbErrorType av1_intra_full_cost(
    2538             :     PictureControlSet                    *picture_control_set_ptr,
    2539             :     ModeDecisionContext                  *context_ptr,
    2540             :     struct ModeDecisionCandidateBuffer   *candidate_buffer_ptr,
    2541             :     CodingUnit                           *cu_ptr,
    2542             :     uint64_t                                 *y_distortion,
    2543             :     uint64_t                                 *cb_distortion,
    2544             :     uint64_t                                 *cr_distortion,
    2545             :     uint64_t                                  lambda,
    2546             :     uint64_t                                 *y_coeff_bits,
    2547             :     uint64_t                                 *cb_coeff_bits,
    2548             :     uint64_t                                 *cr_coeff_bits,
    2549             :     BlockSize                              bsize)
    2550             : 
    2551             : {
    2552     5853270 :     EbErrorType return_error = EB_ErrorNone; // never modified below
    2553             :     // Intra candidates always use the regular full-cost routine.
    2554     5853270 :     Av1FullCost(
    2555             :         picture_control_set_ptr,
    2556             :         context_ptr,
    2557             :         candidate_buffer_ptr,
    2558             :         cu_ptr,
    2559             :         y_distortion,
    2560             :         cb_distortion,
    2561             :         cr_distortion,
    2562             :         lambda,
    2563             :         y_coeff_bits,
    2564             :         cb_coeff_bits,
    2565             :         cr_coeff_bits,
    2566             :         bsize);
    2567             : 
    2568     5853160 :     return return_error;
    2569             : }
    2570             : 
    2571             : /*********************************************************************************
    2572             : * av1_inter_full_cost estimates the cost of an inter candidate mode
    2573             : * for the full mode decision module in inter frames.
    2574             : *   Candidates whose merge_flag is EB_TRUE are costed by Av1MergeSkipFullCost(); all others by Av1FullCost().
    2575             : *   @param *candidate_buffer_ptr
    2576             : *       candidate buffer; candidate_ptr->merge_flag selects the cost routine.
    2577             : *   @param *picture_control_set_ptr, *context_ptr, *cu_ptr, bsize
    2578             : *       forwarded unchanged to the selected routine; not read by this wrapper.
    2579             : *   @param *y_distortion, *cb_distortion, *cr_distortion, *y_coeff_bits, *cb_coeff_bits, *cr_coeff_bits
    2580             : *       distortion and coefficient-bit arguments, forwarded unchanged.
    2581             : *   @param lambda(input)
    2582             : *       lambda is the Lagrange multiplier
    2583             : *   @return
    2584             : *       always EB_ErrorNone; no status from the selected routine is propagated.
    2585             : **********************************************************************************/
    2586    31690400 : EbErrorType av1_inter_full_cost(
    2587             :     PictureControlSet                    *picture_control_set_ptr,
    2588             :     ModeDecisionContext                  *context_ptr,
    2589             :     struct ModeDecisionCandidateBuffer   *candidate_buffer_ptr,
    2590             :     CodingUnit                           *cu_ptr,
    2591             :     uint64_t                                 *y_distortion,
    2592             :     uint64_t                                 *cb_distortion,
    2593             :     uint64_t                                 *cr_distortion,
    2594             :     uint64_t                                  lambda,
    2595             :     uint64_t                                 *y_coeff_bits,
    2596             :     uint64_t                                 *cb_coeff_bits,
    2597             :     uint64_t                                 *cr_coeff_bits,
    2598             :     BlockSize                              bsize
    2599             : )
    2600             : {
    2601    31690400 :     EbErrorType  return_error = EB_ErrorNone; // never modified below
    2602             :     // Merge candidates are evaluated jointly as merge and skip; everything else takes the regular path.
    2603    31690400 :     if (candidate_buffer_ptr->candidate_ptr->merge_flag == EB_TRUE) {
    2604      652242 :         Av1MergeSkipFullCost(
    2605             :             picture_control_set_ptr,
    2606             :             context_ptr,
    2607             :             candidate_buffer_ptr,
    2608             :             cu_ptr,
    2609             :             y_distortion,
    2610             :             cb_distortion,
    2611             :             cr_distortion,
    2612             :             lambda,
    2613             :             y_coeff_bits,
    2614             :             cb_coeff_bits,
    2615             :             cr_coeff_bits,
    2616             :             bsize);
    2617             :     }
    2618             :     else {
    2619    31038200 :         Av1FullCost(
    2620             :             picture_control_set_ptr,
    2621             :             context_ptr,
    2622             :             candidate_buffer_ptr,
    2623             :             cu_ptr,
    2624             :             y_distortion,
    2625             :             cb_distortion,
    2626             :             cr_distortion,
    2627             :             lambda,
    2628             :             y_coeff_bits,
    2629             :             cb_coeff_bits,
    2630             :             cr_coeff_bits,
    2631             :             bsize);
    2632             :     }
    2633    31696300 :     return return_error;
    2634             : }
    2635             : 
    2636             : /************************************************************
    2637             : * Coding Loop Context Generation: derives the per-CU context values (intra neighbor modes, is_inter, skip flag, skip coeff, partition, reference mode) from the left/top neighbor arrays at (cu_origin_x, cu_origin_y) and stores them in cu_ptr and context_ptr->md_local_cu_unit[cu_ptr->mds_idx]
    2638             : ************************************************************/
    2639      811343 : void coding_loop_context_generation(
    2640             :     ModeDecisionContext      *context_ptr,
    2641             :     CodingUnit               *cu_ptr,
    2642             :     uint32_t                      cu_origin_x,
    2643             :     uint32_t                      cu_origin_y,
    2644             :     uint32_t                      sb_sz, // unused
    2645             :     NeighborArrayUnit        *skip_coeff_neighbor_array,
    2646             :     NeighborArrayUnit        *inter_pred_dir_neighbor_array,
    2647             :     NeighborArrayUnit        *ref_frame_type_neighbor_array, // unused
    2648             :     NeighborArrayUnit        *intra_luma_mode_neighbor_array,
    2649             :     NeighborArrayUnit        *skip_flag_neighbor_array,
    2650             :     NeighborArrayUnit        *mode_type_neighbor_array,
    2651             :     NeighborArrayUnit        *leaf_depth_neighbor_array,
    2652             :     NeighborArrayUnit       *leaf_partition_neighbor_array)
    2653             : {
    2654             :     (void)sb_sz;
    2655             :     UNUSED(ref_frame_type_neighbor_array);
    2656      811343 :     uint32_t modeTypeLeftNeighborIndex = get_neighbor_array_unit_left_index(
    2657             :         mode_type_neighbor_array,
    2658             :         cu_origin_y);
    2659      811342 :     uint32_t modeTypeTopNeighborIndex = get_neighbor_array_unit_top_index(
    2660             :         mode_type_neighbor_array,
    2661             :         cu_origin_x);
    2662      811361 :     uint32_t leafDepthLeftNeighborIndex = get_neighbor_array_unit_left_index(
    2663             :         leaf_depth_neighbor_array,
    2664             :         cu_origin_y);
    2665      811385 :     uint32_t leafDepthTopNeighborIndex = get_neighbor_array_unit_top_index(
    2666             :         leaf_depth_neighbor_array,
    2667             :         cu_origin_x);
    2668      811397 :     uint32_t skipFlagLeftNeighborIndex = get_neighbor_array_unit_left_index(
    2669             :         skip_flag_neighbor_array,
    2670             :         cu_origin_y);
    2671      811400 :     uint32_t skipFlagTopNeighborIndex = get_neighbor_array_unit_top_index(
    2672             :         skip_flag_neighbor_array,
    2673             :         cu_origin_x);
    2674      811409 :     uint32_t intraLumaModeLeftNeighborIndex = get_neighbor_array_unit_left_index(
    2675             :         intra_luma_mode_neighbor_array,
    2676             :         cu_origin_y);
    2677      811399 :     uint32_t intraLumaModeTopNeighborIndex = get_neighbor_array_unit_top_index(
    2678             :         intra_luma_mode_neighbor_array,
    2679             :         cu_origin_x);
    2680             : 
    2681      811385 :     uint32_t partition_left_neighbor_index = get_neighbor_array_unit_left_index(
    2682             :         leaf_partition_neighbor_array,
    2683             :         cu_origin_y);
    2684      811381 :     uint32_t partition_above_neighbor_index = get_neighbor_array_unit_top_index(
    2685             :         leaf_partition_neighbor_array,
    2686             :         cu_origin_x);
    2687             : 
    2688             :     // Intra Luma Neighbor Modes
    2689             :     // A neighbor whose mode type is not INTRA_MODE contributes DC_PRED; otherwise its stored luma mode is used.
    2690      736610 :     cu_ptr->prediction_unit_array->intra_luma_left_mode = (uint32_t)(
    2691      811379 :         (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] != INTRA_MODE) ? (uint32_t)DC_PRED :
    2692       74769 :         intra_luma_mode_neighbor_array->left_array[intraLumaModeLeftNeighborIndex]);
    2693             : 
    2694      737531 :     cu_ptr->prediction_unit_array->intra_luma_top_mode = (uint32_t)(
    2695      811379 :         (mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] != INTRA_MODE) ? (uint32_t)DC_PRED :
    2696       73848 :         intra_luma_mode_neighbor_array->top_array[intraLumaModeTopNeighborIndex]);
    2697             :     // is_inter context. Both neighbors valid: 3 if both intra, 1 if exactly one is intra, else 0. Only one neighbor valid: 2 if it is intra, else 0. No valid neighbor: 0.
    2698             :     int32_t contextIndex;
    2699      811379 :     if (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] != (uint8_t)INVALID_MODE && mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] != (uint8_t)INVALID_MODE) {
    2700     1420890 :         contextIndex = (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] == (uint8_t)INTRA_MODE && mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] == (uint8_t)INTRA_MODE) ? 3 :
    2701      676253 :             (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] == (uint8_t)INTRA_MODE || mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] == (uint8_t)INTRA_MODE) ? 1 : 0;
    2702             :     }
    2703       66740 :     else  if (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] != (uint8_t)INVALID_MODE)
    2704       35458 :         contextIndex = (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] == (uint8_t)INTRA_MODE) ? 2 : 0;
    2705       31282 :     else if (mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] != (uint8_t)INVALID_MODE)
    2706       30046 :         contextIndex = (mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] == (uint8_t)INTRA_MODE) ? 2 : 0;
    2707             :     else
    2708        1236 :         contextIndex = 0;
    2709      811379 :     cu_ptr->is_inter_ctx = contextIndex;
    2710             :     // Skip Flag Context
    2711             :     //   Each of the left and top neighbors adds 1 when its mode type is not
    2712             : 
    2713             :       //   INVALID_MODE and its skip flag equals EB_TRUE, so the resulting
    2714             :       //   context is 0, 1 or 2.
    2715      811379 :     cu_ptr->skip_flag_context =
    2716     1591470 :         (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] == (uint8_t)INVALID_MODE) ? 0 :
    2717      780094 :         (skip_flag_neighbor_array->left_array[skipFlagLeftNeighborIndex] == EB_TRUE) ? 1 : 0;
    2718     1622760 :     cu_ptr->skip_flag_context +=
    2719     1586070 :         (mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] == (uint8_t)INVALID_MODE) ? 0 :
    2720      774688 :         (skip_flag_neighbor_array->top_array[skipFlagTopNeighborIndex] == EB_TRUE) ? 1 : 0;
    2721             : 
    2722             :     // Split Flag Context (neighbor info): left/top neighbor intra mode (DC_PRED when the neighbor is not INTRA_MODE) and leaf depth
    2723      736609 :     context_ptr->md_local_cu_unit[cu_ptr->mds_idx].left_neighbor_mode = (uint32_t)(
    2724      811379 :         (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] != INTRA_MODE) ? (uint32_t)DC_PRED :
    2725       74770 :         intra_luma_mode_neighbor_array->left_array[intraLumaModeLeftNeighborIndex]);
    2726      811379 :     context_ptr->md_local_cu_unit[cu_ptr->mds_idx].left_neighbor_depth = leaf_depth_neighbor_array->left_array[leafDepthLeftNeighborIndex];
    2727      737530 :     context_ptr->md_local_cu_unit[cu_ptr->mds_idx].top_neighbor_mode = (uint32_t)(
    2728      811379 :         (mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] != INTRA_MODE) ? (uint32_t)DC_PRED :
    2729       73849 :         intra_luma_mode_neighbor_array->top_array[intraLumaModeTopNeighborIndex]);
    2730      811379 :     context_ptr->md_local_cu_unit[cu_ptr->mds_idx].top_neighbor_depth = leaf_depth_neighbor_array->top_array[leafDepthTopNeighborIndex];
    2731             : 
    2732             :     // Generate Partition context: neighbor entries equal to INVALID_NEIGHBOR_DATA are replaced by 0
    2733      811379 :     context_ptr->md_local_cu_unit[cu_ptr->mds_idx].above_neighbor_partition = (((PartitionContext*)leaf_partition_neighbor_array->top_array)[partition_above_neighbor_index].above == (int8_t)INVALID_NEIGHBOR_DATA) ?
    2734      774706 :         0 : ((PartitionContext*)leaf_partition_neighbor_array->top_array)[partition_above_neighbor_index].above;
    2735             : 
    2736      811379 :     context_ptr->md_local_cu_unit[cu_ptr->mds_idx].left_neighbor_partition = (((PartitionContext*)leaf_partition_neighbor_array->left_array)[partition_left_neighbor_index].left == (int8_t)INVALID_NEIGHBOR_DATA) ?
    2737      780116 :         0 : ((PartitionContext*)leaf_partition_neighbor_array->left_array)[partition_left_neighbor_index].left;
    2738             :     // Skip Coeff AV1 Context: each of the left and top neighbors adds 1 when its entry is not INVALID_NEIGHBOR_DATA and is non-zero (result 0, 1 or 2)
    2739      811379 :     uint32_t skipCoeffLeftNeighborIndex = get_neighbor_array_unit_left_index(
    2740             :         skip_coeff_neighbor_array,
    2741             :         cu_origin_y);
    2742      811400 :     uint32_t skipCoeffTopNeighborIndex = get_neighbor_array_unit_top_index(
    2743             :         skip_coeff_neighbor_array,
    2744             :         cu_origin_x);
    2745             : 
    2746      811395 :     cu_ptr->skip_coeff_context =
    2747      811395 :         (skip_coeff_neighbor_array->left_array[skipCoeffLeftNeighborIndex] == (uint8_t)INVALID_NEIGHBOR_DATA) ? 0 :
    2748           0 :         (skip_coeff_neighbor_array->left_array[skipCoeffLeftNeighborIndex]) ? 1 : 0;
    2749             : 
    2750     1622790 :     cu_ptr->skip_coeff_context +=
    2751      811395 :         (skip_coeff_neighbor_array->top_array[skipCoeffTopNeighborIndex] == (uint8_t)INVALID_NEIGHBOR_DATA) ? 0 :
    2752           0 :         (skip_coeff_neighbor_array->top_array[skipCoeffTopNeighborIndex]) ? 1 : 0;
    2753             :     // Generate reference mode context
    2754             : 
    2755      811395 :     cu_ptr->reference_mode_context = (uint8_t)eb_av1_get_reference_mode_context(
    2756             :         cu_origin_x,
    2757             :         cu_origin_y,
    2758             :         mode_type_neighbor_array,
    2759             :         inter_pred_dir_neighbor_array);
    2760             :     // Generate compound reference type context (same inputs as the reference mode context)
    2761      811366 :     cu_ptr->compoud_reference_type_context = (uint8_t)eb_av1_get_comp_reference_type_context(
    2762             :         cu_origin_x,
    2763             :         cu_origin_y,
    2764             :         mode_type_neighbor_array,
    2765             :         inter_pred_dir_neighbor_array);
    2766             : 
    2767             :     // Collect neighbor reference counts
    2768      811366 :     av1_collect_neighbors_ref_counts_new(cu_ptr->av1xd);
    2769             : 
    2770      811355 :     return;
    2771             : }
    2772             : 
    2773             : /********************************************
    2774             : * av1_tu_calc_cost
    2775             : *   computes TU Cost and generates TU Cbf: sets bit tu_index of y_has_coeff / u_has_coeff / v_has_coeff for the planes selected by component_type
    2776             : ********************************************/
    2777    17194900 : EbErrorType av1_tu_calc_cost(
    2778             :     ModeDecisionCandidate *candidate_ptr,                        // in/out parameter, prediction result Ptr; its y/u/v_has_coeff masks are updated
    2779             :     int16_t                   txb_skip_ctx,                         // input parameter, index into txb_skip_cost[]
    2780             :     uint32_t                   tu_index,                             // input parameter, TU index inside the CU
    2781             :     uint32_t                   y_count_non_zero_coeffs,                 // input parameter, number of non zero Y quantized coefficients
    2782             :     uint32_t                   cb_count_non_zero_coeffs,                // input parameter, number of non zero cb quantized coefficients
    2783             :     uint32_t                   cr_count_non_zero_coeffs,                // input parameter, number of non zero cr quantized coefficients
    2784             :     uint64_t                   y_tu_distortion[DIST_CALC_TOTAL],      // in/out parameter, Y distortion for both Normal and Cbf zero modes; the RESIDUAL entry may be overwritten
    2785             :     uint64_t                   cb_tu_distortion[DIST_CALC_TOTAL],     // unused; Cb distortion for both Normal and Cbf zero modes
    2786             :     uint64_t                   cr_tu_distortion[DIST_CALC_TOTAL],     // unused; Cr distortion for both Normal and Cbf zero modes
    2787             :     COMPONENT_TYPE           component_type,                       // input parameter, selects which planes are processed
    2788             :     uint64_t                  *y_tu_coeff_bits,                        // in/out parameter, Y quantized coefficients rate; may be reset to 0
    2789             :     uint64_t                  *cb_tu_coeff_bits,                       // unused; Cb quantized coefficients rate
    2790             :     uint64_t                  *cr_tu_coeff_bits,                       // unused; Cr quantized coefficients rate
    2791             :     TxSize                  txsize,
    2792             :     uint64_t                   lambda)                              // input parameter, lambda for Luma
    2793             :     // Returns EB_ErrorNone unconditionally.
    2794             : {
    2795             :     (void)cr_tu_coeff_bits;
    2796             :     (void)cb_tu_coeff_bits;
    2797             :     (void)cr_tu_distortion;
    2798             :     (void)cb_tu_distortion;
    2799    17194900 :     EbErrorType return_error = EB_ErrorNone;
    2800             :     // Non Zero coeff mode variables
    2801    17194900 :     uint64_t y_nonzero_coeff_distortion = y_tu_distortion[DIST_CALC_RESIDUAL];
    2802             :     uint64_t y_nonzero_coeff_rate;
    2803             : 
    2804    17194900 :     uint64_t y_nonzero_coeff_cost = 0;
    2805             : 
    2806             :     // Zero Cbf mode variables
    2807    17194900 :     uint64_t y_zero_coeff_distortion = y_tu_distortion[DIST_CALC_PREDICTION];
    2808             : 
    2809    17194900 :     uint64_t y_zero_coeff_luma_flag_bits_num = 0;
    2810             : 
    2811             :     uint64_t y_zero_coeff_rate;
    2812             : 
    2813    17194900 :     uint64_t y_zero_coeff_cost = 0;
    2814    17194900 :     if (component_type == COMPONENT_LUMA || component_type == COMPONENT_ALL) {
    2815             :         // Non Zero Distortion
    2816             :         // *Note - As of Oct 2011, the JCT-VC uses the PSNR formula
    2817             :         //  PSNR = (LUMA_WEIGHT * PSNRy + PSNRu + PSNRv) / (2+LUMA_WEIGHT)
    2818           0 :         y_nonzero_coeff_distortion = LUMA_WEIGHT * (y_nonzero_coeff_distortion << AV1_COST_PRECISION);
    2819             : 
    2820             :         // Zero distortion
    2821             :         // *Note - As of Oct 2011, the JCT-VC uses the PSNR formula
    2822             :         //  PSNR = (LUMA_WEIGHT * PSNRy + PSNRu + PSNRv) / (2+LUMA_WEIGHT)
    2823           0 :         y_zero_coeff_distortion = LUMA_WEIGHT * (y_zero_coeff_distortion << AV1_COST_PRECISION);
    2824             : 
    2825             :         // **Compute Rate
    2826             : 
    2827             :         // Estimate Cbf's Bits
    2828             :         // The cost table is selected by the rounded-up average of the square and square-up size classes of txsize.
    2829           0 :         const TxSize txs_ctx = (TxSize)((txsize_sqr_map[txsize] + txsize_sqr_up_map[txsize] + 1) >> 1);
    2830           0 :         assert(txs_ctx < TX_SIZES);
    2831           0 :         const LvMapCoeffCost *const coeff_costs = &candidate_ptr->md_rate_estimation_ptr->coeff_fac_bits[txs_ctx][0];
    2832             : 
    2833           0 :         y_zero_coeff_luma_flag_bits_num = coeff_costs->txb_skip_cost[txb_skip_ctx][1];
    2834             : 
    2835           0 :         y_nonzero_coeff_rate = *y_tu_coeff_bits; // yNonZeroCbfLumaFlagBitsNum is already calculated inside y_tu_coeff_bits
    2836             : 
    2837           0 :         y_zero_coeff_rate = y_zero_coeff_luma_flag_bits_num;
    2838             :         // The zero-coefficient alternative is disabled: its cost is pinned to the maximum value, so the else branch below is never taken.
    2839             :         if (1)
    2840           0 :             y_zero_coeff_cost = 0xFFFFFFFFFFFFFFFFull;
    2841             :         else
    2842             :             y_zero_coeff_cost = RDCOST(lambda, y_zero_coeff_rate, y_zero_coeff_distortion);
    2843             :         // **Compute Cost
    2844           0 :         y_nonzero_coeff_cost = RDCOST(lambda, y_nonzero_coeff_rate, y_nonzero_coeff_distortion);
    2845             :         // Keep the coded coefficients only when they are strictly cheaper than the zero-coefficient alternative; otherwise report zero bits and the prediction distortion.
    2846           0 :         candidate_ptr->y_has_coeff |= (((y_count_non_zero_coeffs != 0) && (y_nonzero_coeff_cost < y_zero_coeff_cost)) << tu_index);
    2847           0 :         *y_tu_coeff_bits = (y_nonzero_coeff_cost < y_zero_coeff_cost) ? *y_tu_coeff_bits : 0;
    2848           0 :         y_tu_distortion[DIST_CALC_RESIDUAL] = (y_nonzero_coeff_cost < y_zero_coeff_cost) ? y_tu_distortion[DIST_CALC_RESIDUAL] : y_tu_distortion[DIST_CALC_PREDICTION];
    2849             :         }
    2850    17194900 :     if (component_type == COMPONENT_CHROMA_CB || component_type == COMPONENT_CHROMA || component_type == COMPONENT_ALL)
    2851    12494100 :         candidate_ptr->u_has_coeff |= ((cb_count_non_zero_coeffs != 0) << tu_index);
    2852    17194900 :     if (component_type == COMPONENT_CHROMA_CR || component_type == COMPONENT_CHROMA || component_type == COMPONENT_ALL)
    2853    12131400 :         candidate_ptr->v_has_coeff |= ((cr_count_non_zero_coeffs != 0) << tu_index);
    2854    17194900 :     return return_error;
    2855             :     }
    2856             : 
    2857             : /********************************************
    2858             : * av1_tu_calc_cost_luma
    2859             : *   Computes the luma TU cost and generates the luma TU Cbf.
                     : *
                     : *   - Sets bit tu_index of candidate_ptr->y_has_coeff when
                     : *     y_count_non_zero_coeffs is non-zero.
                     : *   - The zero Cbf cost is currently forced to the maximum 64-bit value,
                     : *     so the zero Cbf mode can only be selected if the non-zero Cbf cost
                     : *     is itself the maximum value.
                     : *   - When the non-zero Cbf cost is not strictly lower than the zero Cbf
                     : *     cost, y_tu_coeff_bits is zeroed and the residual distortion entry
                     : *     is replaced by the prediction distortion entry.
                     : *   - y_full_cost receives the smaller of the two costs.
                     : *   - Always returns EB_ErrorNone.
    2860             : ********************************************/
    2861             : 
    2862    48202900 : EbErrorType av1_tu_calc_cost_luma(
    2863             :     int16_t                   txb_skip_ctx,                         // input parameter, context index into txb_skip_cost
    2864             :     ModeDecisionCandidate *candidate_ptr,                        // input/output parameter, candidate whose y_has_coeff mask is updated; also supplies the rate estimation tables
    2865             :     uint32_t                   tu_index,                             // input parameter, TU index inside the CU
    2866             :     TxSize                  tx_size,                               // input parameter, transform size used to select the coefficient cost table
    2867             :     uint32_t                   y_count_non_zero_coeffs,                 // input parameter, number of non zero Y quantized coefficients
    2868             :     uint64_t                   y_tu_distortion[DIST_CALC_TOTAL],      // input/output parameter, Y distortion for both Normal and Cbf zero modes
    2869             :     uint64_t                  *y_tu_coeff_bits,                        // input/output parameter, Y quantized coefficients rate
    2870             :     uint64_t                  *y_full_cost,                           // output parameter, minimum of the non-zero Cbf and zero Cbf costs
    2871             :     uint64_t                   lambda)                              // input parameter, lambda for Luma
    2872             : 
    2873             : {
    2874    48202900 :     EbErrorType return_error = EB_ErrorNone;
    2875             : 
    2876             :     // Non Zero Cbf mode variables
    2877    48202900 :     uint64_t yNonZeroCbfDistortion = y_tu_distortion[DIST_CALC_RESIDUAL];
    2878             : 
    2879             :     uint64_t yNonZeroCbfRate;
    2880             : 
    2881    48202900 :     uint64_t yNonZeroCbfCost = 0;
    2882             : 
    2883             :     // Zero Cbf mode variables
    2884    48202900 :     uint64_t yZeroCbfDistortion = y_tu_distortion[DIST_CALC_PREDICTION];
    2885             : 
    2886    48202900 :     uint64_t yZeroCbfLumaFlagBitsNum = 0;
    2887             : 
    2888             :     uint64_t yZeroCbfRate;
    2889             : 
    2890    48202900 :     uint64_t yZeroCbfCost = 0;
    2891             : 
    2892             :     // **Compute distortion
    2893             :     // Non Zero Distortion (AV1_COST_PRECISION is defined as 0 in this file, so the shift is a no-op)
    2894             :     // *Note - As of Oct 2011, the JCT-VC uses the PSNR formula
    2895             :     //  PSNR = (LUMA_WEIGHT * PSNRy + PSNRu + PSNRv) / (2+LUMA_WEIGHT)
    2896    48202900 :     yNonZeroCbfDistortion = LUMA_WEIGHT * (yNonZeroCbfDistortion << AV1_COST_PRECISION);
    2897             : 
    2898             :     // Zero distortion
    2899             :     // *Note - As of Oct 2011, the JCT-VC uses the PSNR formula
    2900             :     //  PSNR = (LUMA_WEIGHT * PSNRy + PSNRu + PSNRv) / (2+LUMA_WEIGHT)
    2901    48202900 :     yZeroCbfDistortion = LUMA_WEIGHT * (yZeroCbfDistortion << AV1_COST_PRECISION);
    2902             : 
    2903             :     // **Compute Rate
    2904             : 
    2905             :     // Estimate Cbf's Bits: pick the coefficient cost table from the average of the two square size maps, rounded up
    2906             : 
    2907    48202900 :     const TxSize txs_ctx = (TxSize)((txsize_sqr_map[tx_size] + txsize_sqr_up_map[tx_size] + 1) >> 1);
    2908    48202900 :     assert(txs_ctx < TX_SIZES);
    2909    48202900 :     const LvMapCoeffCost *const coeff_costs = &candidate_ptr->md_rate_estimation_ptr->coeff_fac_bits[txs_ctx][0];
    2910             : 
    2911    48202900 :     yZeroCbfLumaFlagBitsNum = coeff_costs->txb_skip_cost[txb_skip_ctx][1];
    2912             : 
    2913    48202900 :     yNonZeroCbfRate = *y_tu_coeff_bits; // yNonZeroCbfLumaFlagBitsNum is already calculated inside y_tu_coeff_bits
    2914             : 
    2915    48202900 :     yZeroCbfRate = yZeroCbfLumaFlagBitsNum;
    2916             : 
    2917             :     if (1) // always taken: the zero Cbf mode is disabled by assigning it the maximum cost
    2918    48202900 :         yZeroCbfCost = 0xFFFFFFFFFFFFFFFFull;
    2919             :     else // unreachable while the condition above is the constant 1
    2920             :         yZeroCbfCost = RDCOST(lambda, yZeroCbfRate, yZeroCbfDistortion);
    2921             :     // **Compute Cost
    2922    48202900 :     yNonZeroCbfCost = RDCOST(lambda, yNonZeroCbfRate, yNonZeroCbfDistortion);
    2923    48202900 :     candidate_ptr->y_has_coeff |= ((y_count_non_zero_coeffs != 0) << tu_index); // the flag depends only on the coefficient count, not on the cost comparison
    2924    48202900 :     *y_tu_coeff_bits = (yNonZeroCbfCost < yZeroCbfCost) ? *y_tu_coeff_bits : 0;
    2925    48202900 :     y_tu_distortion[DIST_CALC_RESIDUAL] = (yNonZeroCbfCost < yZeroCbfCost) ? y_tu_distortion[DIST_CALC_RESIDUAL] : y_tu_distortion[DIST_CALC_PREDICTION];
    2926             : 
    2927    48202900 :     *y_full_cost = MIN(yNonZeroCbfCost, yZeroCbfCost);
    2928             : 
    2929    48202900 :     return return_error;
    2930             :     }
    2931             : 
    2932             : //static INLINE int32_t partition_plane_context(const MacroBlockD *xd, int32_t mi_row,
    2933             : //    int32_t mi_col, BlockSize bsize) {
    2934             : //    const PartitionContextType *above_ctx = xd->above_seg_context + mi_col;
    2935             : //    const PartitionContextType *left_ctx =
    2936             : //        xd->left_seg_context + (mi_row & MAX_MIB_MASK);
    2937             : //    // Minimum partition point is 8x8. Offset the bsl accordingly.
    2938             : //    const int32_t bsl = mi_size_wide_log2[bsize] - mi_size_wide_log2[BLOCK_8X8];
    2939             : //    int32_t above = (*above_ctx >> bsl) & 1, left = (*left_ctx >> bsl) & 1;
    2940             : //
    2941             : //    assert(mi_size_wide_log2[bsize] == mi_size_high_log2[bsize]);
    2942             : //    assert(bsl >= 0);
    2943             : //
    2944             : //    return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
    2945             : //}
    2946             : 
    2947             : /*********************************************************************************
    2948             : * av1_split_flag_rate function is used to generate the cost of signalling a partition type
    2949             : *
                     : *   @param *sequence_control_set_ptr(input)
                     : *       sequence_control_set_ptr supplies seq_header.max_frame_width and
                     : *       seq_header.max_frame_height for the frame boundary tests.
                     : *   @param *context_ptr(input)
                     : *       context_ptr supplies the SB origin and the left and above neighbor
                     : *       partition contexts stored in md_local_cu_unit.
    2950             : *   @param *cu_ptr(input)
    2951             : *       cu_ptr is the pointer of the target CU.
                     : *   @param leaf_index(input)
                     : *       leaf_index is unused.
    2952             : *   @param partitionType(input)
    2953             : *       partitionType is the partition type whose rate is looked up.
    2954             : *   @param split_rate(output)
    2955             : *       split_rate receives RDCOST(lambda, rate, 0) for the looked-up rate.
    2956             : *   @param lambda(input)
    2957             : *       lambda is the Lagrange multiplier
    2958             : *   @param md_rate_estimation_ptr(input)
    2959             : *       md_rate_estimation_ptr is pointer to MD rate Estimation Tables
                     : *   @param tb_max_depth(input)
                     : *       tb_max_depth is unused.
                     : *   @return EB_ErrorNone in all cases.
    2960             : **********************************************************************************/
    2961     1427770 : EbErrorType av1_split_flag_rate(
    2962             :     SequenceControlSet                  *sequence_control_set_ptr,
    2963             :     ModeDecisionContext                  *context_ptr,
    2964             :     CodingUnit                           *cu_ptr,
    2965             :     uint32_t                                  leaf_index,
    2966             :     PartitionType                          partitionType,
    2967             :     uint64_t                                 *split_rate,
    2968             :     uint64_t                                  lambda,
    2969             :     MdRateEstimationContext              *md_rate_estimation_ptr,
    2970             :     uint32_t                                  tb_max_depth)
    2971             : {
    2972             :     (void)tb_max_depth;
    2973             :     (void)leaf_index;
    2974             : 
    2975     1427770 :     const BlockGeom          *blk_geom = get_blk_geom_mds(cu_ptr->mds_idx);
    2976     1427680 :     EbErrorType return_error = EB_ErrorNone;
    2977             : 
    2978     1427680 :     uint32_t cu_origin_x = context_ptr->sb_origin_x + blk_geom->origin_x;
    2979     1427680 :     uint32_t cu_origin_y = context_ptr->sb_origin_y + blk_geom->origin_y;
    2980             : 
    2981     1427680 :     PartitionType p = partitionType;
    2982             : 
    2983     1427680 :     uint32_t cu_depth = blk_geom->depth;
    2984             :     UNUSED(cu_depth);
    2985     1427680 :     BlockSize bsize = blk_geom->bsize;
    2986     1427680 :     assert(bsize<BlockSizeS_ALL);
    2987     1427670 :     const int32_t is_partition_point = blk_geom->bsize >= BLOCK_8X8; // partition contexts are only derived for blocks of at least 8x8
    2988             : 
    2989     1427670 :     if (is_partition_point) {
    2990     1427670 :         const int32_t hbs = (mi_size_wide[bsize] << 2) >> 1; // presumably half the block width in pixels (mi units * 4 / 2) - TODO confirm; also used for the row test, the block is asserted square below
    2991     1427670 :         const int32_t hasRows = (cu_origin_y + hbs) < sequence_control_set_ptr->seq_header.max_frame_height;
    2992     1427670 :         const int32_t hasCols = (cu_origin_x + hbs) < sequence_control_set_ptr->seq_header.max_frame_width;
    2993             : 
    2994     1427670 :         uint32_t contextIndex = 0;
    2995             : 
    2996     1427670 :         const PartitionContextType left_ctx = context_ptr->md_local_cu_unit[cu_ptr->mds_idx].left_neighbor_partition == (int8_t)(INVALID_NEIGHBOR_DATA) ? 0 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].left_neighbor_partition; // an invalid left neighbor is treated as context 0
    2997     1427670 :         const PartitionContextType above_ctx = context_ptr->md_local_cu_unit[cu_ptr->mds_idx].above_neighbor_partition == (int8_t)(INVALID_NEIGHBOR_DATA) ? 0 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].above_neighbor_partition; // an invalid above neighbor is treated as context 0
    2998             : 
    2999     1427670 :         const int32_t bsl = mi_size_wide_log2[bsize] - mi_size_wide_log2[BLOCK_8X8]; // block size level relative to 8x8
    3000             : 
    3001     1427670 :         int32_t above = (above_ctx >> bsl) & 1, left = (left_ctx >> bsl) & 1; // one context bit per neighbor, selected by the size level
    3002             : 
    3003     1427670 :         assert(mi_size_wide_log2[bsize] == mi_size_high_log2[bsize]);
    3004     1427670 :         assert(bsl >= 0);
    3005             : 
    3006     1427670 :         contextIndex = (left * 2 + above) + bsl * PARTITION_PLOFFSET;
    3007             : 
    3008     1427670 :         if (hasRows && hasCols) { // block interior to the frame: use the neighbor-derived context and the full partition type
    3009     1294220 :             *split_rate = (uint64_t)md_rate_estimation_ptr->partition_fac_bits[contextIndex][partitionType];
    3010             : 
    3011             :         }
    3012      133452 :         else if (!hasRows && hasCols) { // bottom frame boundary: fixed context 2 and a binary split / no-split index
    3013      133560 :             *split_rate = (uint64_t)md_rate_estimation_ptr->partition_fac_bits[2][p == PARTITION_SPLIT];
    3014             : 
    3015             :         }
    3016             :         else { // remaining cases (hasCols is false); NOTE(review): same lookup as the branch above - confirm this is intended
    3017           0 :             *split_rate = (uint64_t)md_rate_estimation_ptr->partition_fac_bits[2][p == PARTITION_SPLIT];
    3018             : 
    3019             :         }
    3020             :     }
    3021             :     else // block smaller than 8x8: context 0 is used
    3022           0 :         *split_rate = (uint64_t)md_rate_estimation_ptr->partition_fac_bits[0][partitionType];
    3023     1427670 :     *split_rate = RDCOST(lambda, *split_rate, 0); // convert the rate into a cost with zero distortion
    3024             : 
    3025     1427670 :     return return_error;
    3026             : }
    3027             : 
    3028             : /********************************************
    3029             : * av1_encode_tu_calc_cost
    3030             : *   Computes TU Cost and generates TU Cbf
    3031             : *   at the level of the encode pass
                     : *
                     : *   - Luma is evaluated only when component_mask is the LUMA or the FULL
                     : *     mask; otherwise y_has_coeff of the TU is cleared.
                     : *   - The zero Cbf cost is forced to the maximum 64-bit value when the
                     : *     luma transform type is not DCT_DCT, otherwise it is an RDCOST.
                     : *   - y_has_coeff is set only if there are non-zero Y coefficients and
                     : *     the non-zero Cbf cost is strictly lower than the zero Cbf cost.
                     : *   - u_has_coeff and v_has_coeff are always derived from the Cb and Cr
                     : *     coefficient counts, whatever the mask.
                     : *   - Always returns EB_ErrorNone.
    3032             : ********************************************/
    3033       20940 : EbErrorType av1_encode_tu_calc_cost(
    3034             :     EncDecContext          *context_ptr,                 // input parameter, supplies cu_ptr, txb_itr, full_lambda and the rate estimation tables
    3035             :     uint32_t                   *count_non_zero_coeffs,    // input parameter, non-zero coefficient counts read at indices 0 (Y), 1 (Cb), 2 (Cr)
    3036             :     uint64_t                    y_tu_distortion[DIST_CALC_TOTAL], // input/output parameter, Y distortion for both Normal and Cbf zero modes
    3037             :     uint64_t                   *y_tu_coeff_bits,          // input/output parameter, Y quantized coefficients rate
    3038             :     uint32_t                    component_mask            // input parameter, selects whether the luma decision is evaluated
    3039             : )
    3040             : {
    3041       20940 :     CodingUnit              *cu_ptr = context_ptr->cu_ptr;
    3042       20940 :     uint32_t                     tu_index = context_ptr->txb_itr;
    3043       20940 :     MdRateEstimationContext *md_rate_estimation_ptr = context_ptr->md_rate_estimation_ptr;
    3044       20940 :     uint64_t                     lambda = context_ptr->full_lambda;
    3045       20940 :     uint32_t                     y_count_non_zero_coeffs = count_non_zero_coeffs[0];
    3046       20940 :     uint32_t                     cb_count_non_zero_coeffs = count_non_zero_coeffs[1];
    3047       20940 :     uint32_t                     cr_count_non_zero_coeffs = count_non_zero_coeffs[2];
    3048             : 
    3049       20940 :     EbErrorType return_error = EB_ErrorNone;
    3050             : 
    3051             :     // Non Zero Cbf mode variables
    3052       20940 :     uint64_t yNonZeroCbfDistortion = y_tu_distortion[DIST_CALC_RESIDUAL];
    3053             : 
    3054             :     uint64_t yNonZeroCbfRate;
    3055             : 
    3056       20940 :     uint64_t yNonZeroCbfCost = 0;
    3057             : 
    3058             :     // Zero Cbf mode variables
    3059       20940 :     uint64_t yZeroCbfDistortion = y_tu_distortion[DIST_CALC_PREDICTION];
    3060             : 
    3061       20940 :     uint64_t yZeroCbfLumaFlagBitsNum = 0;
    3062             : 
    3063             :     uint64_t yZeroCbfRate;
    3064             : 
    3065       20940 :     uint64_t yZeroCbfCost = 0;
    3066       20940 :     int16_t  txb_skip_ctx = context_ptr->md_context->luma_txb_skip_context;
    3067             :     // **Compute distortion
    3068       20940 :     if (component_mask == PICTURE_BUFFER_DESC_LUMA_MASK || component_mask == PICTURE_BUFFER_DESC_FULL_MASK) {
    3069             :         // Non Zero Distortion (AV1_COST_PRECISION is defined as 0 in this file, so the shift is a no-op)
    3070             :         // *Note - As of Oct 2011, the JCT-VC uses the PSNR formula
    3071             :         //  PSNR = (LUMA_WEIGHT * PSNRy + PSNRu + PSNRv) / (2+LUMA_WEIGHT)
    3072       20940 :         yNonZeroCbfDistortion = LUMA_WEIGHT * (yNonZeroCbfDistortion << AV1_COST_PRECISION);
    3073             : 
    3074             :         // Zero distortion
    3075             :         // *Note - As of Oct 2011, the JCT-VC uses the PSNR formula
    3076             :         //  PSNR = (LUMA_WEIGHT * PSNRy + PSNRu + PSNRv) / (2+LUMA_WEIGHT)
    3077       20940 :         yZeroCbfDistortion = LUMA_WEIGHT * (yZeroCbfDistortion << AV1_COST_PRECISION);
    3078       20940 :         TxSize    txSize = context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr];
    3079       20940 :         assert(txSize < TX_SIZES_ALL);
    3080             : 
    3081       20940 :         const TxSize txs_ctx = (TxSize)((txsize_sqr_map[txSize] + txsize_sqr_up_map[txSize] + 1) >> 1); // cost table index: average of the two square size maps, rounded up
    3082       20940 :         assert(txs_ctx < TX_SIZES);
    3083       20940 :         const LvMapCoeffCost *const coeff_costs = &md_rate_estimation_ptr->coeff_fac_bits[txs_ctx][0];
    3084             : 
    3085       20940 :         yZeroCbfLumaFlagBitsNum = coeff_costs->txb_skip_cost[txb_skip_ctx][1];
    3086             : 
    3087       20940 :         yNonZeroCbfRate = *y_tu_coeff_bits; // yNonZeroCbfLumaFlagBitsNum is already calculated inside y_tu_coeff_bits
    3088             : 
    3089       20940 :         yZeroCbfRate = yZeroCbfLumaFlagBitsNum;
    3090       20940 :         TransformUnit       *txb_ptr = &cu_ptr->transform_unit_array[context_ptr->txb_itr];
    3091       20940 :         if (txb_ptr->transform_type[PLANE_TYPE_Y] != DCT_DCT) { // the zero Cbf mode is never selected for a non DCT_DCT luma transform
    3092        5764 :             yZeroCbfCost = 0xFFFFFFFFFFFFFFFFull;
    3093             :         }
    3094             :         else
    3095       15176 :             yZeroCbfCost = RDCOST(lambda, yZeroCbfRate, yZeroCbfDistortion);
    3096             :         // **Compute Cost
    3097       20940 :         yNonZeroCbfCost = RDCOST(lambda, yNonZeroCbfRate, yNonZeroCbfDistortion);
    3098       20940 :         cu_ptr->transform_unit_array[tu_index].y_has_coeff = ((y_count_non_zero_coeffs != 0) && (yNonZeroCbfCost < yZeroCbfCost)) ? EB_TRUE : EB_FALSE;
    3099       20940 :         *y_tu_coeff_bits = (yNonZeroCbfCost < yZeroCbfCost) ? *y_tu_coeff_bits : 0;
    3100       20940 :         y_tu_distortion[DIST_CALC_RESIDUAL] = (yNonZeroCbfCost < yZeroCbfCost) ? y_tu_distortion[DIST_CALC_RESIDUAL] : y_tu_distortion[DIST_CALC_PREDICTION];
    3101             :         }
    3102             :     else // luma not part of the mask: report no luma coefficients for this TU
    3103           0 :         cu_ptr->transform_unit_array[tu_index].y_has_coeff = EB_FALSE;
    3104       20940 :     cu_ptr->transform_unit_array[tu_index].u_has_coeff = cb_count_non_zero_coeffs != 0 ? EB_TRUE : EB_FALSE;
    3105       20940 :     cu_ptr->transform_unit_array[tu_index].v_has_coeff = cr_count_non_zero_coeffs != 0 ? EB_TRUE : EB_FALSE;
    3106             : 
    3107       20940 :     return return_error;
    3108             :     }
    3109             : /********************************************
                     : * GetPMCost
                     : *   Returns the cost
                     : *     LUMA_WEIGHT * (tuDistortion << COST_PRECISION)
                     : *       + ((lambda * y_tu_coeff_bits + MD_OFFSET) >> MD_SHIFT)
                     : *   Unlike the TU cost functions above, it does not use RDCOST and it
                     : *   shifts by COST_PRECISION rather than AV1_COST_PRECISION.
                     : ********************************************/
    3110           0 : uint64_t GetPMCost(
    3111             :     uint64_t                   lambda,            // input parameter, multiplier applied to the rate
    3112             :     uint64_t                   tuDistortion,      // input parameter, TU distortion before weighting
    3113             :     uint64_t                   y_tu_coeff_bits    // input parameter, Y coefficients rate
    3114             : )
    3115             : {
    3116           0 :     uint64_t yNonZeroCbfDistortion = LUMA_WEIGHT * (tuDistortion << COST_PRECISION);
    3117           0 :     uint64_t yNonZeroCbfRate = (y_tu_coeff_bits);
    3118           0 :     uint64_t yNonZeroCbfCost = yNonZeroCbfDistortion + (((lambda       * yNonZeroCbfRate) + MD_OFFSET) >> MD_SHIFT); // MD_OFFSET is added before the MD_SHIFT right shift
    3119             : 
    3120           0 :     return yNonZeroCbfCost;
    3121             : }

Generated by: LCOV version 1.14