LCOV - code coverage report
Current view: top level - Codec - EbFullLoop.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1092 1545 70.7 %
Date: 2019-11-25 17:38:06 Functions: 30 49 61.2 %

          Line data    Source code
       1             : /*
       2             : * Copyright(c) 2019 Intel Corporation
       3             : * SPDX - License - Identifier: BSD - 2 - Clause - Patent
       4             : */
       5             : 
       6             : /*
       7             : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       8             : *
       9             : * This source code is subject to the terms of the BSD 2 Clause License and
      10             : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      11             : * was not distributed with this source code in the LICENSE file, you can
      12             : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      13             : * Media Patent License 1.0 was not distributed with this source code in the
      14             : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      15             : */
      16             : 
      17             : #include "EbDefinitions.h"
      18             : #include "EbModeDecisionProcess.h"
      19             : #include "EbTransforms.h"
      20             : #include "EbFullLoop.h"
      21             : #include "EbRateDistortionCost.h"
      22             : #include "EbCommonUtils.h"
      23             : #include "aom_dsp_rtcd.h"
      24             : 
      /*
       * Branch-prediction hints. With GCC-compatible compilers the condition is
       * routed through __builtin_expect; elsewhere the macros expand to the bare
       * expression, so the value of LIKELY(v)/UNLIKELY(v) is always v.
       */
      #if defined(__GNUC__)
      #define LIKELY(v) __builtin_expect((v), 1)
      #define UNLIKELY(v) __builtin_expect((v), 0)
      #else
      #define LIKELY(v) (v)
      #define UNLIKELY(v) (v)
      #endif
      32             : static PartitionType from_shape_to_part[] = {
      33             :     PARTITION_NONE,
      34             :     PARTITION_HORZ,
      35             :     PARTITION_VERT,
      36             :     PARTITION_HORZ_A,
      37             :     PARTITION_HORZ_B,
      38             :     PARTITION_VERT_A,
      39             :     PARTITION_VERT_B,
      40             :     PARTITION_HORZ_4,
      41             :     PARTITION_VERT_4,
      42             :     PARTITION_SPLIT
      43             : };
      44           0 : void quantize_b_helper_c_II(const TranLow *coeff_ptr, intptr_t n_coeffs,
      45             :     int32_t skip_block, const int16_t *zbin_ptr,
      46             :     const int16_t *round_ptr, const int16_t *quant_ptr,
      47             :     const int16_t *quant_shift_ptr, TranLow *qcoeff_ptr,
      48             :     TranLow *dqcoeff_ptr, const int16_t *dequant_ptr,
      49             :     uint16_t *eob_ptr, const int16_t *scan,
      50             :     const int16_t *iscan, const QmVal *qm_ptr,
      51             :     const QmVal *iqm_ptr, const int32_t log_scale) {
      52           0 :     const int32_t zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
      53           0 :                            ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
      54           0 :     const int32_t nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
      55           0 :     int32_t i, non_zero_count = (int32_t)n_coeffs, eob = -1;
      56             :     (void)iscan;
      57             : 
      58           0 :     memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
      59           0 :     memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
      60             : 
      61           0 :     if (!skip_block) {
      62             :         // Pre-scan pass
      63           0 :         for (i = (int32_t)n_coeffs - 1; i >= 0; i--) {
      64           0 :             const int32_t rc = scan[i];
      65           0 :             const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
      66           0 :             const int32_t coeff = coeff_ptr[rc] * wt;
      67             : 
      68           0 :             if (coeff < (zbins[rc != 0] * (1 << AOM_QM_BITS)) &&
      69           0 :                 coeff >(nzbins[rc != 0] * (1 << AOM_QM_BITS)))
      70           0 :                 non_zero_count--;
      71             :             else
      72             :                 break;
      73             :         }
      74             : 
      75             :         // Quantization pass: All coefficients with index >= zero_flag are
      76             :         // skippable. Note: zero_flag can be zero.
      77           0 :         for (i = 0; i < non_zero_count; i++) {
      78           0 :             const int32_t rc = scan[i];
      79           0 :             const int32_t coeff = coeff_ptr[rc];
      80           0 :             const int32_t coeff_sign = (coeff >> 31);
      81           0 :             const int32_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
      82             :             int32_t tmp32;
      83             : 
      84           0 :             const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
      85           0 :             if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) {
      86           0 :                 int64_t tmp =
      87           0 :                     clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale),
      88             :                         INT16_MIN, INT16_MAX);
      89           0 :                 tmp *= wt;
      90           0 :                 tmp32 = (int32_t)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
      91           0 :                     quant_shift_ptr[rc != 0]) >>
      92           0 :                     (16 - log_scale + AOM_QM_BITS));  // quantization
      93           0 :                 qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
      94           0 :                 const int32_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
      95           0 :                 const int32_t dequant =
      96           0 :                     (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
      97             :                     AOM_QM_BITS;
      98           0 :                 const TranLow abs_dqcoeff = (tmp32 * dequant) >> log_scale;
      99           0 :                 dqcoeff_ptr[rc] = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
     100             : 
     101           0 :                 if (tmp32) eob = i;
     102             :             }
     103             :         }
     104             :     }
     105           0 :     *eob_ptr = (uint16_t)(eob + 1);
     106           0 : }
     107           0 : void eb_aom_quantize_b_c_II(const TranLow *coeff_ptr, intptr_t n_coeffs,
     108             :     int32_t skip_block, const int16_t *zbin_ptr,
     109             :     const int16_t *round_ptr, const int16_t *quant_ptr,
     110             :     const int16_t *quant_shift_ptr, TranLow *qcoeff_ptr,
     111             :     TranLow *dqcoeff_ptr, const int16_t *dequant_ptr,
     112             :     uint16_t *eob_ptr, const int16_t *scan,
     113             :     const int16_t *iscan) {
     114           0 :     quantize_b_helper_c_II(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
     115             :         quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
     116             :         dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 0);
     117           0 : }
     118             : 
     119           0 : void eb_aom_quantize_b_32x32_c_II(const TranLow *coeff_ptr, intptr_t n_coeffs,
     120             :     int32_t skip_block, const int16_t *zbin_ptr,
     121             :     const int16_t *round_ptr, const int16_t *quant_ptr,
     122             :     const int16_t *quant_shift_ptr,
     123             :     TranLow *qcoeff_ptr, TranLow *dqcoeff_ptr,
     124             :     const int16_t *dequant_ptr, uint16_t *eob_ptr,
     125             :     const int16_t *scan, const int16_t *iscan) {
     126           0 :     quantize_b_helper_c_II(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
     127             :         quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
     128             :         dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 1);
     129           0 : }
     130             : 
     131           0 : void eb_aom_quantize_b_64x64_c_II(const TranLow *coeff_ptr, intptr_t n_coeffs,
     132             :     int32_t skip_block, const int16_t *zbin_ptr,
     133             :     const int16_t *round_ptr, const int16_t *quant_ptr,
     134             :     const int16_t *quant_shift_ptr,
     135             :     TranLow *qcoeff_ptr, TranLow *dqcoeff_ptr,
     136             :     const int16_t *dequant_ptr, uint16_t *eob_ptr,
     137             :     const int16_t *scan, const int16_t *iscan) {
     138           0 :     quantize_b_helper_c_II(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
     139             :         quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
     140             :         dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 2);
     141           0 : }
     142             : 
     143           0 : void eb_quantize_b_helper_c(
     144             :     const TranLow *coeff_ptr,
     145             :     int32_t stride,
     146             : #
     147             :     int32_t width,
     148             :     int32_t height,
     149             :     intptr_t n_coeffs,
     150             :     int32_t skip_block,
     151             :     const int16_t *zbin_ptr,
     152             :     const int16_t *round_ptr,
     153             :     const int16_t *quant_ptr,
     154             :     const int16_t *quant_shift_ptr,
     155             :     TranLow *qcoeff_ptr,
     156             :     TranLow *dqcoeff_ptr,
     157             :     const int16_t *dequant_ptr,
     158             :     uint16_t *eob_ptr,
     159             :     const int16_t *scan,
     160             :     const int16_t *iscan,
     161             :     const QmVal *qm_ptr,
     162             :     const QmVal *iqm_ptr,
     163             :     const int32_t log_scale)
     164             : {
     165           0 :     const int32_t zbins[2] = {
     166           0 :         ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
     167           0 :         ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale)
     168             :     };
     169           0 :     const int32_t nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
     170           0 :     int32_t i, non_zero_count = (int32_t)n_coeffs, eob = -1;
     171             :     (void)iscan;
     172             : 
     173             :     // Nader quantisation
     174           0 :     for (int32_t x = 0; x < height; x++) {
     175           0 :         memset(qcoeff_ptr + (x * stride), 0, width /*n_coeffs*/ * sizeof(*qcoeff_ptr));
     176           0 :         memset(dqcoeff_ptr + (x * stride), 0, width /*n_coeffs*/ * sizeof(*dqcoeff_ptr));
     177             :     }
     178             : 
     179           0 :     if (!skip_block) {
     180             :         // Pre-scan pass
     181           0 :         for (i = (int32_t)n_coeffs - 1; i >= 0; i--) {
     182           0 :             const int32_t mapRc = scan[i];
     183             : 
     184           0 :             const int32_t rc = ((mapRc / MIN(32, height))  * stride) + (mapRc % MIN(32, width));
     185             : 
     186           0 :             const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
     187           0 :             const int32_t coeff = coeff_ptr[rc] * wt;
     188             : 
     189             :             ////if (mapRc != NewTab[rc])
     190             :             //printf("%d\n", coeff);
     191             : 
     192           0 :             if (coeff < (zbins[rc != 0] * (1 << AOM_QM_BITS)) &&
     193           0 :                 coeff >(nzbins[rc != 0] * (1 << AOM_QM_BITS)))
     194           0 :                 non_zero_count--;
     195             :             else
     196             :                 break;
     197             :         }
     198             :         // Quantization pass: All coefficients with index >= zero_flag are
     199             :         // skippable. Note: zero_flag can be zero.
     200           0 :         for (i = 0; i < non_zero_count; i++) {
     201           0 :             const int32_t mapRc = scan[i];
     202             : 
     203           0 :             const int32_t rc = ((mapRc / MIN(32, height))  * stride) + (mapRc % MIN(32, width));
     204           0 :             const int32_t coeff = coeff_ptr[rc];
     205           0 :             const int32_t coeff_sign = (coeff >> 31);
     206           0 :             const int32_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
     207             :             int32_t tmp32;
     208             : 
     209           0 :             const QmVal wt = qm_ptr != NULL ? qm_ptr[mapRc] : (1 << AOM_QM_BITS);
     210             : 
     211           0 :             if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) {
     212           0 :                 int64_t tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale), INT16_MIN, INT16_MAX);
     213             : 
     214           0 :                 tmp *= wt;
     215             : 
     216           0 :                 tmp32 = (int32_t)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *    quant_shift_ptr[rc != 0]) >> (16 - log_scale + AOM_QM_BITS));  // quantization
     217             : 
     218           0 :                 qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
     219             : 
     220           0 :                 const int32_t iwt = iqm_ptr != NULL ? iqm_ptr[mapRc] : (1 << AOM_QM_BITS);
     221             : 
     222           0 :                 const int32_t dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
     223             : 
     224           0 :                 dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / (1 << log_scale);
     225             : 
     226           0 :                 if (tmp32) eob = i;
     227             :             }
     228             :         }
     229             :     }
     230             : 
     231           0 :     *eob_ptr = (uint16_t)(eob + 1);
     232           0 : }
     233           0 : void eb_highbd_quantize_b_helper_c(
     234             :     const TranLow *coeff_ptr, intptr_t n_coeffs, int32_t skip_block,
     235             :     const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
     236             :     const int16_t *quant_shift_ptr, TranLow *qcoeff_ptr,
     237             :     TranLow *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     238             :     const int16_t *scan, const int16_t *iscan, const QmVal *qm_ptr,
     239             :     const QmVal *iqm_ptr, const int32_t log_scale) {
     240           0 :     int32_t i, eob = -1;
     241           0 :     const int32_t zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
     242           0 :         ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
     243           0 :     const int32_t nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
     244             :     int32_t dequant;
     245             :     int32_t idx_arr[4096];
     246             :     (void)iscan;
     247           0 :     int32_t idx = 0;
     248             : 
     249           0 :     memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
     250           0 :     memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
     251             : 
     252           0 :     if (!skip_block) {
     253             :         // Pre-scan pass
     254           0 :         for (i = 0; i < n_coeffs; i++) {
     255           0 :             const int32_t rc = scan[i];
     256           0 :             const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
     257           0 :             const int32_t coeff = coeff_ptr[rc] * wt;
     258             : 
     259             :             // If the coefficient is out of the base ZBIN range, keep it for
     260             :             // quantization.
     261           0 :             if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS)) ||
     262           0 :                 coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS)))
     263           0 :                 idx_arr[idx++] = i;
     264             :         }
     265             : 
     266             :         // Quantization pass: only process the coefficients selected in
     267             :         // pre-scan pass. Note: idx can be zero.
     268           0 :         for (i = 0; i < idx; i++) {
     269           0 :             const int32_t rc = scan[idx_arr[i]];
     270           0 :             const int32_t coeff = coeff_ptr[rc];
     271           0 :             const int32_t coeff_sign = (coeff >> 31);
     272           0 :             const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
     273           0 :             const QmVal iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
     274           0 :             const int32_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
     275           0 :             const int64_t tmp1 =
     276           0 :                 abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
     277           0 :             const int64_t tmpw = tmp1 * wt;
     278           0 :             const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
     279           0 :             const int32_t abs_qcoeff = (int32_t)((tmp2 * quant_shift_ptr[rc != 0]) >>
     280           0 :                 (16 - log_scale + AOM_QM_BITS));
     281           0 :             qcoeff_ptr[rc] = (TranLow)((abs_qcoeff ^ coeff_sign) - coeff_sign);
     282           0 :             dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
     283             :                 AOM_QM_BITS;
     284           0 :             const TranLow abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
     285           0 :             dqcoeff_ptr[rc] = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
     286           0 :             if (abs_qcoeff) eob = idx_arr[i];
     287             :         }
     288             :     }
     289           0 :     *eob_ptr = (uint16_t)(eob + 1);
     290           0 : }
     291             : 
     292           0 : void eb_aom_highbd_quantize_b_c(const TranLow *coeff_ptr, intptr_t n_coeffs,
     293             :     int32_t skip_block, const int16_t *zbin_ptr,
     294             :     const int16_t *round_ptr, const int16_t *quant_ptr,
     295             :     const int16_t *quant_shift_ptr,
     296             :     TranLow *qcoeff_ptr, TranLow *dqcoeff_ptr,
     297             :     const int16_t *dequant_ptr, uint16_t *eob_ptr,
     298             :     const int16_t *scan, const int16_t *iscan) {
     299           0 :     eb_highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
     300             :         round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
     301             :         dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
     302             :         NULL, NULL, 0);
     303           0 : }
     304             : 
     305           0 : void eb_aom_highbd_quantize_b_32x32_c(
     306             :     const TranLow *coeff_ptr, intptr_t n_coeffs, int32_t skip_block,
     307             :     const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
     308             :     const int16_t *quant_shift_ptr, TranLow *qcoeff_ptr,
     309             :     TranLow *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     310             :     const int16_t *scan, const int16_t *iscan) {
     311           0 :     eb_highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
     312             :         round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
     313             :         dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
     314             :         NULL, NULL, 1);
     315           0 : }
     316             : 
     317           0 : void eb_aom_highbd_quantize_b_64x64_c(
     318             :     const TranLow *coeff_ptr, intptr_t n_coeffs, int32_t skip_block,
     319             :     const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
     320             :     const int16_t *quant_shift_ptr, TranLow *qcoeff_ptr,
     321             :     TranLow *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     322             :     const int16_t *scan, const int16_t *iscan) {
     323           0 :     eb_highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
     324             :         round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
     325             :         dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
     326             :         NULL, NULL, 2);
     327           0 : }
     328             : 
     329           0 : void eb_av1_highbd_quantize_b_facade(const TranLow *coeff_ptr,
     330             :     intptr_t n_coeffs, const MacroblockPlane *p,
     331             :     TranLow *qcoeff_ptr,
     332             :     TranLow *dqcoeff_ptr, uint16_t *eob_ptr,
     333             :     const ScanOrder *sc,
     334             :     const QuantParam *qparam) {
     335             :     // obsolete skip_block
     336           0 :     const int32_t skip_block = 0;
     337           0 :     const QmVal *qm_ptr = qparam->qmatrix;
     338           0 :     const QmVal *iqm_ptr = qparam->iqmatrix;
     339           0 :     if (qm_ptr != NULL && iqm_ptr != NULL) {
     340           0 :         eb_highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
     341             :             p->round_QTX, p->quant_QTX, p->quant_shift_QTX,
     342             :             qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
     343             :             sc->scan, sc->iscan, qm_ptr, iqm_ptr,
     344             :             qparam->log_scale);
     345             :     }
     346             :     else {
     347           0 :         switch (qparam->log_scale) {
     348           0 :         case 0:
     349           0 :             if (LIKELY(n_coeffs >= 8)) {
     350           0 :                 eb_aom_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
     351             :                     p->round_QTX, p->quant_QTX, p->quant_shift_QTX,
     352             :                     qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
     353             :                     eob_ptr, sc->scan, sc->iscan);
     354             :             }
     355             :             else {
     356           0 :                 eb_aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
     357             :                     p->round_QTX, p->quant_QTX,
     358             :                     p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
     359             :                     p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
     360             :             }
     361           0 :             break;
     362           0 :         case 1:
     363           0 :             eb_aom_highbd_quantize_b_32x32(
     364             :                 coeff_ptr, n_coeffs, skip_block, p->zbin_QTX, p->round_QTX,
     365             :                 p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
     366             :                 p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
     367           0 :             break;
     368           0 :         case 2:
     369           0 :             eb_aom_highbd_quantize_b_64x64(
     370             :                 coeff_ptr, n_coeffs, skip_block, p->zbin_QTX, p->round_QTX,
     371             :                 p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
     372             :                 p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
     373           0 :             break;
     374           0 :         default: assert(0);
     375             :         }
     376             :     }
     377           0 : }
     378             : 
     379    64414900 : void av1_quantize_b_facade_II(
     380             :     const TranLow *coeff_ptr,
     381             :     int32_t stride,
     382             :     int32_t                width,
     383             :     int32_t                height,
     384             :     intptr_t n_coeffs,
     385             :     const MacroblockPlane *p,
     386             :     TranLow *qcoeff_ptr,
     387             :     TranLow *dqcoeff_ptr,
     388             :     uint16_t *eob_ptr,
     389             :     const ScanOrder *sc,
     390             :     const QuantParam *qparam)
     391             : {
     392             :     // obsolete skip_block
     393    64414900 :     const int32_t skip_block = 0;
     394    64414900 :     const QmVal *qm_ptr = qparam->qmatrix;
     395    64414900 :     const QmVal *iqm_ptr = qparam->iqmatrix;
     396    64414900 :     if (qm_ptr != NULL && iqm_ptr != NULL) {
     397           0 :         eb_quantize_b_helper_c(
     398             :             coeff_ptr,
     399             :             stride,
     400             :             width,
     401             :             height,
     402             :             n_coeffs,
     403             :             skip_block,
     404             :             p->zbin_QTX,
     405             :             p->round_QTX,
     406             :             p->quant_QTX,
     407             :             p->quant_shift_QTX,
     408             :             qcoeff_ptr,
     409             :             dqcoeff_ptr,
     410             :             p->dequant_QTX,
     411             :             eob_ptr,
     412             :             sc->scan,
     413             :             sc->iscan,
     414             :             qm_ptr,
     415             :             iqm_ptr,
     416             :             qparam->log_scale);
     417             :     }
     418             :     else {
     419    64414900 :         switch (qparam->log_scale) {
     420    56360000 :         case 0:
     421    56360000 :             eb_aom_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
     422             :                 p->round_QTX, p->quant_QTX, p->quant_shift_QTX,
     423             :                 qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
     424             :                 sc->scan, sc->iscan);
     425             : 
     426    56385000 :             break;
     427     6721270 :         case 1:
     428             : 
     429     6721270 :             eb_aom_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
     430             :                 p->round_QTX, p->quant_QTX, p->quant_shift_QTX,
     431             :                 qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
     432             :                 sc->scan, sc->iscan);
     433             : 
     434     6722010 :             break;
     435     1367350 :         case 2:
     436             : 
     437     1367350 :             eb_aom_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
     438             :                 p->round_QTX, p->quant_QTX, p->quant_shift_QTX,
     439             :                 qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
     440             :                 sc->scan, sc->iscan);
     441             : 
     442     1367390 :             break;
     443           0 :         default: assert(0);
     444             :         }
     445             :     }
     446    64474400 : }
     447             : 
     448             : 
     449           0 : static void quantize_fp_helper_c(
     450             :     const TranLow *coeff_ptr,
     451             :     intptr_t n_coeffs,
     452             :     const int16_t *zbin_ptr,
     453             :     const int16_t *round_ptr,
     454             :     const int16_t *quant_ptr,
     455             :     const int16_t *quant_shift_ptr,
     456             :     TranLow *qcoeff_ptr,
     457             :     TranLow *dqcoeff_ptr,
     458             :     const int16_t *dequant_ptr,
     459             :     uint16_t *eob_ptr,
     460             :     const int16_t *scan,
     461             :     const int16_t *iscan,
     462             :     const QmVal *qm_ptr,
     463             :     const QmVal *iqm_ptr,
     464             :     int log_scale)
     465             : {
     466           0 :     int i, eob = -1;
     467           0 :     const int rounding[2] = { ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
     468           0 :                               ROUND_POWER_OF_TWO(round_ptr[1], log_scale) };
     469             :     // TODO(jingning) Decide the need of these arguments after the
     470             :     // quantization process is completed.
     471             :     (void)zbin_ptr;
     472             :     (void)quant_shift_ptr;
     473             :     (void)iscan;
     474             : 
     475           0 :     memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
     476           0 :     memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
     477             : 
     478           0 :     if (qm_ptr == NULL && iqm_ptr == NULL) {
     479           0 :         for (i = 0; i < n_coeffs; i++) {
     480           0 :             const int rc = scan[i];
     481           0 :             const int32_t thresh = (int32_t)(dequant_ptr[rc != 0]);
     482           0 :             const int coeff = coeff_ptr[rc];
     483           0 :             const int coeff_sign = (coeff >> 31);
     484           0 :             int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
     485           0 :             int tmp32 = 0;
     486           0 :             if ((abs_coeff << (1 + log_scale)) >= thresh) {
     487             :                 abs_coeff =
     488           0 :                     clamp64(abs_coeff + rounding[rc != 0], INT16_MIN, INT16_MAX);
     489           0 :                 tmp32 = (int)((abs_coeff * quant_ptr[rc != 0]) >> (16 - log_scale));
     490           0 :                 if (tmp32) {
     491           0 :                     qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
     492           0 :                     const TranLow abs_dqcoeff =
     493           0 :                         (tmp32 * dequant_ptr[rc != 0]) >> log_scale;
     494           0 :                     dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
     495             :                 }
     496             :             }
     497           0 :             if (tmp32) eob = i;
     498             :         }
     499             :     }
     500             :     else {
     501             :         // Quantization pass: All coefficients with index >= zero_flag are
     502             :         // skippable. Note: zero_flag can be zero.
     503           0 :         for (i = 0; i < n_coeffs; i++) {
     504           0 :             const int rc = scan[i];
     505           0 :             const int coeff = coeff_ptr[rc];
     506           0 :             const QmVal wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
     507           0 :             const QmVal iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
     508           0 :             const int dequant =
     509           0 :                 (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
     510             :                 AOM_QM_BITS;
     511           0 :             const int coeff_sign = (coeff >> 31);
     512           0 :             int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
     513           0 :             int tmp32 = 0;
     514           0 :             if (abs_coeff * wt >=
     515           0 :                 (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
     516           0 :                 abs_coeff += rounding[rc != 0];
     517           0 :                 abs_coeff = clamp64(abs_coeff, INT16_MIN, INT16_MAX);
     518           0 :                 tmp32 = (int)((abs_coeff * wt * quant_ptr[rc != 0]) >>
     519           0 :                     (16 - log_scale + AOM_QM_BITS));
     520           0 :                 qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
     521           0 :                 const TranLow abs_dqcoeff = (tmp32 * dequant) >> log_scale;
     522           0 :                 dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
     523             :             }
     524             : 
     525           0 :             if (tmp32) eob = i;
     526             :         }
     527             :     }
     528           0 :     *eob_ptr = eob + 1;
     529           0 : }
     530             : 
     531           0 : void eb_av1_quantize_fp_c(const TranLow *coeff_ptr, intptr_t n_coeffs,
     532             :     const int16_t *zbin_ptr, const int16_t *round_ptr,
     533             :     const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
     534             :     TranLow *qcoeff_ptr, TranLow *dqcoeff_ptr,
     535             :     const int16_t *dequant_ptr, uint16_t *eob_ptr,
     536             :     const int16_t *scan, const int16_t *iscan) {
     537           0 :     quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
     538             :         quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
     539             :         eob_ptr, scan, iscan, NULL, NULL, 0);
     540           0 : }
     541             : 
     542           0 : static void eb_highbd_quantize_fp_helper_c(
     543             :     const TranLow *coeff_ptr,
     544             :     intptr_t count,
     545             :     const int16_t *zbin_ptr,
     546             :     const int16_t *round_ptr,
     547             :     const int16_t *quant_ptr,
     548             :     const int16_t *quant_shift_ptr,
     549             :     TranLow *qcoeff_ptr,
     550             :     TranLow *dqcoeff_ptr,
     551             :     const int16_t *dequant_ptr,
     552             :     uint16_t *eob_ptr,
     553             :     const int16_t *scan,
     554             :     const int16_t *iscan,
     555             :     const QmVal *qm_ptr,
     556             :     const QmVal *iqm_ptr,
     557             :     int16_t log_scale)
     558             : {
     559             :   int i;
     560           0 :   int eob = -1;
     561           0 :   const int shift = 16 - log_scale;
     562             :   // TODO(jingning) Decide the need of these arguments after the
     563             :   // quantization process is completed.
     564             :   (void)zbin_ptr;
     565             :   (void)quant_shift_ptr;
     566             :   (void)iscan;
     567             : 
     568           0 :   if (qm_ptr || iqm_ptr) {
     569             :     // Quantization pass: All coefficients with index >= zero_flag are
     570             :     // skippable. Note: zero_flag can be zero.
     571           0 :     for (i = 0; i < count; i++) {
     572           0 :       const int rc = scan[i];
     573           0 :       const int coeff = coeff_ptr[rc];
     574           0 :       const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
     575           0 :       const QmVal iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
     576           0 :       const int dequant =
     577           0 :           (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
     578             :           AOM_QM_BITS;
     579           0 :       const int coeff_sign = (coeff >> 31);
     580           0 :       const int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
     581           0 :       int abs_qcoeff = 0;
     582           0 :       if (abs_coeff * wt >=
     583           0 :           (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
     584           0 :         const int64_t tmp =
     585           0 :             abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
     586           0 :         abs_qcoeff =
     587           0 :             (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
     588           0 :         qcoeff_ptr[rc] = (TranLow)((abs_qcoeff ^ coeff_sign) - coeff_sign);
     589           0 :         const TranLow abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
     590           0 :         dqcoeff_ptr[rc] = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
     591           0 :         if (abs_qcoeff) eob = i;
     592             :       } else {
     593           0 :         qcoeff_ptr[rc] = 0;
     594           0 :         dqcoeff_ptr[rc] = 0;
     595             :       }
     596             :     }
     597             :   } else {
     598           0 :     const int log_scaled_round_arr[2] = {
     599           0 :       ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
     600           0 :       ROUND_POWER_OF_TWO(round_ptr[1], log_scale),
     601             :     };
     602           0 :     for (i = 0; i < count; i++) {
     603           0 :       const int rc = scan[i];
     604           0 :       const int coeff = coeff_ptr[rc];
     605           0 :       const int rc01 = (rc != 0);
     606           0 :       const int coeff_sign = (coeff >> 31);
     607           0 :       const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
     608           0 :       const int log_scaled_round = log_scaled_round_arr[rc01];
     609           0 :       if ((abs_coeff << (1 + log_scale)) >= dequant_ptr[rc01]) {
     610           0 :         const int quant = quant_ptr[rc01];
     611           0 :         const int dequant = dequant_ptr[rc01];
     612           0 :         const int64_t tmp = (int64_t)abs_coeff + log_scaled_round;
     613           0 :         const int abs_qcoeff = (int)((tmp * quant) >> shift);
     614           0 :         qcoeff_ptr[rc] = (TranLow)((abs_qcoeff ^ coeff_sign) - coeff_sign);
     615           0 :         const TranLow abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
     616           0 :         if (abs_qcoeff) eob = i;
     617           0 :         dqcoeff_ptr[rc] = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
     618             :       } else {
     619           0 :         qcoeff_ptr[rc] = 0;
     620           0 :         dqcoeff_ptr[rc] = 0;
     621             :       }
     622             :     }
     623             :   }
     624           0 :   *eob_ptr = eob + 1;
     625           0 : }
     626             : 
     627           0 : static void highbd_quantize_fp_helper_c(
     628             :     const TranLow *coeff_ptr,
     629             :     intptr_t count,
     630             :     const int16_t *zbin_ptr,
     631             :     const int16_t *round_ptr,
     632             :     const int16_t *quant_ptr,
     633             :     const int16_t *quant_shift_ptr,
     634             :     TranLow *qcoeff_ptr,
     635             :     TranLow *dqcoeff_ptr,
     636             :     const int16_t *dequant_ptr,
     637             :     uint16_t *eob_ptr,
     638             :     const int16_t *scan,
     639             :     const int16_t *iscan,
     640             :     const QmVal *qm_ptr,
     641             :     const QmVal *iqm_ptr,
     642             :     int16_t log_scale)
     643             : {
     644             :   int i;
     645           0 :   int eob = -1;
     646           0 :   const int shift = 16 - log_scale;
     647             :   // TODO(jingning) Decide the need of these arguments after the
     648             :   // quantization process is completed.
     649             :   (void)zbin_ptr;
     650             :   (void)quant_shift_ptr;
     651             :   (void)iscan;
     652             : 
     653           0 :   if (qm_ptr || iqm_ptr) {
     654             :     // Quantization pass: All coefficients with index >= zero_flag are
     655             :     // skippable. Note: zero_flag can be zero.
     656           0 :     for (i = 0; i < count; i++) {
     657           0 :       const int rc = scan[i];
     658           0 :       const int coeff = coeff_ptr[rc];
     659           0 :       const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
     660           0 :       const QmVal iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
     661           0 :       const int dequant =
     662           0 :           (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
     663             :           AOM_QM_BITS;
     664           0 :       const int coeff_sign = (coeff >> 31);
     665           0 :       const int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
     666           0 :       int abs_qcoeff = 0;
     667           0 :       if (abs_coeff * wt >=
     668           0 :           (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
     669           0 :         const int64_t tmp =
     670           0 :             abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
     671           0 :         abs_qcoeff =
     672           0 :             (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
     673           0 :         qcoeff_ptr[rc] = (TranLow)((abs_qcoeff ^ coeff_sign) - coeff_sign);
     674           0 :         const TranLow abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
     675           0 :         dqcoeff_ptr[rc] = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
     676           0 :         if (abs_qcoeff) eob = i;
     677             :       } else {
     678           0 :         qcoeff_ptr[rc] = 0;
     679           0 :         dqcoeff_ptr[rc] = 0;
     680             :       }
     681             :     }
     682             :   } else {
     683           0 :     const int log_scaled_round_arr[2] = {
     684           0 :       ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
     685           0 :       ROUND_POWER_OF_TWO(round_ptr[1], log_scale),
     686             :     };
     687           0 :     for (i = 0; i < count; i++) {
     688           0 :       const int rc = scan[i];
     689           0 :       const int coeff = coeff_ptr[rc];
     690           0 :       const int rc01 = (rc != 0);
     691           0 :       const int coeff_sign = (coeff >> 31);
     692           0 :       const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
     693           0 :       const int log_scaled_round = log_scaled_round_arr[rc01];
     694           0 :       if ((abs_coeff << (1 + log_scale)) >= dequant_ptr[rc01]) {
     695           0 :         const int quant = quant_ptr[rc01];
     696           0 :         const int dequant = dequant_ptr[rc01];
     697           0 :         const int64_t tmp = (int64_t)abs_coeff + log_scaled_round;
     698           0 :         const int abs_qcoeff = (int)((tmp * quant) >> shift);
     699           0 :         qcoeff_ptr[rc] = (TranLow)((abs_qcoeff ^ coeff_sign) - coeff_sign);
     700           0 :         const TranLow abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
     701           0 :         if (abs_qcoeff) eob = i;
     702           0 :         dqcoeff_ptr[rc] = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
     703             :       } else {
     704           0 :         qcoeff_ptr[rc] = 0;
     705           0 :         dqcoeff_ptr[rc] = 0;
     706             :       }
     707             :     }
     708             :   }
     709           0 :   *eob_ptr = eob + 1;
     710           0 : }
     711             : 
     712           0 : void eb_av1_highbd_quantize_fp_c(
     713             :     const TranLow *coeff_ptr,
     714             :     intptr_t count,
     715             :     const int16_t *zbin_ptr,
     716             :     const int16_t *round_ptr,
     717             :     const int16_t *quant_ptr,
     718             :     const int16_t *quant_shift_ptr,
     719             :     TranLow *qcoeff_ptr,
     720             :     TranLow *dqcoeff_ptr,
     721             :     const int16_t *dequant_ptr,
     722             :     uint16_t *eob_ptr,
     723             :     const int16_t *scan,
     724             :     const int16_t *iscan,
     725             :     int16_t log_scale)
     726             : {
     727           0 :   highbd_quantize_fp_helper_c(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
     728             :                               quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
     729             :                               dequant_ptr, eob_ptr, scan, iscan, NULL, NULL,
     730             :                               log_scale);
     731           0 : }
     732             : 
     733           0 : void eb_av1_quantize_fp_32x32_c(const TranLow *coeff_ptr, intptr_t n_coeffs,
     734             :     const int16_t *zbin_ptr, const int16_t *round_ptr,
     735             :     const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
     736             :     TranLow *qcoeff_ptr, TranLow *dqcoeff_ptr,
     737             :     const int16_t *dequant_ptr, uint16_t *eob_ptr,
     738             :     const int16_t *scan, const int16_t *iscan) {
     739           0 :     quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
     740             :         quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
     741             :         eob_ptr, scan, iscan, NULL, NULL, 1);
     742           0 : }
     743             : 
     744           0 : void eb_av1_quantize_fp_64x64_c(const TranLow *coeff_ptr, intptr_t n_coeffs,
     745             :     const int16_t *zbin_ptr, const int16_t *round_ptr,
     746             :     const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
     747             :     TranLow *qcoeff_ptr, TranLow *dqcoeff_ptr,
     748             :     const int16_t *dequant_ptr, uint16_t *eob_ptr,
     749             :     const int16_t *scan, const int16_t *iscan) {
     750           0 :     quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
     751             :         quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
     752             :         eob_ptr, scan, iscan, NULL, NULL, 2);
     753           0 : }
     754             : 
     755    41327800 : void eb_av1_quantize_fp_facade(
     756             :     const TranLow *coeff_ptr,
     757             :     intptr_t n_coeffs,
     758             :     const MacroblockPlane *p,
     759             :     TranLow *qcoeff_ptr,
     760             :     TranLow *dqcoeff_ptr,
     761             :     uint16_t *eob_ptr,
     762             :     const ScanOrder *sc,
     763             :     const QuantParam *qparam)  {
     764             : 
     765    41327800 :     const QmVal *qm_ptr = qparam->qmatrix;
     766    41327800 :     const QmVal *iqm_ptr = qparam->iqmatrix;
     767             : 
     768    41327800 :     if (qm_ptr || iqm_ptr)
     769           0 :         quantize_fp_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
     770             :             p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
     771             :             dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
     772             :             sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
     773             :     else {
     774    41329400 :         switch (qparam->log_scale) {
     775    40665700 :         case 0:
     776    40665700 :             eb_av1_quantize_fp(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
     777             :                 p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
     778             :                 dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
     779             :                 sc->iscan);
     780    40675500 :             break;
     781      647908 :         case 1:
     782      647908 :             eb_av1_quantize_fp_32x32(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
     783             :                 p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
     784             :                 dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
     785             :                 sc->iscan);
     786      647893 :             break;
     787       17528 :         case 2:
     788       17528 :             eb_av1_quantize_fp_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
     789             :                 p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
     790             :                 dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
     791             :                 sc->iscan);
     792       17527 :             break;
     793           0 :         default: assert(0);
     794             :         }
     795             :     }
     796    41340900 : }
     797             : 
     798           0 : void eb_av1_highbd_quantize_fp_facade(
     799             :     const TranLow *coeff_ptr,
     800             :     intptr_t n_coeffs,
     801             :     const MacroblockPlane *p,
     802             :     TranLow *qcoeff_ptr,
     803             :     TranLow *dqcoeff_ptr,
     804             :     uint16_t *eob_ptr,
     805             :     const ScanOrder *sc,
     806             :     const QuantParam *qparam)
     807             : {
     808           0 :   const QmVal *qm_ptr = qparam->qmatrix;
     809           0 :   const QmVal *iqm_ptr = qparam->iqmatrix;
     810           0 :   if (qm_ptr != NULL && iqm_ptr != NULL) {
     811           0 :     eb_highbd_quantize_fp_helper_c(
     812             :         coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, p->quant_fp_QTX,
     813             :         p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
     814           0 :         sc->scan, sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
     815             :   } else {
     816           0 :     eb_av1_highbd_quantize_fp(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
     817             :                            p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
     818             :                            dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
     819           0 :                            sc->iscan, qparam->log_scale);
     820             :   }
     821           0 : }
     822             : 
     823             : // Hsan: code clean up; from static to extern as now used @ more than 1 file
     824             : 
     825             : 
     826             : static const int8_t eob_to_pos_small[33] = {
     827             :     0, 1, 2,                                        // 0-2
     828             :     3, 3,                                           // 3-4
     829             :     4, 4, 4, 4,                                     // 5-8
     830             :     5, 5, 5, 5, 5, 5, 5, 5,                         // 9-16
     831             :     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6  // 17-32
     832             : };
     833             : 
     834             : static const int8_t eob_to_pos_large[17] = {
     835             :     6,                               // place holder
     836             :     7,                               // 33-64
     837             :     8, 8,                            // 65-128
     838             :     9, 9, 9, 9,                      // 129-256
     839             :     10, 10, 10, 10, 10, 10, 10, 10,  // 257-512
     840             :     11                               // 513-
     841             : };
     842             : 
     843   114260000 : static INLINE int32_t get_eob_pos_token(const int32_t eob, int32_t *const extra) {
     844             :     int32_t t;
     845             : 
     846   114260000 :     if (eob < 33)
     847    77123300 :         t = eob_to_pos_small[eob];
     848             :     else {
     849    37136800 :         const int32_t e = AOMMIN((eob - 1) >> 5, 16);
     850    37136800 :         t = eob_to_pos_large[e];
     851             :     }
     852             : 
     853   114260000 :     *extra = eob - eb_k_eob_group_start[t];
     854             : 
     855   114260000 :     return t;
     856             : }
     857             : 
     858    35083000 : static INLINE TxSize get_txsize_entropy_ctx(TxSize txsize) {
     859    35083000 :     return (TxSize)((txsize_sqr_map[txsize] + txsize_sqr_up_map[txsize] + 1) >>
     860             :         1);
     861             : }
     862    35086200 : static INLINE PlaneType get_plane_type(int plane) {
     863    35086200 :     return (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
     864             : }
     865   114263000 : static int32_t get_eob_cost(int32_t eob, const LvMapEobCost *txb_eob_costs,
     866             :     const LvMapCoeffCost *txb_costs, TxType tx_type) {
     867             :     int32_t eob_extra;
     868   114263000 :     const int32_t eob_pt = get_eob_pos_token(eob, &eob_extra);
     869   114269000 :     int32_t eob_cost = 0;
     870   114269000 :     const int32_t eob_multi_ctx = (tx_type_to_class[tx_type] == TX_CLASS_2D) ? 0 : 1;
     871   114269000 :     eob_cost = txb_eob_costs->eob_cost[eob_multi_ctx][eob_pt - 1];
     872             : 
     873   114269000 :     if (eb_k_eob_offset_bits[eob_pt] > 0) {
     874   100106000 :         const int32_t eob_shift = eb_k_eob_offset_bits[eob_pt] - 1;
     875   100106000 :         const int32_t bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
     876   100106000 :         eob_cost += txb_costs->eob_extra_cost[eob_pt][bit];
     877   100106000 :         const int32_t offset_bits = eb_k_eob_offset_bits[eob_pt];
     878   100106000 :         if (offset_bits > 1) eob_cost += av1_cost_literal(offset_bits - 1);
     879             :     }
     880   114269000 :     return eob_cost;
     881             : }
     882             : 
     883    21917800 : static INLINE int get_lower_levels_ctx_general(int is_last, int scan_idx,
     884             :     int bwl, int height,
     885             :     const uint8_t *levels,
     886             :     int coeff_idx, TxSize tx_size,
     887             :     TxClass tx_class) {
     888    21917800 :     if (is_last) {
     889     1519990 :         if (scan_idx == 0) return 0;
     890     1011910 :         if (scan_idx <= (height << bwl) >> 3) return 1;
     891      917076 :         if (scan_idx <= (height << bwl) >> 2) return 2;
     892      842002 :         return 3;
     893             :     }
     894    40797700 :     return get_lower_levels_ctx(levels, coeff_idx, bwl, tx_size, tx_class);
     895             : }
     896             : 
     897    17214000 : static INLINE int32_t get_golomb_cost(int32_t abs_qc) {
     898    17214000 :     if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
     899     1183580 :         const int32_t r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
     900     1183580 :         const int32_t length = get_msb(r) + 1;
     901     1183580 :         return av1_cost_literal(2 * length - 1);
     902             :     }
     903    16030400 :     return 0;
     904             : }
     905    17205600 : static INLINE int get_br_cost(TranLow level, const int *coeff_lps) {
     906    17205600 :     const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
     907    17205600 :     return coeff_lps[base_range] + get_golomb_cost(level);
     908             : }
     909   114633000 : static INLINE int get_coeff_cost_general(int is_last, int ci, TranLow abs_qc,
     910             :     int sign, int coeff_ctx,
     911             :     int dc_sign_ctx,
     912             :     const LvMapCoeffCost *txb_costs,
     913             :     int bwl, TxClass tx_class,
     914             :     const uint8_t *levels) {
     915   114633000 :     int cost = 0;
     916   114633000 :     if (is_last)
     917     3039680 :         cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
     918             :     else
     919   111593000 :         cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
     920   114633000 :     if (abs_qc != 0) {
     921   114755000 :         if (ci == 0)
     922    33153500 :             cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign];
     923             :         else
     924    81601100 :             cost += av1_cost_literal(1);
     925   114755000 :         if (abs_qc > NUM_BASE_LEVELS) {
     926             :             int br_ctx;
     927    14667600 :             if (is_last)
     928      429348 :                 br_ctx = get_br_ctx_eob(ci, bwl, tx_class);
     929             :             else
     930    14238200 :                 br_ctx = get_br_ctx(levels, ci, bwl, (const TxType)tx_class);
     931    14669100 :             cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
     932             :         }
     933             :     }
     934   114633000 :     return cost;
     935             : }
     936   633256000 : static INLINE int64_t get_coeff_dist(TranLow tcoeff, TranLow dqcoeff,
     937             :     int shift) {
     938   633256000 :     const int64_t diff = (tcoeff - dqcoeff) * (1 << shift);
     939   633256000 :     const int64_t error = diff * diff;
     940   633256000 :     return error;
     941             : }
     942    19001200 : static INLINE void get_qc_dqc_low(TranLow abs_qc, int sign, int dqv,
     943             :     int shift, TranLow *qc_low,
     944             :     TranLow *dqc_low) {
     945    19001200 :     TranLow abs_qc_low = abs_qc - 1;
     946    19001200 :     *qc_low = (-sign ^ abs_qc_low) + sign;
     947    19001200 :     assert((sign ? -abs_qc_low : abs_qc_low) == *qc_low);
     948    19001200 :     TranLow abs_dqc_low = (abs_qc_low * dqv) >> shift;
     949    19001200 :     *dqc_low = (-sign ^ abs_dqc_low) + sign;
     950    19001200 :     assert((sign ? -abs_dqc_low : abs_dqc_low) == *dqc_low);
     951    19001200 : }
     952             : static const int golomb_bits_cost[32] = {
     953             :   0,       512,     512 * 3, 512 * 3, 512 * 5, 512 * 5, 512 * 5, 512 * 5,
     954             :   512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7,
     955             :   512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9,
     956             :   512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9
     957             : };
     958             : static const int golomb_cost_diff[32] = {
     959             :   0,       512, 512 * 2, 0, 512 * 2, 0, 0, 0, 512 * 2, 0, 0, 0, 0, 0, 0, 0,
     960             :   512 * 2, 0,   0,       0, 0,       0, 0, 0, 0,       0, 0, 0, 0, 0, 0, 0
     961             : };
     962    44682700 : static INLINE int get_br_cost_with_diff(TranLow level, const int *coeff_lps,
     963             :     int *diff) {
     964    44682700 :     const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
     965    44682700 :     int golomb_bits = 0;
     966    44682700 :     if (level <= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS)
     967    44436500 :         *diff += coeff_lps[base_range + COEFF_BASE_RANGE + 1];
     968             : 
     969    44682700 :     if (level >= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS) {
     970      310212 :         int r = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
     971      310212 :         if (r < 32) {
     972      301475 :             golomb_bits = golomb_bits_cost[r];
     973      301475 :             *diff += golomb_cost_diff[r];
     974             :         }
     975             :         else {
     976        8737 :             golomb_bits = get_golomb_cost(level);
     977        8738 :             *diff += (r & (r - 1)) == 0 ? 1024 : 0;
     978             :         }
     979             :     }
     980             : 
     981    44682700 :     return coeff_lps[base_range] + golomb_bits;
     982             : }
     983             : static AOM_FORCE_INLINE int get_two_coeff_cost_simple(
     984             :     int ci, TranLow abs_qc, int coeff_ctx,
     985             :     const LvMapCoeffCost *txb_costs, int bwl, TxClass tx_class,
     986             :     const uint8_t *levels, int *cost_low) {
     987             :     // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
     988             :     // and not the last (scan_idx != eob - 1)
     989           0 :     assert(ci > 0);
     990             :     //assert(abs_qc + 4 < 4);
     991   295013000 :     int cost = txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
     992   295013000 :     int diff = 0;
     993   295013000 :     if (abs_qc <= 3) diff = txb_costs->base_cost[coeff_ctx][abs_qc + 4];
     994   295013000 :     if (abs_qc) {
     995   301082000 :         cost += av1_cost_literal(1);
     996   301082000 :         if (abs_qc > NUM_BASE_LEVELS) {
     997    44714200 :             const int br_ctx = get_br_ctx(levels, ci, bwl, (const TxType)tx_class);
     998    44703400 :             int brcost_diff = 0;
     999    44703400 :             cost += get_br_cost_with_diff(abs_qc, txb_costs->lps_cost[br_ctx],
    1000             :                 &brcost_diff);
    1001    50494600 :             diff += brcost_diff;
    1002             :         }
    1003             :     }
    1004   300794000 :     *cost_low = cost - diff;
    1005             : 
    1006   300794000 :     return cost;
    1007             : }
    1008   120593000 : static INLINE int get_coeff_cost_eob(int ci, TranLow abs_qc, int sign,
    1009             :     int coeff_ctx, int dc_sign_ctx,
    1010             :     const LvMapCoeffCost *txb_costs,
    1011             :     int bwl, TxClass tx_class) {
    1012   120593000 :     int cost = 0;
    1013   120593000 :     cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
    1014   120593000 :     if (abs_qc != 0) {
    1015   120628000 :         if (ci == 0)
    1016    11682200 :             cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign];
    1017             :         else
    1018   108945000 :             cost += av1_cost_literal(1);
    1019   120628000 :         if (abs_qc > NUM_BASE_LEVELS) {
    1020             :             int br_ctx;
    1021     2541880 :             br_ctx = get_br_ctx_eob(ci, bwl, tx_class);
    1022     2541880 :             cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
    1023             :         }
    1024             :     }
    1025   120593000 :     return cost;
    1026             : }
    1027             : 
    1028             : static AOM_FORCE_INLINE void update_coeff_eob(
    1029             :     int *accu_rate, int64_t *accu_dist, uint16_t *eob, int *nz_num, int *nz_ci,
    1030             :     int si, TxSize tx_size, TxClass tx_class, int bwl, int height,
    1031             :     int dc_sign_ctx, int64_t rdmult, int shift, const int16_t *dequant,
    1032             :     const int16_t *scan, const LvMapEobCost *txb_eob_costs,
    1033             :     const LvMapCoeffCost *txb_costs, const TranLow *tcoeff,
    1034             :     TranLow *qcoeff, TranLow *dqcoeff, uint8_t *levels, int sharpness) {
    1035   436296000 :     const int dqv = dequant[si != 0];
    1036   436296000 :     assert(si != *eob - 1);
    1037   436296000 :     const int ci = scan[si];
    1038   436296000 :     const TranLow qc = qcoeff[ci];
    1039             :     const int coeff_ctx =
    1040   436296000 :         get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
    1041   436462000 :     if (qc == 0)
    1042   358939000 :         *accu_rate += txb_costs->base_cost[coeff_ctx][0];
    1043             :     else {
    1044    77523000 :         int lower_level = 0;
    1045    77523000 :         const TranLow abs_qc = abs(qc);
    1046    77523000 :         const TranLow tqc = tcoeff[ci];
    1047    77523000 :         const TranLow dqc = dqcoeff[ci];
    1048    77523000 :         const int sign = (qc < 0) ? 1 : 0;
    1049    77523000 :         const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
    1050    79569000 :         int64_t dist = get_coeff_dist(tqc, dqc, shift) - dist0;
    1051             :         int rate =
    1052    79531000 :             get_coeff_cost_general(0, ci, abs_qc, sign, coeff_ctx, dc_sign_ctx,
    1053             :                 txb_costs, bwl, tx_class, levels);
    1054    79610600 :         int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist);
    1055             : 
    1056             :         TranLow qc_low, dqc_low;
    1057             :         TranLow abs_qc_low;
    1058             :         int64_t dist_low, rd_low;
    1059             :         int rate_low;
    1060    79610600 :         if (abs_qc == 1) {
    1061    71681900 :             abs_qc_low = 0;
    1062    71681900 :             dqc_low = qc_low = 0;
    1063    71681900 :             dist_low = 0;
    1064    71681900 :             rate_low = txb_costs->base_cost[coeff_ctx][0];
    1065    71681900 :             rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist);
    1066             :         }
    1067             :         else {
    1068     7928760 :             get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
    1069     7928900 :             abs_qc_low = abs_qc - 1;
    1070     7928900 :             dist_low = get_coeff_dist(tqc, dqc_low, shift) - dist0;
    1071             :             rate_low =
    1072     7928620 :                 get_coeff_cost_general(0, ci, abs_qc_low, sign, coeff_ctx,
    1073             :                     dc_sign_ctx, txb_costs, bwl, tx_class, levels);
    1074     7928610 :             rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low);
    1075             :         }
    1076             : 
    1077    79610500 :         int lower_level_new_eob = 0;
    1078    79610500 :         const int new_eob = si + 1;
    1079    79610500 :         const int coeff_ctx_new_eob = get_lower_levels_ctx_eob(bwl, height, si);
    1080             :         const int new_eob_cost =
    1081    79571900 :             get_eob_cost(new_eob, txb_eob_costs, txb_costs, (TxType)tx_class);
    1082    79635000 :         int rate_coeff_eob =
    1083    79587900 :             new_eob_cost + get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx_new_eob,
    1084             :                 dc_sign_ctx, txb_costs, bwl,
    1085             :                 tx_class);
    1086    79635000 :         int64_t dist_new_eob = dist;
    1087    79635000 :         int64_t rd_new_eob = RDCOST(rdmult, rate_coeff_eob, dist_new_eob);
    1088             : 
    1089    79635000 :         if (abs_qc_low > 0) {
    1090     7928220 :             const int rate_coeff_eob_low =
    1091     7928640 :                 new_eob_cost + get_coeff_cost_eob(ci, abs_qc_low, sign,
    1092             :                     coeff_ctx_new_eob, dc_sign_ctx,
    1093             :                     txb_costs, bwl, tx_class);
    1094     7928220 :             const int64_t dist_new_eob_low = dist_low;
    1095     7928220 :             const int64_t rd_new_eob_low =
    1096     7928220 :                 RDCOST(rdmult, rate_coeff_eob_low, dist_new_eob_low);
    1097     7928220 :             if (rd_new_eob_low < rd_new_eob) {
    1098     1920400 :                 lower_level_new_eob = 1;
    1099     1920400 :                 rd_new_eob = rd_new_eob_low;
    1100     1920400 :                 rate_coeff_eob = rate_coeff_eob_low;
    1101     1920400 :                 dist_new_eob = dist_new_eob_low;
    1102             :             }
    1103             :         }
    1104             : 
    1105    79634600 :         if (rd_low < rd) {
    1106    15975400 :             lower_level = 1;
    1107    15975400 :             rd = rd_low;
    1108    15975400 :             rate = rate_low;
    1109    15975400 :             dist = dist_low;
    1110             :         }
    1111             : 
    1112    79634600 :         if (sharpness == 0 && rd_new_eob < rd) {
    1113    42599100 :             for (int ni = 0; ni < *nz_num; ++ni) {
    1114    21834900 :                 int last_ci = nz_ci[ni];
    1115    21834900 :                 levels[get_padded_idx(last_ci, bwl)] = 0;
    1116    21834300 :                 qcoeff[last_ci] = 0;
    1117    21834300 :                 dqcoeff[last_ci] = 0;
    1118             :             }
    1119    20764200 :             *eob = new_eob;
    1120    20764200 :             *nz_num = 0;
    1121    20764200 :             *accu_rate = rate_coeff_eob;
    1122    20764200 :             *accu_dist = dist_new_eob;
    1123    20764200 :             lower_level = lower_level_new_eob;
    1124             :         }
    1125             :         else {
    1126    58869800 :             *accu_rate += rate;
    1127    58869800 :             *accu_dist += dist;
    1128             :         }
    1129             : 
    1130    79634000 :         if (lower_level) {
    1131    11388900 :             qcoeff[ci] = qc_low;
    1132    11388900 :             dqcoeff[ci] = dqc_low;
    1133    11388900 :             levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
    1134             :         }
    1135    79633900 :         if (qcoeff[ci]) {
    1136    69316700 :             nz_ci[*nz_num] = ci;
    1137    69316700 :             ++*nz_num;
    1138             :         }
    1139             :     }
    1140   438573000 : }
    1141    21918300 : static INLINE void update_coeff_general(
    1142             :     int *accu_rate,
    1143             :     int64_t *accu_dist,
    1144             :     int si,
    1145             :     int eob,
    1146             :     TxSize tx_size,
    1147             :     TxClass tx_class,
    1148             :     int bwl,
    1149             :     int height,
    1150             :     int64_t rdmult,
    1151             :     int shift,
    1152             :     int dc_sign_ctx,
    1153             :     const int16_t *dequant,
    1154             :     const int16_t *scan,
    1155             :     const LvMapCoeffCost *txb_costs,
    1156             :     const TranLow *tcoeff,
    1157             :     TranLow *qcoeff,
    1158             :     TranLow *dqcoeff,
    1159             :     uint8_t *levels) {
    1160    21918300 :     const int dqv = dequant[si != 0];
    1161    21918300 :     const int ci = scan[si];
    1162    21918300 :     const TranLow qc = qcoeff[ci];
    1163    21918300 :     const int is_last = si == (eob - 1);
    1164    21918300 :     const int coeff_ctx = get_lower_levels_ctx_general(
    1165             :         is_last, si, bwl, height, levels, ci, tx_size, tx_class);
    1166    21917500 :     if (qc == 0)
    1167     5433260 :         *accu_rate += txb_costs->base_cost[coeff_ctx][0];
    1168             :     else {
    1169    16484200 :         const int sign = (qc < 0) ? 1 : 0;
    1170    16484200 :         const TranLow abs_qc = abs(qc);
    1171    16484200 :         const TranLow tqc = tcoeff[ci];
    1172    16484200 :         const TranLow dqc = dqcoeff[ci];
    1173    16484200 :         const int64_t dist = get_coeff_dist(tqc, dqc, shift);
    1174    16488200 :         const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
    1175             :         const int rate =
    1176    16483300 :             get_coeff_cost_general(is_last, ci, abs_qc, sign, coeff_ctx,
    1177             :                 dc_sign_ctx, txb_costs, bwl, tx_class, levels);
    1178    16489600 :         const int64_t rd = RDCOST(rdmult, rate, dist);
    1179             : 
    1180             :         TranLow qc_low, dqc_low;
    1181             :         TranLow abs_qc_low;
    1182             :         int64_t dist_low, rd_low;
    1183             :         int rate_low;
    1184    16489600 :         if (abs_qc == 1) {
    1185     5407650 :             abs_qc_low = qc_low = dqc_low = 0;
    1186     5407650 :             dist_low = dist0;
    1187     5407650 :             rate_low = txb_costs->base_cost[coeff_ctx][0];
    1188             :         }
    1189             :         else {
    1190    11081900 :             get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
    1191    11082400 :             abs_qc_low = abs_qc - 1;
    1192    11082400 :             dist_low = get_coeff_dist(tqc, dqc_low, shift);
    1193             :             rate_low =
    1194    11081400 :                 get_coeff_cost_general(is_last, ci, abs_qc_low, sign, coeff_ctx,
    1195             :                     dc_sign_ctx, txb_costs, bwl, tx_class, levels);
    1196             :         }
    1197             : 
    1198    16483800 :         rd_low = RDCOST(rdmult, rate_low, dist_low);
    1199    16483800 :         if (rd_low < rd) {
    1200      898181 :             qcoeff[ci] = qc_low;
    1201      898181 :             dqcoeff[ci] = dqc_low;
    1202      898181 :             levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
    1203      898184 :             *accu_rate += rate_low;
    1204      898184 :             *accu_dist += dist_low - dist0;
    1205             :         }
    1206             :         else {
    1207    15585600 :             *accu_rate += rate;
    1208    15585600 :             *accu_dist += dist - dist0;
    1209             :         }
    1210             :     }
    1211    21917000 : }
    1212             : 
    1213             : static AOM_FORCE_INLINE void update_coeff_simple(
    1214             :     int *accu_rate,
    1215             :     int si,
    1216             :     int eob,
    1217             :     TxSize tx_size,
    1218             :     TxClass tx_class,
    1219             :     int bwl,
    1220             :     int64_t rdmult,
    1221             :     int shift,
    1222             :     const int16_t *dequant,
    1223             :     const int16_t *scan,
    1224             :     const LvMapCoeffCost *txb_costs,
    1225             :     const TranLow *tcoeff,
    1226             :     TranLow *qcoeff,
    1227             :     TranLow *dqcoeff,
    1228             :     uint8_t *levels) {
    1229   695711000 :     const int dqv = dequant[1];
    1230             :     (void)eob;
    1231             :     // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
    1232             :     // and not the last (scan_idx != eob - 1)
    1233           0 :     assert(si != eob - 1);
    1234   695711000 :     assert(si > 0);
    1235   695711000 :     const int ci = scan[si];
    1236   695711000 :     const TranLow qc = qcoeff[ci];
    1237             :     const int coeff_ctx =
    1238   695711000 :         get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
    1239   695361000 :     if (qc == 0)
    1240   400347000 :         *accu_rate += txb_costs->base_cost[coeff_ctx][0];
    1241             :     else {
    1242   295013000 :         const TranLow abs_qc = abs(qc);
    1243   295013000 :         const TranLow abs_tqc = abs(tcoeff[ci]);
    1244   295013000 :         const TranLow abs_dqc = abs(dqcoeff[ci]);
    1245   295013000 :         int rate_low = 0;
    1246   300794000 :         const int rate = get_two_coeff_cost_simple(
    1247             :             ci, abs_qc, coeff_ctx, txb_costs, bwl, tx_class, levels, &rate_low);
    1248   300794000 :         if (abs_dqc < abs_tqc) {
    1249   120084000 :             *accu_rate += rate;
    1250   120084000 :             return;
    1251             :         }
    1252             : 
    1253   180710000 :         const int64_t dist = get_coeff_dist(abs_tqc, abs_dqc, shift);
    1254   181313000 :         const int64_t rd = RDCOST(rdmult, rate, dist);
    1255             : 
    1256   181313000 :         const TranLow abs_qc_low = abs_qc - 1;
    1257   181313000 :         const TranLow abs_dqc_low = (abs_qc_low * dqv) >> shift;
    1258   181313000 :         const int64_t dist_low = get_coeff_dist(abs_tqc, abs_dqc_low, shift);
    1259   181382000 :         const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);
    1260             : 
    1261   181382000 :         if (rd_low < rd) {
    1262    18445500 :             const int sign = (qc < 0) ? 1 : 0;
    1263    18445500 :             qcoeff[ci] = (-sign ^ abs_qc_low) + sign;
    1264    18445500 :             dqcoeff[ci] = (-sign ^ abs_dqc_low) + sign;
    1265    18445500 :             levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
    1266    18445200 :             *accu_rate += rate_low;
    1267             :         }
    1268             :         else
    1269   162936000 :             *accu_rate += rate;
    1270             :     }
    1271             : }
    1272    12986100 : static INLINE void update_skip(int *accu_rate, int64_t accu_dist, uint16_t *eob,
    1273             :     int nz_num, int *nz_ci, int64_t rdmult,
    1274             :     int skip_cost, int non_skip_cost,
    1275             :     TranLow *qcoeff, TranLow *dqcoeff,
    1276             :     int sharpness) {
    1277    12986100 :     const int64_t rd = RDCOST(rdmult, *accu_rate + non_skip_cost, accu_dist);
    1278    12986100 :     const int64_t rd_new_eob = RDCOST(rdmult, skip_cost, 0);
    1279    12986100 :     if (sharpness == 0 && rd_new_eob < rd) {
    1280     8048640 :         for (int i = 0; i < nz_num; ++i) {
    1281     4147310 :             const int ci = nz_ci[i];
    1282     4147310 :             qcoeff[ci] = 0;
    1283     4147310 :             dqcoeff[ci] = 0;
    1284             :             // no need to set up levels because this is the last step
    1285             :             // levels[get_padded_idx(ci, bwl)] = 0;
    1286             :         }
    1287     3901340 :         *accu_rate = 0;
    1288     3901340 :         *eob = 0;
    1289             :     }
    1290    12986100 : }
    1291             : enum {
    1292             :     NO_AQ = 0,
    1293             :     VARIANCE_AQ = 1,
    1294             :     COMPLEXITY_AQ = 2,
    1295             :     CYCLIC_REFRESH_AQ = 3,
    1296             :     AQ_MODE_COUNT  // This should always be the last member of the enum
    1297             : } UENUM1BYTE(AQ_MODE);
    1298             : enum {
    1299             :     NO_DELTA_Q = 0,
    1300             :     DELTA_Q_ONLY = 1,
    1301             :     DELTA_Q_LF = 2,
    1302             :     DELTAQ_MODE_COUNT  // This should always be the last member of the enum
    1303             : } UENUM1BYTE(DELTAQ_MODE);
    1304             : 
    1305             : // These numbers are empirically obtained.
    1306             : static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
    1307             :   { 17, 13 },
    1308             :   { 16, 10 },
    1309             : };
    1310             : 
    1311    35094100 : void eb_av1_optimize_b(
    1312             :     ModeDecisionContext  *md_context,
    1313             :     int16_t                 txb_skip_context,
    1314             :     int16_t                 dc_sign_context,
    1315             :     const TranLow       *coeff_ptr,
    1316             :     int32_t                 stride,
    1317             :     intptr_t                n_coeffs,
    1318             :     const MacroblockPlane  *p,
    1319             :     TranLow             *qcoeff_ptr,
    1320             :     TranLow             *dqcoeff_ptr,
    1321             :     uint16_t               *eob,
    1322             :     const ScanOrder       *sc,
    1323             :     const QuantParam      *qparam,
    1324             :     TxSize                  tx_size,
    1325             :     TxType                  tx_type,
    1326             :     EbBool                  is_inter,
    1327             :     uint32_t                bit_increment,
    1328             :     int                     plane)
    1329             : 
    1330             : {
    1331             :     (void)stride;
    1332             :     (void)n_coeffs;
    1333             :     (void)sc;
    1334             :     (void)qparam;
    1335             :     (void)bit_increment;
    1336             : 
    1337             :     // Hsan (Trellis): hardcoded as not supported:
    1338    35094100 :     int sharpness = 0; // No Sharpness
    1339    35094100 :     int fast_mode = 0; // TBD
    1340    35094100 :     AQ_MODE aq_mode = NO_AQ;
    1341    35094100 :     DELTAQ_MODE deltaq_mode = NO_DELTA_Q;
    1342    35094100 :     int8_t segment_id = 0;
    1343    35094100 :     int sb_energy_level = 0;
    1344    35094100 :     const ScanOrder *const scan_order = &av1_scan_orders[tx_size][tx_type];
    1345    35094100 :     const int16_t *scan = scan_order->scan;
    1346    35094100 :     const int shift = av1_get_tx_scale(tx_size);
    1347    35087500 :     const PlaneType plane_type = get_plane_type(plane);
    1348    35084000 :     const TxSize txs_ctx = get_txsize_entropy_ctx(tx_size);
    1349    35072400 :     const TxClass tx_class = tx_type_to_class[tx_type];
    1350    35072400 :     const int bwl = get_txb_bwl(tx_size);
    1351    35058800 :     const int width = get_txb_wide(tx_size);
    1352    35037100 :     const int height = get_txb_high(tx_size);
    1353    35019100 :     assert(width == (1 << bwl));
    1354    35019100 :     assert(txs_ctx < TX_SIZES);
    1355    35019100 :     const LvMapCoeffCost *txb_costs = &md_context->md_rate_estimation_ptr->coeff_fac_bits[txs_ctx][plane_type];
    1356    35019100 :     const int eob_multi_size = txsize_log2_minus4[tx_size];
    1357    35019100 :     const LvMapEobCost *txb_eob_costs = &md_context->md_rate_estimation_ptr->eob_frac_bits[eob_multi_size][plane_type];
    1358    35019100 :     const int rshift =
    1359    35019100 :         (sharpness +
    1360           0 :         (aq_mode == VARIANCE_AQ && segment_id < 4
    1361           0 :             ? 7 - segment_id
    1362    35019100 :             : 2) +
    1363    35021100 :             (aq_mode != VARIANCE_AQ &&
    1364           0 :                 deltaq_mode > NO_DELTA_Q && sb_energy_level < 0
    1365             :                 ? (3 - sb_energy_level)
    1366    70040300 :                 : 0));
    1367    35019100 :     const int64_t rdmult =
    1368    35019100 :         (((int64_t)md_context->full_lambda *
    1369    35019100 :         plane_rd_mult[is_inter][plane_type]) +
    1370             :             2) >>
    1371             :         rshift;
    1372             :     uint8_t levels_buf[TX_PAD_2D];
    1373    35019100 :     uint8_t *const levels = set_levels(levels_buf, width);
    1374             : 
    1375    35020900 :     if (*eob > 1) eb_av1_txb_init_levels(qcoeff_ptr, width, height, levels);
    1376             :     // TODO(angirbird): check iqmatrix
    1377    35038600 :     const int non_skip_cost = txb_costs->txb_skip_cost[txb_skip_context][0];
    1378    35038600 :     const int skip_cost = txb_costs->txb_skip_cost[txb_skip_context][1];
    1379    35038600 :     const int eob_cost = get_eob_cost(*eob, txb_eob_costs, txb_costs, (TxType)tx_class);
    1380    35059100 :     int accu_rate = eob_cost;
    1381             : 
    1382    35059100 :     int64_t accu_dist = 0;
    1383    35059100 :     int si = *eob - 1;
    1384    35059100 :     const int ci = scan[si];
    1385    35059100 :     const TranLow qc = qcoeff_ptr[ci];
    1386    35059100 :     const TranLow abs_qc = abs(qc);
    1387    35059100 :     const int sign = qc < 0;
    1388    35059100 :     const int max_nz_num = 2;
    1389    35059100 :     int nz_num = 1;
    1390    35059100 :     int nz_ci[3] = { ci, 0, 0 };
    1391             : 
    1392    35059100 :     if (abs_qc >= 2) {
    1393     1512150 :         update_coeff_general(
    1394             :             &accu_rate,
    1395             :             &accu_dist,
    1396             :             si,
    1397     1512150 :             *eob,
    1398             :             tx_size,
    1399             :             tx_class,
    1400             :             bwl,
    1401             :             height,
    1402             :             rdmult,
    1403             :             shift,
    1404             :             dc_sign_context,
    1405             :             p->dequant_QTX,
    1406             :             scan,
    1407             :             txb_costs,
    1408             :             coeff_ptr,
    1409             :             qcoeff_ptr,
    1410             :             dqcoeff_ptr,
    1411             :             levels);
    1412     1519940 :         --si;
    1413             :     }
    1414             :     else {
    1415    33546900 :         assert(abs_qc == 1);
    1416    33546900 :         const int coeff_ctx = get_lower_levels_ctx_eob(bwl, height, si);
    1417    33543700 :         accu_rate += get_coeff_cost_eob(
    1418             :             ci,
    1419             :             abs_qc,
    1420             :             sign,
    1421             :             coeff_ctx,
    1422             :             dc_sign_context,
    1423             :             txb_costs,
    1424             :             bwl,
    1425             :             tx_class);
    1426             : 
    1427    33541300 :         const TranLow tqc = coeff_ptr[ci];
    1428    33541300 :         const TranLow dqc = dqcoeff_ptr[ci];
    1429    33541300 :         const int64_t dist = get_coeff_dist(tqc, dqc, shift);
    1430    33539100 :         const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
    1431    33538000 :         accu_dist += dist - dist0;
    1432    33538000 :         --si;
    1433             :     }
    1434             : 
    1435             : #define UPDATE_COEFF_EOB_CASE(tx_class_literal)                                       \
    1436             :   case tx_class_literal:                                                              \
    1437             :     for (; si >= 0 && nz_num <= max_nz_num && !fast_mode; --si) {                     \
    1438             :       update_coeff_eob(&accu_rate, &accu_dist, eob, &nz_num, nz_ci, si,              \
    1439             :                        tx_size, tx_class_literal, bwl, height,                        \
    1440             :                        dc_sign_context, rdmult, shift, p->dequant_QTX, scan,          \
    1441             :                        txb_eob_costs, txb_costs, coeff_ptr, qcoeff_ptr, dqcoeff_ptr,  \
    1442             :                        levels, sharpness);                                            \
    1443             :     }                                                                                 \
    1444             :     break;
    1445    35058000 :     switch (tx_class) {
    1446   405244000 :         UPDATE_COEFF_EOB_CASE(TX_CLASS_2D);
    1447    34552400 :         UPDATE_COEFF_EOB_CASE(TX_CLASS_HORIZ);
    1448    33835000 :         UPDATE_COEFF_EOB_CASE(TX_CLASS_VERT);
    1449             : #undef UPDATE_COEFF_EOB_CASE
    1450           0 :     default: assert(false);
    1451             :     }
    1452             : 
    1453    37335700 :     if (si == -1 && nz_num <= max_nz_num) {
    1454    12986200 :         update_skip(&accu_rate, accu_dist, eob, nz_num, nz_ci, rdmult, skip_cost,
    1455             :             non_skip_cost, qcoeff_ptr, dqcoeff_ptr, sharpness);
    1456             :     }
    1457             : 
    1458             : #define UPDATE_COEFF_SIMPLE_CASE(tx_class_literal)                                   \
    1459             :   case tx_class_literal:                                                             \
    1460             :     for (; si >= 1; --si) {                                                          \
    1461             :       update_coeff_simple(&accu_rate, si, *eob, tx_size, tx_class_literal, bwl,       \
    1462             :                           rdmult, shift, p->dequant_QTX, scan, txb_costs, coeff_ptr, \
    1463             :                           qcoeff_ptr, dqcoeff_ptr, levels);                          \
    1464             :     }                                                                                \
    1465             :     break;
    1466    35079600 :     switch (tx_class) {
    1467   633522000 :         UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_2D);
    1468    48488400 :         UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_HORIZ);
    1469    54882600 :         UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_VERT);
    1470             : #undef UPDATE_COEFF_SIMPLE_CASE
    1471           0 :     default: assert(false);
    1472             :     }
    1473             : 
    1474             :     // DC position
    1475    41181900 :     if (si == 0) {
    1476             :         // no need to update accu_dist because it's not used after this point
    1477    20394100 :         int64_t dummy_dist = 0;
    1478    20394100 :         update_coeff_general(&accu_rate, &dummy_dist, si, *eob, tx_size, tx_class,
    1479             :             bwl, height, rdmult, shift, dc_sign_context,
    1480             :             p->dequant_QTX, scan, txb_costs, coeff_ptr, qcoeff_ptr, dqcoeff_ptr,
    1481             :             levels);
    1482             :     }
    1483    41178100 : }
    1484             : 
    1485   102074000 : static INLINE void set_dc_sign(int32_t *cul_level, int32_t dc_val) {
    1486   102074000 :     if (dc_val < 0)
    1487    14532500 :         *cul_level |= 1 << COEFF_CONTEXT_BITS;
    1488    87541900 :     else if (dc_val > 0)
    1489    17956500 :         *cul_level += 2 << COEFF_CONTEXT_BITS;
    1490   102074000 : }
    1491   102058000 : int32_t av1_quantize_inv_quantize(
    1492             :     PictureControlSet           *picture_control_set_ptr,
    1493             :     ModeDecisionContext         *md_context,
    1494             :     int32_t                     *coeff,
    1495             :     const uint32_t               coeff_stride,
    1496             :     int32_t                     *quant_coeff,
    1497             :     int32_t                     *recon_coeff,
    1498             :     uint32_t                     qp,
    1499             :     int32_t                segmentation_qp_offset,
    1500             :     uint32_t                     width,
    1501             :     uint32_t                     height,
    1502             :     TxSize                       txsize,
    1503             :     uint16_t                    *eob,
    1504             :     uint32_t                    *count_non_zero_coeffs,
    1505             : 
    1506             :     uint32_t                     component_type,
    1507             :     uint32_t                     bit_increment,
    1508             :     TxType                       tx_type,
    1509             :     ModeDecisionCandidateBuffer *candidate_buffer,
    1510             :     int16_t                      txb_skip_context,    // Hsan (Trellis): derived @ MD (what about re-generating @ EP ?)
    1511             :     int16_t                      dc_sign_context,     // Hsan (Trellis): derived @ MD (what about re-generating @ EP ?)
    1512             :     PredictionMode               pred_mode,
    1513             :     EbBool                       is_intra_bc,
    1514             :     EbBool                       is_encode_pass)
    1515             : {
    1516             :     (void)candidate_buffer;
    1517             :     (void)is_encode_pass;
    1518             :     (void)coeff_stride;
    1519             : #if !ADD_DELTA_QP_SUPPORT
    1520             :     (void) qp;
    1521             : #endif
    1522             :     MacroblockPlane      candidate_plane ;
    1523             : 
    1524   102058000 :     const QmVal *qMatrix = picture_control_set_ptr->parent_pcs_ptr->gqmatrix[NUM_QM_LEVELS - 1][0][txsize];
    1525   102058000 :     const QmVal *iqMatrix = picture_control_set_ptr->parent_pcs_ptr->giqmatrix[NUM_QM_LEVELS - 1][0][txsize];
    1526             : #if ADD_DELTA_QP_SUPPORT
    1527   102058000 :     uint32_t qIndex = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.delta_q_params.delta_q_present ? quantizer_to_qindex[qp] : picture_control_set_ptr->parent_pcs_ptr->frm_hdr.quantization_params.base_q_idx + segmentation_qp_offset;
    1528             : #else
    1529             :     uint32_t qIndex = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.quantization_params.base_q_idx + segmentation_qp_offset ;
    1530             : #endif
    1531   102058000 :     if (bit_increment == 0) {
    1532   102092000 :         if (component_type == COMPONENT_LUMA) {
    1533    77502500 :             candidate_plane.quant_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_quant[qIndex];
    1534    77502500 :             candidate_plane.quant_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_quant_fp[qIndex];
    1535    77502500 :             candidate_plane.round_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_round_fp[qIndex];
    1536    77502500 :             candidate_plane.quant_shift_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_quant_shift[qIndex];
    1537    77502500 :             candidate_plane.zbin_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_zbin[qIndex];
    1538    77502500 :             candidate_plane.round_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_round[qIndex];
    1539    77502500 :             candidate_plane.dequant_QTX = picture_control_set_ptr->parent_pcs_ptr->deqMd.y_dequant_QTX[qIndex];
    1540             :         }
    1541             : 
    1542   102092000 :         if (component_type == COMPONENT_CHROMA_CB) {
    1543    12527400 :             candidate_plane.quant_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_quant[qIndex];
    1544    12527400 :             candidate_plane.quant_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_quant_fp[qIndex];
    1545    12527400 :             candidate_plane.round_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_round_fp[qIndex];
    1546    12527400 :             candidate_plane.quant_shift_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_quant_shift[qIndex];
    1547    12527400 :             candidate_plane.zbin_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_zbin[qIndex];
    1548    12527400 :             candidate_plane.round_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_round[qIndex];
    1549    12527400 :             candidate_plane.dequant_QTX = picture_control_set_ptr->parent_pcs_ptr->deqMd.u_dequant_QTX[qIndex];
    1550             :         }
    1551             : 
    1552   102092000 :         if (component_type == COMPONENT_CHROMA_CR) {
    1553    12163300 :             candidate_plane.quant_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_quant[qIndex];
    1554    12163300 :             candidate_plane.quant_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_quant_fp[qIndex];
    1555    12163300 :             candidate_plane.round_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_round_fp[qIndex];
    1556    12163300 :             candidate_plane.quant_shift_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_quant_shift[qIndex];
    1557    12163300 :             candidate_plane.zbin_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_zbin[qIndex];
    1558    12163300 :             candidate_plane.round_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_round[qIndex];
    1559    12163300 :             candidate_plane.dequant_QTX = picture_control_set_ptr->parent_pcs_ptr->deqMd.v_dequant_QTX[qIndex];
    1560             :         }
    1561             :     }
    1562             :     else {
    1563           0 :         if (component_type == COMPONENT_LUMA) {
    1564           0 :             candidate_plane.quant_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.y_quant[qIndex];
    1565           0 :             candidate_plane.quant_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.y_quant_fp[qIndex];
    1566           0 :             candidate_plane.round_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.y_round_fp[qIndex];
    1567           0 :             candidate_plane.quant_shift_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.y_quant_shift[qIndex];
    1568           0 :             candidate_plane.zbin_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.y_zbin[qIndex];
    1569           0 :             candidate_plane.round_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.y_round[qIndex];
    1570           0 :             candidate_plane.dequant_QTX = picture_control_set_ptr->parent_pcs_ptr->deq.y_dequant_QTX[qIndex];
    1571             :         }
    1572             : 
    1573           0 :         if (component_type == COMPONENT_CHROMA_CB) {
    1574           0 :             candidate_plane.quant_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.u_quant[qIndex];
    1575           0 :             candidate_plane.quant_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.u_quant_fp[qIndex];
    1576           0 :             candidate_plane.round_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.u_round_fp[qIndex];
    1577           0 :             candidate_plane.quant_shift_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.u_quant_shift[qIndex];
    1578           0 :             candidate_plane.zbin_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.u_zbin[qIndex];
    1579           0 :             candidate_plane.round_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.u_round[qIndex];
    1580           0 :             candidate_plane.dequant_QTX = picture_control_set_ptr->parent_pcs_ptr->deq.u_dequant_QTX[qIndex];
    1581             :         }
    1582             : 
    1583           0 :         if (component_type == COMPONENT_CHROMA_CR) {
    1584           0 :             candidate_plane.quant_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.v_quant[qIndex];
    1585           0 :             candidate_plane.quant_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.v_quant_fp[qIndex];
    1586           0 :             candidate_plane.round_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.v_round_fp[qIndex];
    1587           0 :             candidate_plane.quant_shift_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.v_quant_shift[qIndex];
    1588           0 :             candidate_plane.zbin_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.v_zbin[qIndex];
    1589           0 :             candidate_plane.round_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.v_round[qIndex];
    1590           0 :             candidate_plane.dequant_QTX = picture_control_set_ptr->parent_pcs_ptr->deq.v_dequant_QTX[qIndex];
    1591             :         }
    1592             :     }
    1593             : 
    1594   102058000 :     const ScanOrder *const scan_order = &av1_scan_orders[txsize][tx_type];  //get_scan(tx_size, tx_type);
    1595             : 
    1596   102058000 :     const int32_t n_coeffs = av1_get_max_eob(txsize);
    1597             : 
    1598             :     QuantParam qparam;
    1599             : 
    1600   102064000 :     qparam.log_scale = av1_get_tx_scale(txsize);
    1601   102159000 :     qparam.tx_size = txsize;
    1602   102159000 :     qparam.qmatrix = qMatrix;
    1603   102159000 :     qparam.iqmatrix = iqMatrix;
    1604             : 
    1605             : 
    1606   102159000 :     EbBool is_inter = (pred_mode >= NEARESTMV);
    1607             : #if RDOQ_CHROMA
    1608   102159000 :     EbBool perform_rdoq = ((md_context->md_staging_skip_rdoq == EB_FALSE || is_encode_pass) && md_context->trellis_quant_coeff_optimization && !is_intra_bc);
    1609             : #else
    1610             :     EbBool perform_rdoq = ((md_context->md_staging_skip_rdoq == EB_FALSE || is_encode_pass) && md_context->trellis_quant_coeff_optimization && component_type == COMPONENT_LUMA && !is_intra_bc);
    1611             : #endif
    1612             : 
    1613   102159000 :     SequenceControlSet *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
    1614   102159000 :     perform_rdoq = perform_rdoq && (EbBool) sequence_control_set_ptr->static_config.enable_rdoq;
    1615   102159000 :     if (sequence_control_set_ptr->static_config.encoder_bit_depth > 8
    1616           0 :         && picture_control_set_ptr->hbd_mode_decision==0 )
    1617           0 :         perform_rdoq = EB_FALSE;
    1618             : 
    1619             :     // Hsan: set to FALSE until adding x86 quantize_fp
    1620   102159000 :     EbBool perform_quantize_fp = picture_control_set_ptr->enc_mode == ENC_M0 ? EB_TRUE: EB_FALSE;
    1621             : 
    1622   102159000 :     if (perform_rdoq && perform_quantize_fp && !is_inter) {
    1623    41328300 :         if (bit_increment) {
    1624           0 :             eb_av1_highbd_quantize_fp_facade(
    1625             :                 (TranLow*)coeff,
    1626             :                 n_coeffs,
    1627             :                 &candidate_plane,
    1628             :                 quant_coeff,
    1629             :                 (TranLow*)recon_coeff,
    1630             :                 eob,
    1631             :                 scan_order,
    1632             :                 &qparam);
    1633             :         } else {
    1634    41328300 :             eb_av1_quantize_fp_facade(
    1635             :                 (TranLow*)coeff,
    1636             :                 n_coeffs,
    1637             :                 &candidate_plane,
    1638             :                 quant_coeff,
    1639             :                 (TranLow*)recon_coeff,
    1640             :                 eob,
    1641             :                 scan_order,
    1642             :                 &qparam);
    1643             :         }
    1644             :     } else {
    1645    60830600 :         if (bit_increment) {
    1646           0 :             eb_av1_highbd_quantize_b_facade(
    1647             :                 (TranLow*)coeff,
    1648             :                 n_coeffs,
    1649             :                 &candidate_plane,
    1650             :                 quant_coeff,
    1651             :                 (TranLow*)recon_coeff,
    1652             :                 eob,
    1653             :                 scan_order,
    1654             :                 &qparam);
    1655             :         } else {
    1656    60830600 :             av1_quantize_b_facade_II(
    1657             :                 (TranLow*)coeff,
    1658             :                 coeff_stride,
    1659             :                 width,
    1660             :                 height,
    1661             :                 n_coeffs,
    1662             :                 &candidate_plane,
    1663             :                 quant_coeff,
    1664             :                 (TranLow*)recon_coeff,
    1665             :                 eob,
    1666             :                 scan_order,
    1667             :                 &qparam);
    1668             :         }
    1669             :     }
    1670             : 
    1671   102186000 :     if (perform_rdoq && *eob != 0) {
    1672             : 
    1673             :         // Perform Trellis
    1674    35073000 :         if (*eob != 0) {
    1675    35073400 :             eb_av1_optimize_b(
    1676             :                 md_context,
    1677             :                 txb_skip_context,
    1678             :                 dc_sign_context,
    1679             :                 (TranLow*)coeff,
    1680             :                 coeff_stride,
    1681             :                 n_coeffs,
    1682             :                 &candidate_plane,
    1683             :                 quant_coeff,
    1684             :                 (TranLow*)recon_coeff,
    1685             :                 eob,
    1686             :                 scan_order,
    1687             :                 &qparam,
    1688             :                 txsize,
    1689             :                 tx_type,
    1690             :                 is_inter,
    1691             :                 bit_increment,
    1692             :                 (component_type == COMPONENT_LUMA) ? 0 : 1);
    1693             :         }
    1694             :     }
    1695             : 
    1696             : 
    1697   102071000 :     *count_non_zero_coeffs = *eob;
    1698             : 
    1699             :     // Derive cul_level
    1700   102071000 :     int32_t cul_level = 0;
    1701   102071000 :     const int16_t *const scan = scan_order->scan;
    1702  1953540000 :     for (int32_t c = 0; c < *eob; ++c) {
    1703  1851470000 :         const int16_t pos = scan[c];
    1704  1851470000 :         const int32_t v = quant_coeff[pos];
    1705  1851470000 :         int32_t level = ABS(v);
    1706  1851470000 :         cul_level += level;
    1707             :     }
    1708             : 
    1709   102071000 :     cul_level = AOMMIN(COEFF_CONTEXT_MASK, cul_level);
    1710             :     // DC value
    1711   102071000 :     set_dc_sign(&cul_level, quant_coeff[0]);
    1712   102058000 :     return cul_level;
    1713             : }
    1714             : 
    1715             : /****************************************
    1716             :  ************  Full loop ****************
    1717             : ****************************************/
    1718    40264000 : void product_full_loop( // Luma full loop: forward transform, quantize/inverse-quantize, distortion and coefficient-rate estimate for a luma transform block; results are added to the caller's totals
    1719             :     ModeDecisionCandidateBuffer  *candidate_buffer, // candidate under test: its residual/prediction are read; quantized coeffs, recon coeffs, recon, eob and quantized_dc are handed out as outputs
    1720             :     ModeDecisionContext          *context_ptr, // mode-decision context: block geometry, SB origin, scratch transform buffers, lambda; several fields are overwritten below
    1721             :     PictureControlSet            *picture_control_set_ptr, // used here to reach the sequence control set and the frame segmentation parameters
    1722             :     EbPictureBufferDesc          *input_picture_ptr, // source picture; only read on the spatial-SSE distortion path
    1723             :     uint32_t                     qp, // forwarded unchanged to av1_quantize_inv_quantize
    1724             :     uint32_t                     *y_count_non_zero_coeffs, // array indexed by txb_itr; the entry's address is passed to the quantizer, then the entry is read back
    1725             :     uint64_t                     *y_coeff_bits, // accumulator: this function only adds to it (+=)
    1726             :     uint64_t                     *y_full_distortion) // accumulator array indexed by DIST_CALC_RESIDUAL / DIST_CALC_PREDICTION; only += is applied, so the caller must initialise it
    1727             : {
    1728             :     uint32_t                       tu_origin_index;
    1729             :     uint64_t                      y_full_cost; // NOTE(review): only ever passed by address to av1_tu_calc_cost_luma; never read in this function
    1730    40264000 :     SequenceControlSet        *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
    1731             :     //    uint32_t   currentTuIndex,tuIt;
    1732             :     uint64_t   y_tu_coeff_bits;
    1733             :     EB_ALIGN(16) uint64_t tuFullDistortion[3][DIST_CALC_TOTAL]; // only row [0] (luma) is used in this function
    1734    40264000 :     context_ptr->three_quad_energy = 0; // reset before its address is handed to av1_estimate_transform
    1735             : #if ENHANCE_ATB
    1736    40264000 :     uint8_t  tx_depth = context_ptr->tx_depth; // with ENHANCE_ATB the depth, block index and coefficient offset all come from the context (no loop in this function)
    1737    40264000 :     uint32_t txb_itr = context_ptr->txb_itr;
    1738    40264000 :     uint32_t txb_1d_offset = context_ptr->txb_1d_offset;
    1739             : #else
    1740             :     uint32_t  txb_1d_offset = 0;
    1741             :     uint32_t txb_itr = 0;
    1742             : #endif
    1743             : #if !ENHANCE_ATB
    1744             :     uint8_t  tx_depth = candidate_buffer->candidate_ptr->tx_depth;
    1745             :     uint16_t txb_count = context_ptr->blk_geom->txb_count[tx_depth];
    1746             :     for (txb_itr = 0; txb_itr < txb_count; txb_itr++) // without ENHANCE_ATB every transform block of the candidate's tx_depth is processed here
    1747             :     {
    1748             : #endif
    1749    40264000 :         uint16_t tx_org_x = context_ptr->blk_geom->tx_org_x[tx_depth][txb_itr];
    1750    40264000 :         uint16_t tx_org_y = context_ptr->blk_geom->tx_org_y[tx_depth][txb_itr];
    1751    40264000 :         int32_t cropped_tx_width = MIN(context_ptr->blk_geom->tx_width[tx_depth][txb_itr], sequence_control_set_ptr->seq_header.max_frame_width - (context_ptr->sb_origin_x + tx_org_x)); // limit the width to what remains up to max_frame_width
    1752    40264000 :         int32_t cropped_tx_height = MIN(context_ptr->blk_geom->tx_height[tx_depth][txb_itr], sequence_control_set_ptr->seq_header.max_frame_height - (context_ptr->sb_origin_y + tx_org_y)); // same limit vertically; both are used only by the spatial distortion kernels
    1753    40264000 :         context_ptr->luma_txb_skip_context = 0;
    1754    40264000 :         context_ptr->luma_dc_sign_context = 0;
    1755    40264000 :         get_txb_ctx( // the two context fields zeroed above are passed by address; presumably filled from the neighbor array - confirm in get_txb_ctx
    1756             :             sequence_control_set_ptr,
    1757             :             COMPONENT_LUMA,
    1758             : #if ENHANCE_ATB
    1759             :             context_ptr->full_loop_luma_dc_sign_level_coeff_neighbor_array,
    1760             : #else
    1761             :             context_ptr->luma_dc_sign_level_coeff_neighbor_array,
    1762             : #endif
    1763    40264000 :             context_ptr->sb_origin_x + tx_org_x,
    1764    40264000 :             context_ptr->sb_origin_y + tx_org_y,
    1765    40264000 :             context_ptr->blk_geom->bsize,
    1766    40264000 :             context_ptr->blk_geom->txsize[tx_depth][txb_itr],
    1767             :             &context_ptr->luma_txb_skip_context,
    1768             :             &context_ptr->luma_dc_sign_context);
    1769             : 
    1770    40263800 :         tu_origin_index = tx_org_x + (tx_org_y * candidate_buffer->residual_ptr->stride_y); // sample offset of this transform block; reused below for the prediction and recon buffers
    1771    40263800 :         y_tu_coeff_bits = 0;
    1772             : 
    1773             :         // Y: T Q iQ (forward transform, quantize, inverse quantize)
    1774    40263800 :         av1_estimate_transform(
    1775    40263800 :             &(((int16_t*)candidate_buffer->residual_ptr->buffer_y)[tu_origin_index]),
    1776    40263800 :             candidate_buffer->residual_ptr->stride_y,
    1777    40263800 :             &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[txb_1d_offset]), // same coefficient buffer is fed to the quantizer below
    1778             :             NOT_USED_VALUE,
    1779    40263800 :             context_ptr->blk_geom->txsize[tx_depth][txb_itr],
    1780             :             &context_ptr->three_quad_energy,
    1781             :             context_ptr->transform_inner_array_ptr,
    1782    40263800 :             context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
    1783    40263800 :             candidate_buffer->candidate_ptr->transform_type[txb_itr],
    1784             :             PLANE_TYPE_Y,
    1785             :             DEFAULT_SHAPE);
    1786             : 
    1787    80529600 :         int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ? // segment's SEG_LVL_ALT_Q feature value when segmentation is enabled, otherwise 0
    1788    40264800 :                          picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
    1789    40264800 :         candidate_buffer->candidate_ptr->quantized_dc[0][txb_itr] = av1_quantize_inv_quantize( // return value kept per transform block; presumably the cul_level returned by the function defined just above - confirm
    1790             :             picture_control_set_ptr,
    1791             :             context_ptr,
    1792    40264800 :             &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[txb_1d_offset]),
    1793             :             NOT_USED_VALUE,
    1794    40264800 :             &(((int32_t*)candidate_buffer->residual_quant_coeff_ptr->buffer_y)[txb_1d_offset]),
    1795    40264800 :             &(((int32_t*)candidate_buffer->recon_coeff_ptr->buffer_y)[txb_1d_offset]),
    1796             :             qp,
    1797             :             seg_qp,
    1798    40264800 :             context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
    1799    40264800 :             context_ptr->blk_geom->tx_height[tx_depth][txb_itr],
    1800    40264800 :             context_ptr->blk_geom->txsize[tx_depth][txb_itr],
    1801    40264800 :             &candidate_buffer->candidate_ptr->eob[0][txb_itr],
    1802    40264800 :             &(y_count_non_zero_coeffs[txb_itr]),
    1803             :             COMPONENT_LUMA,
    1804    40264800 :             context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
    1805    40264800 :             candidate_buffer->candidate_ptr->transform_type[txb_itr],
    1806             :             candidate_buffer,
    1807    40264800 :             context_ptr->luma_txb_skip_context,
    1808    40264800 :             context_ptr->luma_dc_sign_context,
    1809    40264800 :             candidate_buffer->candidate_ptr->pred_mode,
    1810    40264800 :             candidate_buffer->candidate_ptr->use_intrabc,
    1811             :             EB_FALSE); // NOTE(review): presumably the is_encode_pass flag (mode decision, not encode pass) - confirm against the prototype
    1812             : 
    1813    40269100 :         if (context_ptr->spatial_sse_full_loop) { // distortion measured on pixels against the input picture
    1814    40137600 :             uint32_t input_tu_origin_index = (context_ptr->sb_origin_x + tx_org_x + input_picture_ptr->origin_x) + ((context_ptr->sb_origin_y + tx_org_y + input_picture_ptr->origin_y) * input_picture_ptr->stride_y); // offset of the same block inside the input picture, including the picture's origin_x/origin_y
    1815    40137600 :             uint32_t y_has_coeff           = y_count_non_zero_coeffs[txb_itr] > 0;
    1816             : 
    1817    40137600 :             if (y_has_coeff) { // non-zero coefficients: build recon through the inverse-transform wrapper
    1818    15447300 :                 inv_transform_recon_wrapper(
    1819    15447300 :                     candidate_buffer->prediction_ptr->buffer_y,
    1820             :                     tu_origin_index,
    1821    15447300 :                     candidate_buffer->prediction_ptr->stride_y,
    1822    15447300 :                     candidate_buffer->recon_ptr->buffer_y,
    1823             :                     tu_origin_index,
    1824    15447300 :                     candidate_buffer->recon_ptr->stride_y,
    1825    15447300 :                     (int32_t*) candidate_buffer->recon_coeff_ptr->buffer_y,
    1826             :                     txb_1d_offset,
    1827    15447300 :                     context_ptr->hbd_mode_decision,
    1828    15447300 :                     context_ptr->blk_geom->txsize[tx_depth][txb_itr],
    1829    15447300 :                     candidate_buffer->candidate_ptr->transform_type[txb_itr],
    1830             :                     PLANE_TYPE_Y,
    1831    15447300 :                     (uint32_t)candidate_buffer->candidate_ptr->eob[0][txb_itr]);
    1832             :             } else { // no coefficients: recon is a copy of the luma prediction for this block
    1833    24690300 :                 picture_copy(
    1834             :                     candidate_buffer->prediction_ptr,
    1835             :                     tu_origin_index,
    1836             :                     0,
    1837             :                     candidate_buffer->recon_ptr,
    1838             :                     tu_origin_index,
    1839             :                     0,
    1840    24690300 :                     context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
    1841    24690300 :                     context_ptr->blk_geom->tx_height[tx_depth][txb_itr],
    1842             :                     0,
    1843             :                     0,
    1844             :                     PICTURE_BUFFER_DESC_Y_FLAG,
    1845    24690300 :                     context_ptr->hbd_mode_decision);
    1846             :             }
    1847             : 
    1848    80198100 :             EbSpatialFullDistType spatial_full_dist_type_fun = context_ptr->hbd_mode_decision ? // pick the 16-bit kernel when hbd_mode_decision is set, the default kernel otherwise
    1849    40099100 :                 full_distortion_kernel16_bits :
    1850             :                 spatial_full_distortion_kernel;
    1851             : 
    1852    80211800 :             tuFullDistortion[0][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun( // input vs. prediction over the cropped block
    1853             :                 input_picture_ptr->buffer_y,
    1854             :                 input_tu_origin_index,
    1855    40099100 :                 input_picture_ptr->stride_y,
    1856    40099100 :                 candidate_buffer->prediction_ptr->buffer_y,
    1857             :                 tu_origin_index,
    1858    40099100 :                 candidate_buffer->prediction_ptr->stride_y,
    1859             :                 cropped_tx_width,
    1860             :                 cropped_tx_height);
    1861             : 
    1862    80222900 :             tuFullDistortion[0][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun( // input vs. recon over the cropped block
    1863             :                 input_picture_ptr->buffer_y,
    1864             :                 input_tu_origin_index,
    1865    40112700 :                 input_picture_ptr->stride_y,
    1866    40112700 :                 candidate_buffer->recon_ptr->buffer_y,
    1867             :                 tu_origin_index,
    1868    40112700 :                 candidate_buffer->recon_ptr->stride_y,
    1869             :                 cropped_tx_width,
    1870             :                 cropped_tx_height);
    1871             : 
    1872    40110200 :             tuFullDistortion[0][DIST_CALC_PREDICTION] <<= 4; // NOTE(review): multiplies the pixel-domain distortion by 16, presumably to match the scale of the coefficient-domain path - confirm
    1873    40110200 :             tuFullDistortion[0][DIST_CALC_RESIDUAL] <<= 4;
    1874             :         }
    1875             :         else {
    1876             :             // LUMA DISTORTION (computed from the transform coefficient buffer and the recon coefficient buffer)
    1877      131566 :             picture_full_distortion32_bits(
    1878      131566 :                 context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr,
    1879             :                 txb_1d_offset,
    1880             :                 0,
    1881             :                 candidate_buffer->recon_coeff_ptr,
    1882             :                 txb_1d_offset,
    1883             :                 0,
    1884      131566 :                 context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
    1885      131566 :                 context_ptr->blk_geom->tx_height[tx_depth][txb_itr],
    1886             :                 NOT_USED_VALUE,
    1887             :                 NOT_USED_VALUE,
    1888             :                 tuFullDistortion[0],
    1889             :                 NOT_USED_VALUE,
    1890             :                 NOT_USED_VALUE,
    1891      131566 :                 y_count_non_zero_coeffs[txb_itr],
    1892             :                 0,
    1893             :                 0,
    1894             :                 COMPONENT_LUMA);
    1895             : 
    1896      149367 :             tuFullDistortion[0][DIST_CALC_RESIDUAL] += context_ptr->three_quad_energy; // the energy value handed to av1_estimate_transform is added to both distortions
    1897      149367 :             tuFullDistortion[0][DIST_CALC_PREDICTION] += context_ptr->three_quad_energy;
    1898             :             //assert(context_ptr->three_quad_energy == 0 && context_ptr->cu_stats->size < 64);
    1899      149367 :             TxSize tx_size = context_ptr->blk_geom->txsize[tx_depth][txb_itr];
    1900      149367 :             int32_t shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2; // shift amount depends on the transform size's tx scale; applied to both distortions below
    1901      149347 :             tuFullDistortion[0][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(tuFullDistortion[0][DIST_CALC_RESIDUAL], shift);
    1902      149347 :             tuFullDistortion[0][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(tuFullDistortion[0][DIST_CALC_PREDICTION], shift);
    1903             :         }
    1904             : 
    1905             :         //LUMA-ONLY
    1906    40259500 :         av1_tu_estimate_coeff_bits(
    1907             :             context_ptr,
    1908             :             0,//allow_update_cdf,
    1909             :             NULL,//FRAME_CONTEXT *ec_ctx,
    1910             :             picture_control_set_ptr,
    1911             :             candidate_buffer,
    1912             :             txb_1d_offset,
    1913             :             0,
    1914             :             context_ptr->coeff_est_entropy_coder_ptr,
    1915             :             candidate_buffer->residual_quant_coeff_ptr,
    1916    40259500 :             y_count_non_zero_coeffs[txb_itr],
    1917             :             0,
    1918             :             0,
    1919             :             &y_tu_coeff_bits, // the same address is passed three times; with COMPONENT_LUMA presumably only the luma output is written - confirm
    1920             :             &y_tu_coeff_bits,
    1921             :             &y_tu_coeff_bits,
    1922    40259500 :             context_ptr->blk_geom->txsize[tx_depth][txb_itr],
    1923    40259500 :             context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
    1924    40259500 :             candidate_buffer->candidate_ptr->transform_type[txb_itr],
    1925    40259500 :             candidate_buffer->candidate_ptr->transform_type_uv,
    1926             :             COMPONENT_LUMA);
    1927             : 
    1928             :         //TODO: fix cbf decision
    1929    40258900 :         av1_tu_calc_cost_luma(
    1930    40258900 :             context_ptr->luma_txb_skip_context,
    1931             :             candidate_buffer->candidate_ptr,
    1932             :             txb_itr,
    1933    40258900 :             context_ptr->blk_geom->txsize[tx_depth][0], // NOTE(review): index 0 here, while every other call in this function uses [txb_itr] - confirm this is intended
    1934    40258900 :             y_count_non_zero_coeffs[txb_itr],
    1935             :             tuFullDistortion[0],      //gets updated inside based on cbf decision
    1936             :             &y_tu_coeff_bits,            //gets updated inside based on cbf decision
    1937             :             &y_full_cost,
    1938    40258900 :             context_ptr->full_lambda);
    1939             : 
    1940    40262400 :         (*y_coeff_bits) += y_tu_coeff_bits; // fold this transform block's bits and distortions into the caller's running totals
    1941             : 
    1942    40262400 :         y_full_distortion[DIST_CALC_RESIDUAL] += tuFullDistortion[0][DIST_CALC_RESIDUAL];
    1943    40262400 :         y_full_distortion[DIST_CALC_PREDICTION] += tuFullDistortion[0][DIST_CALC_PREDICTION];
    1944             : #if ENHANCE_ATB
    1945    40262400 :         context_ptr->txb_1d_offset += context_ptr->blk_geom->tx_width[tx_depth][txb_itr] * context_ptr->blk_geom->tx_height[tx_depth][txb_itr]; // advances the context's offset by width*height; the local copy read at the top is not updated
    1946             : #else
    1947             :         txb_1d_offset += context_ptr->blk_geom->tx_width[tx_depth][txb_itr] * context_ptr->blk_geom->tx_height[tx_depth][txb_itr];
    1948             :     }
    1949             : #endif
    1950    40262400 : }
    1951             : // T1
    1952             : uint8_t allowed_tx_set_a[TX_SIZES_ALL][TX_TYPES] = { // 0/1 flag table, one row per transform size and one column per transform type (19 rows x 16 columns listed); presumably 1 = type allowed for that size - its use is not visible in this chunk. NOTE(review): mutable global with external linkage - confirm whether it can be const
    1953             : {1,    1,    1,    1,    0,    0,    0,    0,    0,    1,    1,    1,    0,    0,    0,    0},
    1954             : {1,    1,    1,    1,    0,    0,    0,    0,    0,    1,    1,    1,    0,    1,    0,    1},
    1955             : {1,    1,    1,    1,    0,    0,    0,    0,    0,    1,    1,    1,    0,    0,    0,    0},
    1956             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0},
    1957             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0},
    1958             : {1,    1,    1,    1,    0,    0,    0,    0,    0,    1,    1,    1,    0,    0,    0,    0},
    1959             : {1,    1,    1,    1,    0,    0,    0,    0,    0,    1,    1,    1,    0,    0,    0,    0},
    1960             : {1,    1,    1,    1,    0,    0,    0,    0,    0,    1,    1,    1,    0,    1,    0,    1},
    1961             : {1,    1,    1,    1,    0,    0,    0,    0,    0,    1,    1,    1,    1,    0,    1,    0},
    1962             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0},
    1963             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0},
    1964             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0},
    1965             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0},
    1966             : {1,    1,    1,    1,    0,    0,    0,    0,    0,    1,    1,    1,    0,    0,    0,    0},
    1967             : {1,    1,    1,    1,    0,    0,    0,    0,    0,    1,    1,    1,    0,    0,    0,    0},
    1968             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0},
    1969             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0},
    1970             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0},
    1971             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0} };
    1972             : 
    1973             : uint8_t allowed_tx_set_b[TX_SIZES_ALL][TX_TYPES] = { // second 0/1 flag table with the same [transform size][transform type] layout as allowed_tx_set_a, with fewer entries set to 1; presumably 1 = type allowed - its use is not visible in this chunk
    1974             : {1,    1,    1,    1,    0,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0},
    1975             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    1,    1,    1,    0,    0,    0,    0},
    1976             : {1,    1,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0},
    1977             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0},
    1978             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0},
    1979             : {1,    1,    1,    1,    0,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0},
    1980             : {1,    1,    1,    1,    0,    0,    0,    0,    0,    0,    1,    1,    0,    0,    0,    0},
    1981             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0,    0},
    1982             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    1,    1,    0,    0,    0,    0,    0},
    1983             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0},
    1984             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0},
    1985             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0},
    1986             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0},
    1987             : {0,    0,    1,    1,    0,    0,    0,    0,    0,    1,    1,    1,    0,    0,    0,    0}, // NOTE(review): the only row in either table whose first column is 0 - confirm this is intended
    1988             : {1,    1,    0,    1,    0,    0,    0,    0,    0,    1,    1,    1,    0,    0,    0,    0},
    1989             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0},
    1990             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0},
    1991             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0},
    1992             : {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}
    1993             : };
    1994             : // product_full_loop_tx_search: tries each allowed luma transform type on the candidate (T/Q/iQ, distortion, coefficient-bit estimate, luma cost), stores the lowest-cost type in candidate_ptr->transform_type[0], and copies it to transform_type_uv for inter / intrabc candidates.
    1995     1760920 : void product_full_loop_tx_search(
    1996             :     ModeDecisionCandidateBuffer  *candidate_buffer,
    1997             :     ModeDecisionContext          *context_ptr,
    1998             :     PictureControlSet            *picture_control_set_ptr)
    1999             : {
    2000             :     uint32_t                       tu_origin_index;
    2001     1760920 :     SequenceControlSet          *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
    2002             :     uint64_t                       y_tu_coeff_bits;
    2003             :     EB_ALIGN(16) uint64_t          tuFullDistortion[3][DIST_CALC_TOTAL];
    2004     1760920 :     int32_t                        plane = 0;
    2005     1760920 :     const int32_t                  is_inter = (candidate_buffer->candidate_ptr->type == INTER_MODE || candidate_buffer->candidate_ptr->use_intrabc) ? EB_TRUE : EB_FALSE;
    2006     1760920 :     uint64_t                       best_full_cost = UINT64_MAX;
    2007     1760920 :     uint64_t                       y_full_cost = MAX_CU_COST;
    2008             :     uint32_t                       yCountNonZeroCoeffsTemp;
    2009     1760920 :     TxType                         txk_start = DCT_DCT;
    2010     1760920 :     TxType                         txk_end = TX_TYPES;
    2011             :     TxType                         tx_type;
    2012     1760920 :     int32_t                        txb_itr = 0;
    2013     1760920 :     uint8_t                        tx_depth = candidate_buffer->candidate_ptr->tx_depth;
    2014     1760920 :     TxSize                         txSize = context_ptr->blk_geom->txsize[tx_depth][txb_itr];
    2015     1760920 :     assert(txSize < TX_SIZES_ALL);
    2016             :     const TxSetType                tx_set_type =
    2017     1760920 :         get_ext_tx_set_type(txSize, is_inter, picture_control_set_ptr->parent_pcs_ptr->frm_hdr.reduced_tx_set);
    2018             : 
    2019     1760950 :     int32_t allowed_tx_mask[TX_TYPES] = { 0 };  // 1: allow; 0: skip.
    2020     1760950 :     int32_t allowed_tx_num = 0;
    2021     1760950 :     TxType uv_tx_type = DCT_DCT;
    2022     1760950 :     if (picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set == 2)
    2023           0 :         txk_end = 2; // with the index == 1 -> IDTX remap in the loops below, presumably limits the search to DCT_DCT and IDTX (confirm enum values)
    2024             : // First pass: build allowed_tx_mask[]. A type stays allowed only if av1_ext_tx_used[tx_set_type][tx_type] is set and txsize_sqr_up_map[txSize] does not exceed TX_32X32.
    2025    29933000 :     for (int32_t tx_type_index = txk_start; tx_type_index < txk_end; ++tx_type_index) {
    2026    28172000 :         if (picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set == 2)
    2027           0 :             tx_type_index = (tx_type_index  == 1) ? IDTX : tx_type_index;
    2028    28172000 :         tx_type = (TxType)tx_type_index;
    2029    28172000 :         allowed_tx_mask[tx_type] = 1;
    2030    28172000 :         if (plane == 0) {
    2031    28172000 :             if (allowed_tx_mask[tx_type]) {
    2032    28172000 :                 const TxType ref_tx_type = ((!av1_ext_tx_used[tx_set_type][tx_type]) || txsize_sqr_up_map[txSize] > TX_32X32) ? DCT_DCT : tx_type;
    2033    28172000 :                 if (tx_type != ref_tx_type)
    2034    11619500 :                     allowed_tx_mask[tx_type] = 0;
    2035             :             }
    2036             :         }
    2037             : 
    2038    28172000 :         allowed_tx_num += allowed_tx_mask[tx_type];
    2039             :     }
    2040             :     // Need to have at least one transform type allowed.
    2041     1760950 :     if (allowed_tx_num == 0)
    2042           0 :         allowed_tx_mask[plane ? uv_tx_type : DCT_DCT] = 1;
    2043     1760950 :     TxType best_tx_type = DCT_DCT;
    2044    29931500 :     for (int32_t tx_type_index = txk_start; tx_type_index < txk_end; ++tx_type_index) {
    2045    28169700 :         if (picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set == 2)
    2046           0 :             tx_type_index = (tx_type_index  == 1) ? IDTX : tx_type_index;
    2047    28169700 :         tx_type = (TxType)tx_type_index;
    2048    28169700 :         if (!allowed_tx_mask[tx_type]) continue;
    2049    16550100 :         if (picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set)
    2050           0 :             if (!allowed_tx_set_a[txSize][tx_type]) continue;
    2051             : // Evaluate this transform type on each transform block at tx_depth (the txb_itr declared below shadows the function-scope txb_itr).
    2052    16550100 :         context_ptr->three_quad_energy = 0;
    2053    16550100 :         uint32_t txb_itr = 0;
    2054    16550100 :         uint16_t txb_count = context_ptr->blk_geom->txb_count[tx_depth];
    2055    33100600 :         for (txb_itr = 0; txb_itr < txb_count; txb_itr++)
    2056             :         {
    2057    16549600 :             uint8_t txb_origin_x = (uint8_t)context_ptr->blk_geom->tx_org_x[tx_depth][txb_itr];
    2058    16549600 :             uint8_t txb_origin_y = (uint8_t)context_ptr->blk_geom->tx_org_y[tx_depth][txb_itr];
    2059    16549600 :             tu_origin_index = txb_origin_x + (txb_origin_y * candidate_buffer->residual_ptr->stride_y);
    2060    16549600 :             y_tu_coeff_bits = 0;
    2061             : 
    2062    16549600 :             candidate_buffer->candidate_ptr->transform_type[txb_itr] = tx_type;
    2063             : 
    2064    16549600 :             context_ptr->luma_txb_skip_context = 0;
    2065    16549600 :             context_ptr->luma_dc_sign_context = 0;
    2066    16549600 :             get_txb_ctx(
    2067             :                 sequence_control_set_ptr,
    2068             :                 COMPONENT_LUMA,
    2069             : #if ENHANCE_ATB
    2070             :                 context_ptr->luma_dc_sign_level_coeff_neighbor_array,
    2071             : #else
    2072             :                 picture_control_set_ptr->ep_luma_dc_sign_level_coeff_neighbor_array,
    2073             : #endif
    2074    16549600 :                 context_ptr->sb_origin_x + txb_origin_x,
    2075    16549600 :                 context_ptr->sb_origin_y + txb_origin_y,
    2076             :                 //txb_origin_x,// context_ptr->cu_origin_x,
    2077             :                 //txb_origin_y,// context_ptr->cu_origin_y,
    2078    16549600 :                 context_ptr->blk_geom->bsize,
    2079    16549600 :                 context_ptr->blk_geom->txsize[tx_depth][txb_itr], //[0][0],
    2080             :                 &context_ptr->luma_txb_skip_context,
    2081             :                 &context_ptr->luma_dc_sign_context);
    2082             : 
    2083             :             // Y: T Q iQ (transform, quantize, inverse quantize of the luma residual)
    2084    16548200 :             av1_estimate_transform(
    2085    16548200 :                 &(((int16_t*)candidate_buffer->residual_ptr->buffer_y)[tu_origin_index]),
    2086    16548200 :                 candidate_buffer->residual_ptr->stride_y,
    2087    16548200 :                 &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[tu_origin_index]),
    2088             :                 NOT_USED_VALUE,
    2089    16548200 :                 context_ptr->blk_geom->txsize[tx_depth][txb_itr],
    2090             :                 &context_ptr->three_quad_energy,
    2091             :                 context_ptr->transform_inner_array_ptr,
    2092    16548200 :                 context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
    2093             :                 tx_type,
    2094             :                 PLANE_TYPE_Y,
    2095    16548200 :                 context_ptr->pf_md_mode);
    2096             : 
    2097    33100900 :             int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
    2098    16550500 :                              picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
    2099             : 
    2100    16550500 :             av1_quantize_inv_quantize(
    2101             :                 picture_control_set_ptr,
    2102             :                 context_ptr,
    2103    16550500 :                 &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[tu_origin_index]),
    2104             :                 NOT_USED_VALUE,
    2105    16550500 :                 &(((int32_t*)candidate_buffer->residual_quant_coeff_ptr->buffer_y)[tu_origin_index]),
    2106    16550500 :                 &(((int32_t*)candidate_buffer->recon_coeff_ptr->buffer_y)[tu_origin_index]),
    2107    16550500 :                 context_ptr->cu_ptr->qp,
    2108             :                 seg_qp,
    2109    16550500 :                 context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
    2110    16550500 :                 context_ptr->blk_geom->tx_height[tx_depth][txb_itr],
    2111    16550500 :                 context_ptr->blk_geom->txsize[tx_depth][txb_itr],
    2112    16550500 :                 &candidate_buffer->candidate_ptr->eob[0][txb_itr],
    2113             :                 &yCountNonZeroCoeffsTemp,
    2114             :                 COMPONENT_LUMA,
    2115    16550500 :                 context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
    2116             :                 tx_type,
    2117             :                 candidate_buffer,
    2118    16550500 :                 context_ptr->luma_txb_skip_context,
    2119    16550500 :                 context_ptr->luma_dc_sign_context,
    2120    16550500 :                 candidate_buffer->candidate_ptr->pred_mode,
    2121    16550500 :                 candidate_buffer->candidate_ptr->use_intrabc,
    2122             :                 EB_FALSE);
    2123             : 
    2124             :             // A tx_type other than DCT_DCT with no coefficients is not an acceptable option in AV1.
    2125    16551400 :             if (yCountNonZeroCoeffsTemp == 0 && tx_type != DCT_DCT)
    2126    12273500 :                 continue;
    2127             : // NOTE(review): the continue above only leaves this txb iteration; y_full_cost is not updated for this tx_type, so the comparison after the loop sees the value left by an earlier call (or the MAX_CU_COST initializer).
    2128             : 
    2129     4277980 :             if (context_ptr->spatial_sse_full_loop) {
    2130     4280230 :                 if (yCountNonZeroCoeffsTemp)
    2131     2917180 :                     inv_transform_recon_wrapper(
    2132     2917180 :                         candidate_buffer->prediction_ptr->buffer_y,
    2133             :                         tu_origin_index,
    2134     2917180 :                         candidate_buffer->prediction_ptr->stride_y,
    2135     2917180 :                         candidate_buffer->recon_ptr->buffer_y,
    2136             :                         tu_origin_index,
    2137     2917180 :                         candidate_buffer->recon_ptr->stride_y,
    2138     2917180 :                         (int32_t*) candidate_buffer->recon_coeff_ptr->buffer_y,
    2139             :                         tu_origin_index,
    2140     2917180 :                         context_ptr->hbd_mode_decision,
    2141     2917180 :                         context_ptr->blk_geom->txsize[tx_depth][txb_itr],
    2142             :                         tx_type,
    2143             :                         PLANE_TYPE_Y,
    2144     2917180 :                         (uint16_t)candidate_buffer->candidate_ptr->eob[0][txb_itr]);
    2145             :                 else
    2146     1363050 :                     picture_copy(
    2147             :                         candidate_buffer->prediction_ptr,
    2148             :                         tu_origin_index,
    2149             :                         0,
    2150             :                         candidate_buffer->recon_ptr,
    2151             :                         tu_origin_index,
    2152             :                         0,
    2153     1363050 :                         context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
    2154     1363050 :                         context_ptr->blk_geom->tx_height[tx_depth][txb_itr],
    2155             :                         0,
    2156             :                         0,
    2157             :                         PICTURE_BUFFER_DESC_Y_FLAG,
    2158     1363050 :                         context_ptr->hbd_mode_decision);
    2159             : // Pixel-domain distortion of the input picture vs. the prediction and vs. the reconstruction; both results are scaled by << 4 below.
    2160     8560300 :                 EbPictureBufferDesc *input_picture_ptr = context_ptr->hbd_mode_decision ?
    2161     4280150 :                     picture_control_set_ptr->input_frame16bit : picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
    2162     4280150 :                 uint32_t input_tu_origin_index = (context_ptr->sb_origin_x + txb_origin_x + input_picture_ptr->origin_x) + ((context_ptr->sb_origin_y + txb_origin_y + input_picture_ptr->origin_y) * input_picture_ptr->stride_y);
    2163             : 
    2164     8560300 :                 EbSpatialFullDistType spatial_full_dist_type_fun = context_ptr->hbd_mode_decision ?
    2165     4280150 :                     full_distortion_kernel16_bits : spatial_full_distortion_kernel;
    2166             : 
    2167     8560210 :                 tuFullDistortion[0][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun(
    2168             :                     input_picture_ptr->buffer_y,
    2169             :                     input_tu_origin_index,
    2170     4280150 :                     input_picture_ptr->stride_y,
    2171     4280150 :                     candidate_buffer->prediction_ptr->buffer_y,
    2172             :                     tu_origin_index,
    2173     4280150 :                     candidate_buffer->prediction_ptr->stride_y,
    2174     4280150 :                     context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
    2175     4280150 :                     context_ptr->blk_geom->tx_height[tx_depth][txb_itr]);
    2176             : 
    2177     8560040 :                 tuFullDistortion[0][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun(
    2178             :                     input_picture_ptr->buffer_y,
    2179             :                     input_tu_origin_index,
    2180     4280060 :                     input_picture_ptr->stride_y,
    2181     4280060 :                     candidate_buffer->recon_ptr->buffer_y,
    2182             :                     tu_origin_index,
    2183     4280060 :                     candidate_buffer->recon_ptr->stride_y,
    2184     4280060 :                     context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
    2185     4280060 :                     context_ptr->blk_geom->tx_height[tx_depth][txb_itr]);
    2186             : 
    2187     4279980 :                 tuFullDistortion[0][DIST_CALC_PREDICTION] <<= 4;
    2188     4279980 :                 tuFullDistortion[0][DIST_CALC_RESIDUAL] <<= 4;
    2189             :             }
    2190             :             else {
    2191             :                 // LUMA DISTORTION
    2192           0 :                 picture_full_distortion32_bits(
    2193           0 :                     context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr,
    2194             :                     tu_origin_index,
    2195             :                     0,
    2196             :                     candidate_buffer->recon_coeff_ptr,
    2197             :                     tu_origin_index,
    2198             :                     0,
    2199           0 :                     context_ptr->blk_geom->bwidth,
    2200           0 :                     context_ptr->blk_geom->bheight,
    2201           0 :                     context_ptr->blk_geom->bwidth_uv,
    2202           0 :                     context_ptr->blk_geom->bheight_uv,
    2203             :                     tuFullDistortion[0],
    2204             :                     tuFullDistortion[0],
    2205             :                     tuFullDistortion[0],
    2206             :                     yCountNonZeroCoeffsTemp,
    2207             :                     0,
    2208             :                     0,
    2209             :                     COMPONENT_LUMA);
    2210             : // Add three_quad_energy to both distortion terms, then apply the txSize-dependent right shift.
    2211           0 :                 tuFullDistortion[0][DIST_CALC_RESIDUAL] += context_ptr->three_quad_energy;
    2212           0 :                 tuFullDistortion[0][DIST_CALC_PREDICTION] += context_ptr->three_quad_energy;
    2213             : 
    2214           0 :                 int32_t shift = (MAX_TX_SCALE - av1_get_tx_scale(txSize)) * 2;
    2215           0 :                 tuFullDistortion[0][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(tuFullDistortion[0][DIST_CALC_RESIDUAL], shift);
    2216           0 :                 tuFullDistortion[0][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(tuFullDistortion[0][DIST_CALC_PREDICTION], shift);
    2217             :             }
    2218             :             //LUMA-ONLY
    2219     4279980 :             av1_tu_estimate_coeff_bits(
    2220             :                 context_ptr,
    2221             :                 0,//allow_update_cdf,
    2222             :                 NULL,//FRAME_CONTEXT *ec_ctx,
    2223             :                 picture_control_set_ptr,
    2224             :                 candidate_buffer,
    2225             :                 tu_origin_index,
    2226             :                 0,
    2227             :                 context_ptr->coeff_est_entropy_coder_ptr,
    2228             :                 candidate_buffer->residual_quant_coeff_ptr,
    2229             :                 yCountNonZeroCoeffsTemp,
    2230             :                 0,
    2231             :                 0,
    2232             :                 &y_tu_coeff_bits,
    2233             :                 &y_tu_coeff_bits,
    2234             :                 &y_tu_coeff_bits,
    2235     4279980 :                 context_ptr->blk_geom->txsize[tx_depth][txb_itr],
    2236     4279980 :                 context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
    2237     4279980 :                 candidate_buffer->candidate_ptr->transform_type[txb_itr],
    2238     4279980 :                 candidate_buffer->candidate_ptr->transform_type_uv,
    2239             :                 COMPONENT_LUMA);
    2240             : // y_full_cost is passed by pointer; presumably av1_tu_calc_cost_luma fills it from the distortion, bit estimate and full_lambda arguments.
    2241     4279860 :             av1_tu_calc_cost_luma(
    2242     4279860 :                 context_ptr->luma_txb_skip_context,
    2243             :                 candidate_buffer->candidate_ptr,
    2244             :                 txb_itr,
    2245     4279860 :                 context_ptr->blk_geom->txsize[tx_depth][txb_itr],
    2246             :                 yCountNonZeroCoeffsTemp,
    2247             :                 tuFullDistortion[0],
    2248             :                 &y_tu_coeff_bits,
    2249             :                 &y_full_cost,
    2250     4279860 :                 context_ptr->full_lambda);
    2251             :         }
    2252             : // Keep the cheapest transform type seen so far (strict <, so ties keep the earlier type).
    2253    16551000 :         if (y_full_cost < best_full_cost) {
    2254     2354670 :             best_full_cost = y_full_cost;
    2255     2354670 :             best_tx_type = tx_type;
    2256             :         }
    2257             : 
    2258             :         //if (cpi->sf.adaptive_txb_search_level) {
    2259             :         //    if ((best_rd - (best_rd >> cpi->sf.adaptive_txb_search_level)) >
    2260             :         //        ref_best_rd) {
    2261             :         //        break;
    2262             :         //    }
    2263             :         //}
    2264             :         //// Skip transform type search when we found the block has been quantized to
    2265             :         //// all zero and at the same time, it has better rdcost than doing transform.
    2266             :         //if (cpi->sf.tx_type_search.skip_tx_search && !best_eob) break;
    2267             :     }
    2268             :     // this kernel assumes no atb: only transform_type[0] receives the winning type
    2269     1761850 :     candidate_buffer->candidate_ptr->transform_type[0] = best_tx_type;
    2270             :     // For Inter blocks, transform type of chroma follows luma transform type
    2271     1761850 :     if (is_inter)
    2272     1421380 :         candidate_buffer->candidate_ptr->transform_type_uv = candidate_buffer->candidate_ptr->transform_type[0];
    2273     1761850 : }
    2274             : 
    2275       16776 : void encode_pass_tx_search(
    2276             :     PictureControlSet            *picture_control_set_ptr,
    2277             :     EncDecContext                *context_ptr,
    2278             :     LargestCodingUnit            *sb_ptr,
    2279             :     uint32_t                      cb_qp,
    2280             :     EbPictureBufferDesc          *coeffSamplesTB,
    2281             :     EbPictureBufferDesc          *residual16bit,
    2282             :     EbPictureBufferDesc          *transform16bit,
    2283             :     EbPictureBufferDesc          *inverse_quant_buffer,
    2284             :     int16_t                      *transformScratchBuffer,
    2285             :     uint32_t                     *count_non_zero_coeffs,
    2286             :     uint32_t                     component_mask,
    2287             :     uint32_t                     dZoffset,
    2288             :     uint16_t                     *eob,
    2289             :     MacroblockPlane              *candidate_plane){
    2290             :     (void)dZoffset;
    2291             :     (void)cb_qp;
    2292             :     (void)candidate_plane;
    2293             :     UNUSED(count_non_zero_coeffs);
    2294             :     UNUSED(component_mask);
    2295             : 
    2296       16776 :     CodingUnit          *cu_ptr = context_ptr->cu_ptr;
    2297       16776 :     TransformUnit       *txb_ptr = &cu_ptr->transform_unit_array[context_ptr->txb_itr];
    2298       16776 :     uint32_t               qp = cu_ptr->qp;
    2299       16776 :     const uint32_t         coeff1dOffset = context_ptr->coded_area_sb;
    2300             : 
    2301             :     uint64_t               y_tu_coeff_bits;
    2302             :     EB_ALIGN(16) uint64_t  tuFullDistortion[3][DIST_CALC_TOTAL];
    2303       16776 :     const int32_t          is_inter = context_ptr->is_inter;
    2304       16776 :     uint64_t               best_full_cost = UINT64_MAX;
    2305             :     uint64_t               y_full_cost;
    2306             :     uint32_t               yCountNonZeroCoeffsTemp;
    2307       16776 :     TxType                 txk_start = DCT_DCT;
    2308       16776 :     TxType                 txk_end = TX_TYPES;
    2309             :     TxType                 tx_type;
    2310       16776 :     TxSize                 txSize = context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr];
    2311       16776 :     const uint32_t         scratch_luma_offset = context_ptr->blk_geom->tx_org_x[cu_ptr->tx_depth][context_ptr->txb_itr] + context_ptr->blk_geom->tx_org_y[cu_ptr->tx_depth][context_ptr->txb_itr] * SB_STRIDE_Y;
    2312       16776 :     assert(txSize < TX_SIZES_ALL);
    2313             :     const TxSetType        tx_set_type =
    2314       16776 :         get_ext_tx_set_type(txSize, is_inter, picture_control_set_ptr->parent_pcs_ptr->frm_hdr.reduced_tx_set);
    2315             : 
    2316       16770 :     TxType best_tx_type = DCT_DCT;
    2317       16770 :     if (picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set == 2)
    2318           0 :         txk_end = 2;
    2319      284846 :     for (int32_t tx_type_index = txk_start; tx_type_index < txk_end; ++tx_type_index) {
    2320      268063 :         if (picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set == 2)
    2321           0 :             tx_type_index = (tx_type_index  == 1) ? IDTX : tx_type_index;
    2322      268063 :         tx_type = (TxType)tx_type_index;
    2323             : 
    2324      268063 :         if(picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set)
    2325           0 :             if (!allowed_tx_set_a[txSize][tx_type]) continue;
    2326             : 
    2327      268063 :         const int32_t eset = get_ext_tx_set(txSize, is_inter, picture_control_set_ptr->parent_pcs_ptr->frm_hdr.reduced_tx_set);
    2328             :         // eset == 0 should correspond to a set with only DCT_DCT and there
    2329             :         // is no need to send the tx_type
    2330      268042 :         if (eset <= 0) continue;
    2331      233563 :         if (av1_ext_tx_used[tx_set_type][tx_type] == 0) continue;
    2332             : 
    2333      162348 :         context_ptr->three_quad_energy = 0;
    2334             : 
    2335      162348 :         y_tu_coeff_bits = 0;
    2336             : 
    2337      162348 :         av1_estimate_transform(
    2338      162348 :             ((int16_t*)residual16bit->buffer_y) + scratch_luma_offset,
    2339      162348 :             residual16bit->stride_y,
    2340      162348 :             ((TranLow*)transform16bit->buffer_y) + coeff1dOffset,
    2341             :             NOT_USED_VALUE,
    2342      162348 :             context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    2343             :             &context_ptr->three_quad_energy,
    2344             :             transformScratchBuffer,
    2345             :             BIT_INCREMENT_8BIT,
    2346             :             tx_type,
    2347             :             PLANE_TYPE_Y,
    2348             :             DEFAULT_SHAPE);
    2349      325024 :         int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
    2350      162512 :                          picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
    2351             : 
    2352             : 
    2353      162512 :         av1_quantize_inv_quantize(
    2354      162512 :             sb_ptr->picture_control_set_ptr,
    2355             :             context_ptr->md_context,
    2356      162512 :             ((TranLow*)transform16bit->buffer_y) + coeff1dOffset,
    2357             :             NOT_USED_VALUE,
    2358      162512 :             ((int32_t*)coeffSamplesTB->buffer_y) + coeff1dOffset,
    2359      162512 :             ((int32_t*)inverse_quant_buffer->buffer_y) + coeff1dOffset,
    2360             :             qp,
    2361             :             seg_qp,
    2362      162512 :             context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr],
    2363      162512 :             context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr],
    2364      162512 :             context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    2365             :             &eob[0],
    2366             :             &yCountNonZeroCoeffsTemp,
    2367             :             COMPONENT_LUMA,
    2368             :             BIT_INCREMENT_8BIT,
    2369             :             tx_type,
    2370      162512 :             &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
    2371             :             0,
    2372             :             0,
    2373             :             0,
    2374      162512 :             cu_ptr->av1xd->use_intrabc,
    2375             :             EB_FALSE);
    2376             : 
    2377             : 
    2378             :         //tx_type not equal to DCT_DCT and no coeff is not an acceptable option in AV1.
    2379      162565 :         if (yCountNonZeroCoeffsTemp == 0 && tx_type != DCT_DCT)
    2380       90763 :             continue;
    2381             :         // LUMA DISTORTION
    2382       71802 :         picture_full_distortion32_bits(
    2383             :             transform16bit,
    2384             :             coeff1dOffset,
    2385             :             0,
    2386             :             inverse_quant_buffer,
    2387             :             coeff1dOffset,
    2388             :             0,
    2389       71802 :             context_ptr->blk_geom->bwidth,
    2390       71802 :             context_ptr->blk_geom->bheight,
    2391       71802 :             context_ptr->blk_geom->bwidth_uv,
    2392       71802 :             context_ptr->blk_geom->bheight_uv,
    2393             :             tuFullDistortion[0],
    2394             :             tuFullDistortion[0],
    2395             :             tuFullDistortion[0],
    2396             :             yCountNonZeroCoeffsTemp,
    2397             :             0,
    2398             :             0,
    2399             :             COMPONENT_LUMA);
    2400             : 
    2401       71902 :         tuFullDistortion[0][DIST_CALC_RESIDUAL] += context_ptr->three_quad_energy;
    2402       71902 :         tuFullDistortion[0][DIST_CALC_PREDICTION] += context_ptr->three_quad_energy;
    2403             : 
    2404       71902 :         int32_t shift = (MAX_TX_SCALE - av1_get_tx_scale(txSize)) * 2;
    2405       71897 :         tuFullDistortion[0][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(tuFullDistortion[0][DIST_CALC_RESIDUAL], shift);
    2406       71897 :         tuFullDistortion[0][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(tuFullDistortion[0][DIST_CALC_PREDICTION], shift);
    2407       71897 :         txb_ptr->transform_type[PLANE_TYPE_Y] = tx_type;
    2408             : 
    2409             :         //LUMA-ONLY
    2410             : 
    2411       71897 :         ModeDecisionCandidateBuffer         **candidate_buffer_ptr_array_base = context_ptr->md_context->candidate_buffer_ptr_array;
    2412       71897 :         ModeDecisionCandidateBuffer         **candidate_buffer_ptr_array = &(candidate_buffer_ptr_array_base[0]);
    2413             :         ModeDecisionCandidateBuffer          *candidate_buffer;
    2414             : 
    2415             :         // Set the Candidate Buffer
    2416       71897 :         candidate_buffer = candidate_buffer_ptr_array[0];
    2417             :         // Rate estimation function uses the values from CandidatePtr. The right values are copied from cu_ptr to CandidatePtr
    2418       71897 :         EntropyCoder  *coeff_est_entropy_coder_ptr = picture_control_set_ptr->coeff_est_entropy_coder_ptr;
    2419       71897 :         candidate_buffer->candidate_ptr->type = cu_ptr->prediction_mode_flag;
    2420       71897 :         candidate_buffer->candidate_ptr->pred_mode = cu_ptr->pred_mode;
    2421             : #if FILTER_INTRA_FLAG
    2422       71897 :         candidate_buffer->candidate_ptr->filter_intra_mode = cu_ptr->filter_intra_mode;
    2423             : #endif
    2424       71897 :         const uint32_t coeff1dOffset = context_ptr->coded_area_sb;
    2425             : 
    2426       71897 :         av1_tu_estimate_coeff_bits(
    2427       71897 :             context_ptr->md_context,
    2428             :             0,//allow_update_cdf,
    2429             :             NULL,//FRAME_CONTEXT *ec_ctx,
    2430             :             picture_control_set_ptr,
    2431             :             candidate_buffer,
    2432             :             coeff1dOffset,
    2433             :             0,
    2434             :             coeff_est_entropy_coder_ptr,
    2435             :             coeffSamplesTB,
    2436             :             yCountNonZeroCoeffsTemp,
    2437             :             0,
    2438             :             0,
    2439             :             &y_tu_coeff_bits,
    2440             :             &y_tu_coeff_bits,
    2441             :             &y_tu_coeff_bits,
    2442       71897 :             context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    2443       71897 :             context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    2444       71897 :             cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_Y],
    2445       71897 :             cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_UV],
    2446             :             COMPONENT_LUMA);
    2447             : 
    2448       71802 :         av1_tu_calc_cost_luma(
    2449       71802 :             context_ptr->md_context->luma_txb_skip_context,
    2450             :             candidate_buffer->candidate_ptr,
    2451       71802 :             context_ptr->txb_itr,
    2452       71802 :             context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    2453             :             yCountNonZeroCoeffsTemp,
    2454             :             tuFullDistortion[0],
    2455             :             &y_tu_coeff_bits,
    2456             :             &y_full_cost,
    2457       71802 :             context_ptr->full_lambda);
    2458             : 
    2459       71619 :         if (y_full_cost < best_full_cost) {
    2460       24088 :             best_full_cost = y_full_cost;
    2461       24088 :             best_tx_type = tx_type;
    2462             :         }
    2463             :     }
    2464             : 
    2465       16783 :     txb_ptr->transform_type[PLANE_TYPE_Y] = best_tx_type;
    2466             : 
    2467             :     // For Inter blocks, transform type of chroma follows luma transform type
    2468       16783 :     if (is_inter)
    2469       14050 :         txb_ptr->transform_type[PLANE_TYPE_UV] = txb_ptr->transform_type[PLANE_TYPE_Y];
    2470       16783 : }
    2471             : 
    /*
     * encode_pass_tx_search_hbd
     *
     * Chooses the luma transform type for the transform unit selected by
     * context_ptr->txb_itr. Each TxType from DCT_DCT up to (not including)
     * TX_TYPES that survives the filters at the top of the loop is pushed
     * through av1_estimate_transform, av1_quantize_inv_quantize,
     * picture_full_distortion32_bits, av1_tu_estimate_coeff_bits and
     * av1_tu_calc_cost_luma. The type with the lowest cost is stored in
     * txb_ptr->transform_type[PLANE_TYPE_Y]; for inter blocks the same type is
     * also stored in transform_type[PLANE_TYPE_UV].
     *
     * The transform and quantizer calls are always made with
     * BIT_INCREMENT_10BIT.
     *
     * Parameters:
     *   picture_control_set_ptr  read for frm_hdr.reduced_tx_set,
     *                            tx_search_reduced_set, the segmentation
     *                            parameters and coeff_est_entropy_coder_ptr.
     *   context_ptr              supplies cu_ptr, txb_itr, blk_geom,
     *                            coded_area_sb, is_inter, md_context and
     *                            full_lambda; three_quad_energy is reset for
     *                            every evaluated type.
     *   sb_ptr                   only sb_ptr->picture_control_set_ptr is used
     *                            (handed to av1_quantize_inv_quantize).
     *   coeffSamplesTB           buffer_y is handed to av1_quantize_inv_quantize
     *                            and the descriptor to av1_tu_estimate_coeff_bits.
     *   residual16bit            buffer_y (as int16_t) and stride_y are handed
     *                            to av1_estimate_transform.
     *   transform16bit           buffer_y is handed to av1_estimate_transform
     *                            and av1_quantize_inv_quantize; the descriptor
     *                            is handed to picture_full_distortion32_bits.
     *   inverse_quant_buffer     buffer_y is handed to av1_quantize_inv_quantize;
     *                            the descriptor to picture_full_distortion32_bits.
     *   transformScratchBuffer   passed through to av1_estimate_transform.
     *   eob                      &eob[0] is passed to av1_quantize_inv_quantize.
     *   cb_qp, dZoffset, candidate_plane, component_mask,
     *   count_non_zero_coeffs    not used (explicitly voided below).
     *
     * Side effects besides the final transform_type writes:
     *   - md_context->candidate_buffer_ptr_array[0]->candidate_ptr has its
     *     type, pred_mode (and filter_intra_mode under FILTER_INTRA_FLAG)
     *     overwritten from cu_ptr on every evaluated type.
     *   - NOTE(review): the same buffer offsets are passed to the callees on
     *     every iteration, so whatever they write is presumably left holding
     *     the data of the last evaluated type, not of the winner - confirm.
     *
     * Coverage note: every executable row of this function shows 0 hits in
     * this report, i.e. it was not exercised by the recorded run.
     */
    2472           0 : void encode_pass_tx_search_hbd(
    2473             :     PictureControlSet            *picture_control_set_ptr,
    2474             :     EncDecContext                *context_ptr,
    2475             :     LargestCodingUnit            *sb_ptr,
    2476             :     uint32_t                       cb_qp,
    2477             :     EbPictureBufferDesc          *coeffSamplesTB,
    2478             :     EbPictureBufferDesc          *residual16bit,
    2479             :     EbPictureBufferDesc          *transform16bit,
    2480             :     EbPictureBufferDesc          *inverse_quant_buffer,
    2481             :     int16_t                        *transformScratchBuffer,
    2482             :     uint32_t                       *count_non_zero_coeffs,
    2483             :     uint32_t                       component_mask,
    2484             :     uint32_t                       dZoffset,
    2485             :     uint16_t                       *eob,
    2486             :     MacroblockPlane                *candidate_plane){
                           // Parameters kept in the signature but not used by this function.
    2487             :     (void)dZoffset;
    2488             :     (void)cb_qp;
    2489             :     (void)candidate_plane;
    2490             :     UNUSED(component_mask);
    2491             :     UNUSED(count_non_zero_coeffs);
    2492             : 
    2493           0 :     CodingUnit    *cu_ptr               = context_ptr->cu_ptr;
                           // Transform unit whose luma transform type is being searched.
    2494           0 :     TransformUnit *txb_ptr              = &cu_ptr->transform_unit_array[context_ptr->txb_itr];
    2495           0 :     uint32_t         qp                   = cu_ptr->qp;
                           // Offset of the block origin inside the residual buffer, using SB_STRIDE_Y as row pitch.
    2496           0 :     const uint32_t   scratch_luma_offset    = context_ptr->blk_geom->origin_x + context_ptr->blk_geom->origin_y * SB_STRIDE_Y;
                           // Offset used for the transform / coefficient / inverse-quant buffers.
    2497           0 :     const uint32_t   coeff1dOffset        = context_ptr->coded_area_sb;
    2498             : 
    2499             :     uint64_t                    y_tu_coeff_bits;
                           // Only row [0] of tuFullDistortion is used in this function.
    2500             :     uint64_t                    tuFullDistortion[3][DIST_CALC_TOTAL];
    2501           0 :     const int32_t               is_inter = context_ptr->is_inter;
                           // Starts at the maximum so the first evaluated type with a smaller cost is accepted.
    2502           0 :     uint64_t                    best_full_cost = UINT64_MAX;
    2503             :     uint64_t                    y_full_cost;
    2504             :     uint32_t                    yCountNonZeroCoeffsTemp;
    2505           0 :     TxType                      txk_start = DCT_DCT;
    2506           0 :     TxType                      txk_end = TX_TYPES;
    2507             :     TxType                      tx_type;
    2508           0 :     TxSize                      txSize = context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr];
    2509           0 :     assert(txSize < TX_SIZES_ALL);
    2510             :     const TxSetType             tx_set_type =
    2511           0 :         get_ext_tx_set_type(txSize, is_inter, picture_control_set_ptr->parent_pcs_ptr->frm_hdr.reduced_tx_set);
    2512             : 
                           // Result when no candidate is evaluated or none beats UINT64_MAX.
    2513           0 :     TxType best_tx_type = DCT_DCT;
    2514             : 
    2515           0 :     for (int32_t tx_type_index = txk_start; tx_type_index < txk_end; ++tx_type_index) {
    2516           0 :         tx_type = (TxType)tx_type_index;
    2517             :         ////if (!allowed_tx_mask[tx_type]) continue;
                               // With the reduced search set enabled, skip types not listed in
                               // allowed_tx_set_a for this transform size.
    2518           0 :         if (picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set)
    2519           0 :             if (!allowed_tx_set_a[txSize][tx_type]) continue;
    2520             : 
                               // NOTE(review): the arguments do not change inside the loop, so eset is
                               // presumably the same on every iteration (assuming get_ext_tx_set has no
                               // side effects). When it is <= 0 every iteration is skipped and
                               // best_tx_type stays DCT_DCT.
    2521           0 :         const int32_t eset = get_ext_tx_set(txSize, is_inter, picture_control_set_ptr->parent_pcs_ptr->frm_hdr.reduced_tx_set);
    2522             :         // eset == 0 should correspond to a set with only DCT_DCT and there
    2523             :         // is no need to send the tx_type
    2524           0 :         if (eset <= 0) continue;
                               // Skip types that are not part of the transform set selected above.
    2525           0 :         if (av1_ext_tx_used[tx_set_type][tx_type] == 0) continue;
    2526             : 
                               // Reset the per-candidate accumulators before the forward transform.
    2527           0 :         context_ptr->three_quad_energy = 0;
    2528             : 
    2529           0 :         y_tu_coeff_bits = 0;
    2530             : 
                               // Forward transform of the luma residual with the candidate type.
    2531           0 :         av1_estimate_transform(
    2532           0 :             ((int16_t*)residual16bit->buffer_y) + scratch_luma_offset,
    2533           0 :             residual16bit->stride_y,
    2534           0 :             ((TranLow*)transform16bit->buffer_y) + coeff1dOffset,
    2535             :             NOT_USED_VALUE,
    2536           0 :             context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    2537             :             &context_ptr->three_quad_energy,
    2538             :             transformScratchBuffer,
    2539             :             BIT_INCREMENT_10BIT,
    2540             :             tx_type,
    2541             :             PLANE_TYPE_Y,
    2542             :             DEFAULT_SHAPE);
                               // Segment SEG_LVL_ALT_Q feature value when segmentation is enabled, else 0.
                               // It does not depend on tx_type.
    2543           0 :         int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
    2544           0 :                          picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
    2545             : 
                               // Quantize / inverse-quantize the transform output; the non-zero
                               // coefficient count is returned through yCountNonZeroCoeffsTemp.
                               // The candidate buffer argument is md_context->candidate_buffer_ptr_array[0].
    2546           0 :         av1_quantize_inv_quantize(
    2547           0 :             sb_ptr->picture_control_set_ptr,
    2548             :             context_ptr->md_context,
    2549           0 :             ((int32_t*)transform16bit->buffer_y) + coeff1dOffset,
    2550             :             NOT_USED_VALUE,
    2551           0 :             ((int32_t*)coeffSamplesTB->buffer_y) + coeff1dOffset,
    2552           0 :             ((int32_t*)inverse_quant_buffer->buffer_y) + coeff1dOffset,
    2553             :             qp,
    2554             :             seg_qp,
    2555           0 :             context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr],
    2556           0 :             context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr],
    2557           0 :             context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    2558             :             &eob[0],
    2559             :             &yCountNonZeroCoeffsTemp,
    2560             :             COMPONENT_LUMA,
    2561             :             BIT_INCREMENT_10BIT,
    2562             :             tx_type,
    2563           0 :             &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
    2564             :             0,
    2565             :             0,
    2566             :             0,
    2567           0 :             cu_ptr->av1xd->use_intrabc,
    2568             :             EB_FALSE);
    2569             : 
    2570             : 
    2571             :         //tx_type not equal to DCT_DCT and no coeff is not an acceptable option in AV1.
    2572           0 :         if (yCountNonZeroCoeffsTemp == 0 && tx_type != DCT_DCT)
    2573           0 :             continue;
    2574             :         // LUMA DISTORTION
                               // Compares the transform output against the inverse-quantized buffer;
                               // all three distortion outputs alias row [0].
                               // NOTE(review): block dimensions (bwidth/bheight) are passed here, while the
                               // transform and quantizer above use the per-TU txsize/tx_width/tx_height -
                               // confirm these agree when the block holds more than one transform unit.
    2575           0 :         picture_full_distortion32_bits(
    2576             :             transform16bit,
    2577             :             coeff1dOffset,
    2578             :             0,
    2579             :             inverse_quant_buffer,
    2580             :             coeff1dOffset,
    2581             :             0,
    2582           0 :             context_ptr->blk_geom->bwidth,
    2583           0 :             context_ptr->blk_geom->bheight,
    2584           0 :             context_ptr->blk_geom->bwidth_uv,
    2585           0 :             context_ptr->blk_geom->bheight_uv,
    2586             :             tuFullDistortion[0],
    2587             :             tuFullDistortion[0],
    2588             :             tuFullDistortion[0],
    2589             :             yCountNonZeroCoeffsTemp,
    2590             :             0,
    2591             :             0,
    2592             :             COMPONENT_LUMA);
    2593             : 
                               // Add the energy reported by av1_estimate_transform to both distortion terms.
    2594           0 :         tuFullDistortion[0][DIST_CALC_RESIDUAL] += context_ptr->three_quad_energy;
    2595           0 :         tuFullDistortion[0][DIST_CALC_PREDICTION] += context_ptr->three_quad_energy;
    2596             : 
                               // Rescale both distortion terms by a shift derived from the transform size.
    2597           0 :         int32_t shift = (MAX_TX_SCALE - av1_get_tx_scale(txSize)) * 2;
    2598           0 :         tuFullDistortion[0][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(tuFullDistortion[0][DIST_CALC_RESIDUAL], shift);
    2599           0 :         tuFullDistortion[0][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(tuFullDistortion[0][DIST_CALC_PREDICTION], shift);
                               // Temporarily publish the candidate type: the rate estimation call below
                               // reads transform_type[PLANE_TYPE_Y] from this same transform unit.
    2600           0 :         txb_ptr->transform_type[PLANE_TYPE_Y] = tx_type;
    2601             : 
    2602             :         //LUMA-ONLY
    2603             : 
    2604           0 :         ModeDecisionCandidateBuffer         **candidate_buffer_ptr_array_base = context_ptr->md_context->candidate_buffer_ptr_array;
    2605           0 :         ModeDecisionCandidateBuffer         **candidate_buffer_ptr_array = &(candidate_buffer_ptr_array_base[0]);
    2606             :         ModeDecisionCandidateBuffer          *candidate_buffer;
    2607             : 
    2608             :         // Set the Candidate Buffer
    2609           0 :         candidate_buffer = candidate_buffer_ptr_array[0];
    2610             :         // Rate estimation function uses the values from CandidatePtr. The right values are copied from cu_ptr to CandidatePtr
    2611           0 :         EntropyCoder  *coeff_est_entropy_coder_ptr = picture_control_set_ptr->coeff_est_entropy_coder_ptr;
    2612           0 :         candidate_buffer->candidate_ptr->type = cu_ptr->prediction_mode_flag;
    2613           0 :         candidate_buffer->candidate_ptr->pred_mode = cu_ptr->pred_mode;
    2614             : #if FILTER_INTRA_FLAG
    2615           0 :         candidate_buffer->candidate_ptr->filter_intra_mode = cu_ptr->filter_intra_mode;
    2616             : #endif
                               // Redeclares coeff1dOffset inside the loop body: it shadows the
                               // function-scope constant of the same name and is initialized from the
                               // same expression.
    2617           0 :         const uint32_t coeff1dOffset = context_ptr->coded_area_sb;
    2618             : 
                               // Rate estimation for the quantized luma coefficients. All three bit-count
                               // outputs alias y_tu_coeff_bits.
                               // NOTE(review): presumably only the luma output is written for
                               // COMPONENT_LUMA - confirm against av1_tu_estimate_coeff_bits.
    2619           0 :         av1_tu_estimate_coeff_bits(
    2620           0 :             context_ptr->md_context,
    2621             :             0,//allow_update_cdf,
    2622             :             NULL,//FRAME_CONTEXT *ec_ctx,
    2623             :             picture_control_set_ptr,
    2624             :             candidate_buffer,
    2625             :             coeff1dOffset,
    2626             :             0,
    2627             :             coeff_est_entropy_coder_ptr,
    2628             :             coeffSamplesTB,
    2629             :             yCountNonZeroCoeffsTemp,
    2630             :             0,
    2631             :             0,
    2632             :             &y_tu_coeff_bits,
    2633             :             &y_tu_coeff_bits,
    2634             :             &y_tu_coeff_bits,
    2635           0 :             context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    2636           0 :             context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    2637           0 :             cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_Y],
    2638           0 :             cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_UV],
    2639             :             COMPONENT_LUMA);
    2640             : 
                               // Combine distortion, bits and full_lambda into y_full_cost.
    2641           0 :         av1_tu_calc_cost_luma(
    2642           0 :             context_ptr->md_context->luma_txb_skip_context,
    2643             :             candidate_buffer->candidate_ptr,
    2644           0 :             context_ptr->txb_itr,
    2645           0 :             context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    2646             :             yCountNonZeroCoeffsTemp,
    2647             :             tuFullDistortion[0],
    2648             :             &y_tu_coeff_bits,
    2649             :             &y_full_cost,
    2650           0 :             context_ptr->full_lambda);
    2651             : 
                               // Strict comparison: on equal cost the earlier transform type is kept.
    2652           0 :         if (y_full_cost < best_full_cost) {
    2653           0 :             best_full_cost = y_full_cost;
    2654           0 :             best_tx_type = tx_type;
    2655             :         }
    2656             :     }
    2657             : 
                           // Replace the last tried candidate type with the winner.
    2658           0 :     txb_ptr->transform_type[PLANE_TYPE_Y] = best_tx_type;
    2659             : 
    2660             :     // For Inter blocks, transform type of chroma follows luma transform type
    2661           0 :     if (is_inter)
    2662           0 :         txb_ptr->transform_type[PLANE_TYPE_UV] = txb_ptr->transform_type[PLANE_TYPE_Y];
    2663           0 : }
    2664             : 
    /*
     * inv_transform_recon_wrapper
     *
     * Forwards one inverse-transform + reconstruction request to
     * av1_inv_transform_recon when hbd is set, or to
     * av1_inv_transform_recon8bit otherwise.
     *
     * Parameters:
     *   pred_buffer, pred_offset, pred_stride  prediction buffer, element offset
     *                                          into it, and its stride.
     *   rec_buffer, rec_offset, rec_stride     reconstruction buffer, element
     *                                          offset into it, and its stride.
     *   rec_coeff_buffer, coeff_offset         coefficient buffer and the int32_t
     *                                          element offset added to it.
     *   hbd                                    selects the 16-bit path when set.
     *   txsize, transform_type,
     *   component_type, eob                    passed through unchanged.
     *
     * In the hbd path the two sample buffers are reinterpreted as uint16_t, so
     * pred_offset / rec_offset count uint16_t elements there and uint8_t
     * elements in the 8-bit path. Both paths pass 0 for the lossless argument.
     *
     * Coverage note: the hbd branch shows 0 hits in this report; only the
     * 8-bit branch was exercised by the recorded run.
     */
    2665    44058200 : void inv_transform_recon_wrapper(
    2666             :     uint8_t    *pred_buffer,
    2667             :     uint32_t    pred_offset,
    2668             :     uint32_t    pred_stride,
    2669             :     uint8_t    *rec_buffer,
    2670             :     uint32_t    rec_offset,
    2671             :     uint32_t    rec_stride,
    2672             :     int32_t    *rec_coeff_buffer,
    2673             :     uint32_t    coeff_offset,
    2674             :     EbBool      hbd,
    2675             :     TxSize      txsize,
    2676             :     TxType      transform_type,
    2677             :     PlaneType   component_type,
    2678             :     uint32_t    eob)
    2679             : {
    2680    44058200 :     if (hbd) {
                               // 16-bit samples: apply the offsets on uint16_t pointers, then wrap them
                               // with CONVERT_TO_BYTEPTR. The bit-depth argument is fixed to
                               // BIT_INCREMENT_10BIT.
    2681           0 :         av1_inv_transform_recon(
    2682           0 :             rec_coeff_buffer + coeff_offset,
    2683           0 :             CONVERT_TO_BYTEPTR(((uint16_t*)pred_buffer) + pred_offset), pred_stride,
    2684           0 :             CONVERT_TO_BYTEPTR(((uint16_t*)rec_buffer) + rec_offset), rec_stride,
    2685             :             txsize,
    2686             :             BIT_INCREMENT_10BIT,
    2687             :             transform_type,
    2688             :             component_type,
    2689             :             eob, 0 /*lossless*/);
    2690             :     } else {
                               // 8-bit samples: offsets are applied directly on the uint8_t pointers.
    2691    44058200 :         av1_inv_transform_recon8bit(
    2692    44058200 :             rec_coeff_buffer + coeff_offset,
    2693             :             pred_buffer + pred_offset, pred_stride,
    2694             :             rec_buffer + rec_offset, rec_stride,
    2695             :             txsize,
    2696             :             transform_type,
    2697             :             component_type,
    2698             :             eob, 0 /*lossless*/);
    2699             :     }
    2700    44052400 : }
    2701             : 
    2702             : /****************************************
    2703             :  ************  Full loop ****************
    2704             : ****************************************/
    2705    17201000 : void full_loop_r(
    2706             :     LargestCodingUnit            *sb_ptr,
    2707             :     ModeDecisionCandidateBuffer  *candidate_buffer,
    2708             :     ModeDecisionContext          *context_ptr,
    2709             :     EbPictureBufferDesc          *input_picture_ptr,
    2710             :     PictureControlSet            *picture_control_set_ptr,
    2711             :     uint32_t                          component_mask,
    2712             :     uint32_t                          cb_qp,
    2713             :     uint32_t                          cr_qp,
    2714             :     uint32_t                          *cb_count_non_zero_coeffs,
    2715             :     uint32_t                          *cr_count_non_zero_coeffs)
    2716             : {
    2717             :     (void)sb_ptr;
    2718             :     (void)cr_qp;
    2719             :     (void)input_picture_ptr;
    2720             :     int16_t                *chromaResidualPtr;
    2721             :     uint32_t                 tu_origin_index;
    2722             :     UNUSED(tu_origin_index);
    2723             :     uint32_t                 tuCbOriginIndex;
    2724             :     uint32_t                 tuCrOriginIndex;
    2725             :     uint32_t                 tuCount;
    2726             :     uint32_t                 txb_itr;
    2727             :     uint32_t                 txb_origin_x;
    2728             :     uint32_t                 txb_origin_y;
    2729             : 
    2730    17201000 :     SequenceControlSet    *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
    2731             : 
    2732    17201000 :     context_ptr->three_quad_energy = 0;
    2733             : 
    2734    17201000 :     uint8_t tx_depth = candidate_buffer->candidate_ptr->tx_depth;
    2735    17201000 :     tuCount = context_ptr->blk_geom->txb_count[candidate_buffer->candidate_ptr->tx_depth];
    2736    17201000 :     uint32_t  txb_1d_offset = 0;
    2737    17201000 :     tuCount = tx_depth ? 1 : tuCount; //NM: 128x128 exeption
    2738             : 
    2739    17201000 :     txb_itr = 0;
    2740             :     do {
    2741    17198000 :         txb_origin_x = context_ptr->blk_geom->tx_org_x[tx_depth][txb_itr];
    2742    17198000 :         txb_origin_y = context_ptr->blk_geom->tx_org_y[tx_depth][txb_itr];
    2743             : 
    2744    17198000 :         context_ptr->cb_txb_skip_context = 0;
    2745    17198000 :         context_ptr->cb_dc_sign_context = 0;
    2746    17198000 :         get_txb_ctx(
    2747             :             sequence_control_set_ptr,
    2748             :             COMPONENT_CHROMA,
    2749             :             context_ptr->cb_dc_sign_level_coeff_neighbor_array,
    2750    17198000 :             ROUND_UV(context_ptr->sb_origin_x + txb_origin_x) >> 1,
    2751    17198000 :             ROUND_UV(context_ptr->sb_origin_y + txb_origin_y) >> 1,
    2752    17198000 :             context_ptr->blk_geom->bsize_uv,
    2753    17198000 :             context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
    2754             :             &context_ptr->cb_txb_skip_context,
    2755             :             &context_ptr->cb_dc_sign_context);
    2756             : 
    2757             : 
    2758    17202500 :         context_ptr->cr_txb_skip_context = 0;
    2759    17202500 :         context_ptr->cr_dc_sign_context = 0;
    2760    17202500 :         get_txb_ctx(
    2761             :             sequence_control_set_ptr,
    2762             :             COMPONENT_CHROMA,
    2763             :             context_ptr->cr_dc_sign_level_coeff_neighbor_array,
    2764    17202500 :             ROUND_UV(context_ptr->sb_origin_x + txb_origin_x) >> 1,
    2765    17202500 :             ROUND_UV(context_ptr->sb_origin_y + txb_origin_y) >> 1,
    2766    17202500 :             context_ptr->blk_geom->bsize_uv,
    2767    17202500 :             context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
    2768             :             &context_ptr->cr_txb_skip_context,
    2769             :             &context_ptr->cr_dc_sign_context);
    2770             : 
    2771             :         // NADER - TU
    2772    17199200 :         tu_origin_index = txb_origin_x + txb_origin_y * candidate_buffer->residual_quant_coeff_ptr->stride_y;
    2773    17199200 :         tuCbOriginIndex = (((txb_origin_x >> 3) << 3) + (((txb_origin_y >> 3) << 3) * candidate_buffer->residual_quant_coeff_ptr->stride_cb)) >> 1;
    2774    17199200 :         tuCrOriginIndex = (((txb_origin_x >> 3) << 3) + (((txb_origin_y >> 3) << 3) * candidate_buffer->residual_quant_coeff_ptr->stride_cr)) >> 1;
    2775             : 
    2776             :         //    This function replaces the previous Intra Chroma mode if the LM fast
    2777             :             //    cost is better.
    2778             :             //    *Note - this might require that we have inv transform in the loop
    2779    17199200 :         if (component_mask & PICTURE_BUFFER_DESC_Cb_FLAG) {
    2780             :             // Configure the Chroma Residual Ptr
    2781             : 
    2782    12495500 :             chromaResidualPtr = //(candidate_buffer->candidate_ptr->type  == INTRA_MODE )?
    2783             :                   //&(((int16_t*) candidate_buffer->intraChromaResidualPtr->buffer_cb)[tu_chroma_origin_index]):
    2784    12495500 :                 &(((int16_t*)candidate_buffer->residual_ptr->buffer_cb)[tuCbOriginIndex]);
    2785             : 
    2786             :             // Cb Transform
    2787    12495500 :             av1_estimate_transform(
    2788             :                 chromaResidualPtr,
    2789    12495500 :                 candidate_buffer->residual_ptr->stride_cb,
    2790    12495500 :                 &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_cb)[txb_1d_offset]),
    2791             :                 NOT_USED_VALUE,
    2792    12495500 :                 context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
    2793             :                 &context_ptr->three_quad_energy,
    2794             :                 context_ptr->transform_inner_array_ptr,
    2795    12495500 :                 context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
    2796    12495500 :                 candidate_buffer->candidate_ptr->transform_type_uv,
    2797             :                 PLANE_TYPE_UV,
    2798             :                 DEFAULT_SHAPE);
    2799             : 
    2800    24994400 :             int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
    2801    12497200 :                              picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
    2802    12497200 :             candidate_buffer->candidate_ptr->quantized_dc[1][0] = av1_quantize_inv_quantize(
    2803             :                 picture_control_set_ptr,
    2804             :                 context_ptr,
    2805    12497200 :                 &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_cb)[txb_1d_offset]),
    2806             :                 NOT_USED_VALUE,
    2807    12497200 :                 &(((int32_t*)candidate_buffer->residual_quant_coeff_ptr->buffer_cb)[txb_1d_offset]),
    2808    12497200 :                 &(((int32_t*)candidate_buffer->recon_coeff_ptr->buffer_cb)[txb_1d_offset]),
    2809             :                 cb_qp,
    2810             :                 seg_qp,
    2811    12497200 :                 context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr],
    2812    12497200 :                 context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr],
    2813    12497200 :                 context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
    2814    12497200 :                 &candidate_buffer->candidate_ptr->eob[1][txb_itr],
    2815    12497200 :                 &(cb_count_non_zero_coeffs[txb_itr]),
    2816             :                 COMPONENT_CHROMA_CB,
    2817    12497200 :                 context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
    2818    12497200 :                 candidate_buffer->candidate_ptr->transform_type_uv,
    2819             :                 candidate_buffer,
    2820             : #if RDOQ_CHROMA
    2821    12497200 :                 context_ptr->cb_txb_skip_context,
    2822    12497200 :                 context_ptr->cb_dc_sign_context,
    2823    12497200 :                 candidate_buffer->candidate_ptr->pred_mode >= NEARESTMV,
    2824             : #else
    2825             :                 0,
    2826             :                 0,
    2827             :                 0,
    2828             : #endif
    2829    12497200 :                 candidate_buffer->candidate_ptr->use_intrabc,
    2830             :                 EB_FALSE);
    2831             : 
    2832    12496500 :             if (context_ptr->spatial_sse_full_loop) {
    2833    12474500 :                 uint32_t cb_has_coeff = cb_count_non_zero_coeffs[txb_itr] > 0;
    2834             : 
    2835    12474500 :                 if (cb_has_coeff)
    2836     5606660 :                     inv_transform_recon_wrapper(
    2837     5606660 :                         candidate_buffer->prediction_ptr->buffer_cb,
    2838             :                         tuCbOriginIndex,
    2839     5606660 :                         candidate_buffer->prediction_ptr->stride_cb,
    2840     5606660 :                         candidate_buffer->recon_ptr->buffer_cb,
    2841             :                         tuCbOriginIndex,
    2842     5606660 :                         candidate_buffer->recon_ptr->stride_cb,
    2843     5606660 :                         (int32_t*) candidate_buffer->recon_coeff_ptr->buffer_cb,
    2844             :                         txb_1d_offset,
    2845     5606660 :                         context_ptr->hbd_mode_decision,
    2846     5606660 :                         context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
    2847     5606660 :                         candidate_buffer->candidate_ptr->transform_type_uv,
    2848             :                         PLANE_TYPE_UV,
    2849     5606660 :                         (uint32_t)candidate_buffer->candidate_ptr->eob[1][txb_itr]);
    2850             :                 else
    2851     6867860 :                     picture_copy(
    2852             :                         candidate_buffer->prediction_ptr,
    2853             :                         0,
    2854             :                         tuCbOriginIndex,
    2855             :                         candidate_buffer->recon_ptr,
    2856             :                         0,
    2857             :                         tuCbOriginIndex,
    2858             :                         0,
    2859             :                         0,
    2860     6867860 :                         context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr],
    2861     6867860 :                         context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr],
    2862             :                         PICTURE_BUFFER_DESC_Cb_FLAG,
    2863     6867860 :                         context_ptr->hbd_mode_decision);
    2864             :             }
    2865             :         }
    2866             : 
    2867    17199600 :         if (component_mask & PICTURE_BUFFER_DESC_Cr_FLAG) {
    2868             :             // Configure the Chroma Residual Ptr
    2869             : 
    2870    12132200 :             chromaResidualPtr = //(candidate_buffer->candidate_ptr->type  == INTRA_MODE )?
    2871             :                 //&(((int16_t*) candidate_buffer->intraChromaResidualPtr->buffer_cr)[tu_chroma_origin_index]):
    2872    12132200 :                 &(((int16_t*)candidate_buffer->residual_ptr->buffer_cr)[tuCrOriginIndex]);
    2873             : 
    2874             :             // Cr Transform
    2875    12132200 :             av1_estimate_transform(
    2876             :                 chromaResidualPtr,
    2877    12132200 :                 candidate_buffer->residual_ptr->stride_cr,
    2878    12132200 :                 &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_cr)[txb_1d_offset]),
    2879             :                 NOT_USED_VALUE,
    2880    12132200 :                 context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
    2881             :                 &context_ptr->three_quad_energy,
    2882             :                 context_ptr->transform_inner_array_ptr,
    2883    12132200 :                 context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
    2884    12132200 :                 candidate_buffer->candidate_ptr->transform_type_uv,
    2885             :                 PLANE_TYPE_UV,
    2886             :                 DEFAULT_SHAPE);
    2887    24266900 :             int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
    2888    12133500 :                              picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
    2889             : 
    2890    12133500 :             candidate_buffer->candidate_ptr->quantized_dc[2][0] = av1_quantize_inv_quantize(
    2891             :                 picture_control_set_ptr,
    2892             :                 context_ptr,
    2893    12133500 :                 &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_cr)[txb_1d_offset]),
    2894             :                 NOT_USED_VALUE,
    2895    12133500 :                 &(((int32_t*)candidate_buffer->residual_quant_coeff_ptr->buffer_cr)[txb_1d_offset]),
    2896    12133500 :                 &(((int32_t*)candidate_buffer->recon_coeff_ptr->buffer_cr)[txb_1d_offset]),
    2897             :                 cb_qp,
    2898             :                 seg_qp,
    2899    12133500 :                 context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr],
    2900    12133500 :                 context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr],
    2901    12133500 :                 context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
    2902    12133500 :                 &candidate_buffer->candidate_ptr->eob[2][txb_itr],
    2903    12133500 :                 &(cr_count_non_zero_coeffs[txb_itr]),
    2904             :                 COMPONENT_CHROMA_CR,
    2905    12133500 :                 context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
    2906    12133500 :                 candidate_buffer->candidate_ptr->transform_type_uv,
    2907             :                 candidate_buffer,
    2908             : #if RDOQ_CHROMA
    2909    12133500 :                 context_ptr->cr_txb_skip_context,
    2910    12133500 :                 context_ptr->cr_dc_sign_context,
    2911    12133500 :                 candidate_buffer->candidate_ptr->pred_mode >= NEARESTMV,
    2912             : #else
    2913             :                 0,
    2914             :                 0,
    2915             :                 0,
    2916             : #endif
    2917    12133500 :                 candidate_buffer->candidate_ptr->use_intrabc,
    2918             :                 EB_FALSE);
    2919             : 
    2920    12132500 :             if (context_ptr->spatial_sse_full_loop) {
    2921    12111000 :                 uint32_t cr_has_coeff = cr_count_non_zero_coeffs[txb_itr] > 0;
    2922             : 
    2923    12111000 :                 if (cr_has_coeff)
    2924     3716780 :                     inv_transform_recon_wrapper(
    2925     3716780 :                         candidate_buffer->prediction_ptr->buffer_cr,
    2926             :                         tuCrOriginIndex,
    2927     3716780 :                         candidate_buffer->prediction_ptr->stride_cr,
    2928     3716780 :                         candidate_buffer->recon_ptr->buffer_cr,
    2929             :                         tuCrOriginIndex,
    2930     3716780 :                         candidate_buffer->recon_ptr->stride_cr,
    2931     3716780 :                         (int32_t*) candidate_buffer->recon_coeff_ptr->buffer_cr,
    2932             :                         txb_1d_offset,
    2933     3716780 :                         context_ptr->hbd_mode_decision,
    2934     3716780 :                         context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
    2935     3716780 :                         candidate_buffer->candidate_ptr->transform_type_uv,
    2936             :                         PLANE_TYPE_UV,
    2937     3716780 :                         (uint32_t)candidate_buffer->candidate_ptr->eob[2][txb_itr]);
    2938             :                 else
    2939     8394220 :                     picture_copy(
    2940             :                         candidate_buffer->prediction_ptr,
    2941             :                         0,
    2942             :                         tuCbOriginIndex,
    2943             :                         candidate_buffer->recon_ptr,
    2944             :                         0,
    2945             :                         tuCbOriginIndex,
    2946             :                         0,
    2947             :                         0,
    2948     8394220 :                         context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr],
    2949     8394220 :                         context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr],
    2950             :                         PICTURE_BUFFER_DESC_Cr_FLAG,
    2951     8394220 :                         context_ptr->hbd_mode_decision);
    2952             :             }
    2953             :         }
    2954             : 
    2955    17199000 :         txb_1d_offset += context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr] * context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr];
    2956             : 
    2957    17199000 :         ++txb_itr;
    2958    17199000 :     } while (txb_itr < tuCount);
    2959    17202000 : }
    2960             : 
    2961             : //****************************************
    2962             : // ************ CuFullDistortionFastTuMode ****************
    2963             : //****************************************/
    2964    17204900 : void cu_full_distortion_fast_tu_mode_r(
    2965             :     LargestCodingUnit            *sb_ptr,
    2966             :     ModeDecisionCandidateBuffer  *candidate_buffer,
    2967             :     ModeDecisionContext          *context_ptr,
    2968             :     ModeDecisionCandidate        *candidate_ptr,
    2969             :     PictureControlSet            *picture_control_set_ptr,
    2970             :     EbPictureBufferDesc          *input_picture_ptr,
    2971             :     uint64_t                      cbFullDistortion[DIST_CALC_TOTAL],
    2972             :     uint64_t                      crFullDistortion[DIST_CALC_TOTAL],
    2973             :     uint32_t                      count_non_zero_coeffs[3][MAX_NUM_OF_TU_PER_CU],
    2974             :     COMPONENT_TYPE                component_type,
    2975             :     uint64_t                      *cb_coeff_bits,
    2976             :     uint64_t                      *cr_coeff_bits,
    2977             :     EbBool                         is_full_loop)
    2978             : {
    2979             :     (void)sb_ptr;
    2980             : 
    2981             :     uint64_t                          y_tu_coeff_bits;
    2982             :     uint64_t                          cb_tu_coeff_bits;
    2983             :     uint64_t                          cr_tu_coeff_bits;
    2984             :     uint32_t                          tu_origin_index;
    2985             :     uint32_t                          txb_origin_x;
    2986             :     uint32_t                          txb_origin_y;
    2987             :     uint32_t                          currentTuIndex;
    2988             :     int32_t                           chromaShift;
    2989             :     uint32_t                          tu_chroma_origin_index;
    2990             :     EB_ALIGN(16) uint64_t             tuFullDistortion[3][DIST_CALC_TOTAL];
    2991             :     EbPictureBufferDesc              *transform_buffer;
    2992             :     uint32_t                          tuTotalCount;
    2993    17204900 :     uint32_t                          txb_itr = 0;
    2994    17204900 :     uint8_t tx_depth = candidate_buffer->candidate_ptr->tx_depth;
    2995    17204900 :     tuTotalCount = context_ptr->blk_geom->txb_count[tx_depth];
    2996    17204900 :     currentTuIndex = 0;
    2997    17204900 :     transform_buffer = context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr;
    2998             : 
    2999    17204900 :     uint32_t  txb_1d_offset = 0;
    3000    17204900 :     candidate_ptr->u_has_coeff = 0;
    3001    17204900 :     candidate_ptr->v_has_coeff = 0;
    3002    17204900 :     tuTotalCount = tx_depth ? 1 : tuTotalCount; //NM: 128x128 exception
    3003             :     do {
    3004    17213400 :         txb_origin_x = context_ptr->blk_geom->tx_org_x[tx_depth][txb_itr];
    3005    17213400 :         txb_origin_y = context_ptr->blk_geom->tx_org_y[tx_depth][txb_itr];
    3006    17213400 :         int32_t cropped_tx_width_uv = MIN(context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr], picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.max_frame_width / 2 - ((context_ptr->sb_origin_x + ((txb_origin_x >> 3) << 3)) >> 1));
    3007    17213400 :         int32_t cropped_tx_height_uv = MIN(context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr], picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.max_frame_height / 2 - ((context_ptr->sb_origin_y + ((txb_origin_y >> 3) << 3)) >> 1));
    3008    17213400 :         tu_origin_index = txb_origin_x + txb_origin_y * candidate_buffer->residual_quant_coeff_ptr->stride_y;
    3009    17213400 :         tu_chroma_origin_index = txb_1d_offset;
    3010             :         // Reset the Bit Costs
    3011    17213400 :         y_tu_coeff_bits = 0;
    3012    17213400 :         cb_tu_coeff_bits = 0;
    3013    17213400 :         cr_tu_coeff_bits = 0;
    3014             : 
    3015    17213400 :         if (component_type == COMPONENT_CHROMA_CB || component_type == COMPONENT_CHROMA_CR || component_type == COMPONENT_CHROMA || component_type == COMPONENT_ALL) {
    3016             :             uint32_t countNonZeroCoeffsAll[3];
    3017    17204500 :             countNonZeroCoeffsAll[0] = count_non_zero_coeffs[0][currentTuIndex];
    3018    17204500 :             countNonZeroCoeffsAll[1] = count_non_zero_coeffs[1][currentTuIndex];
    3019    17204500 :             countNonZeroCoeffsAll[2] = count_non_zero_coeffs[2][currentTuIndex];
    3020             : 
    3021    17204500 :             if (is_full_loop && context_ptr->spatial_sse_full_loop) {
    3022     7422890 :                 uint32_t input_chroma_tu_origin_index = (((context_ptr->sb_origin_y + ((txb_origin_y >> 3) << 3)) >> 1) + (input_picture_ptr->origin_y >> 1)) * input_picture_ptr->stride_cb + (((context_ptr->sb_origin_x + ((txb_origin_x >> 3) << 3)) >> 1) + (input_picture_ptr->origin_x >> 1));
    3023     7422890 :                 uint32_t tu_uv_origin_index = (((txb_origin_x >> 3) << 3) + (((txb_origin_y >> 3) << 3) * candidate_buffer->residual_quant_coeff_ptr->stride_cb)) >> 1;
    3024             : 
    3025    14845800 :                 EbSpatialFullDistType spatial_full_dist_type_fun = context_ptr->hbd_mode_decision ?
    3026     7422890 :                     full_distortion_kernel16_bits : spatial_full_distortion_kernel;
    3027             : 
    3028    14844900 :                 tuFullDistortion[1][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun(
    3029             :                     input_picture_ptr->buffer_cb,
    3030             :                     input_chroma_tu_origin_index,
    3031     7422890 :                     input_picture_ptr->stride_cb,
    3032     7422890 :                     candidate_buffer->prediction_ptr->buffer_cb,
    3033             :                     tu_uv_origin_index,
    3034     7422890 :                     candidate_buffer->prediction_ptr->stride_cb,
    3035             :                     cropped_tx_width_uv,
    3036             :                     cropped_tx_height_uv);
    3037             : 
    3038    14844000 :                 tuFullDistortion[1][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun(
    3039             :                     input_picture_ptr->buffer_cb,
    3040             :                     input_chroma_tu_origin_index,
    3041     7422010 :                     input_picture_ptr->stride_cb,
    3042     7422010 :                     candidate_buffer->recon_ptr->buffer_cb,
    3043             :                     tu_uv_origin_index,
    3044     7422010 :                     candidate_buffer->recon_ptr->stride_cb,
    3045             :                     cropped_tx_width_uv,
    3046             :                     cropped_tx_height_uv);
    3047             : 
    3048    14843900 :                 tuFullDistortion[2][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun(
    3049             :                     input_picture_ptr->buffer_cr,
    3050             :                     input_chroma_tu_origin_index,
    3051     7421970 :                     input_picture_ptr->stride_cr,
    3052     7421970 :                     candidate_buffer->prediction_ptr->buffer_cr,
    3053             :                     tu_uv_origin_index,
    3054     7421970 :                     candidate_buffer->prediction_ptr->stride_cr,
    3055             :                     cropped_tx_width_uv,
    3056             :                     cropped_tx_height_uv);
    3057             : 
    3058    14843600 :                 tuFullDistortion[2][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun(
    3059             :                     input_picture_ptr->buffer_cr,
    3060             :                     input_chroma_tu_origin_index,
    3061     7421880 :                     input_picture_ptr->stride_cr,
    3062     7421880 :                     candidate_buffer->recon_ptr->buffer_cr,
    3063             :                     tu_uv_origin_index,
    3064     7421880 :                     candidate_buffer->recon_ptr->stride_cr,
    3065             :                     cropped_tx_width_uv,
    3066             :                     cropped_tx_height_uv);
    3067     7421760 :                 tuFullDistortion[1][DIST_CALC_PREDICTION]   <<= 4;
    3068     7421760 :                 tuFullDistortion[1][DIST_CALC_RESIDUAL]     <<= 4;
    3069     7421760 :                 tuFullDistortion[2][DIST_CALC_PREDICTION]   <<= 4;
    3070     7421760 :                 tuFullDistortion[2][DIST_CALC_RESIDUAL]     <<= 4;
    3071             :             }
    3072             :             else {
    3073             :             // *Full Distortion (SSE)
    3074             :             // *Note - there are known issues with how this distortion metric is currently
    3075             :             //    calculated.  The amount of scaling between the two arrays is not
    3076             :             //    equivalent.
    3077             : 
    3078     9781610 :             picture_full_distortion32_bits(
    3079             :                 transform_buffer,
    3080             :                 NOT_USED_VALUE,
    3081             :                 tu_chroma_origin_index,
    3082             :                 candidate_buffer->recon_coeff_ptr,
    3083             :                 NOT_USED_VALUE,
    3084             :                 tu_chroma_origin_index,
    3085             :                 NOT_USED_VALUE,
    3086             :                 NOT_USED_VALUE,
    3087     9781610 :                 context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr],
    3088     9781610 :                 context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr],
    3089             :                 tuFullDistortion[0],
    3090             :                 tuFullDistortion[1],
    3091             :                 tuFullDistortion[2],
    3092             :                 countNonZeroCoeffsAll[0],
    3093             :                 countNonZeroCoeffsAll[1],
    3094             :                 countNonZeroCoeffsAll[2],
    3095             :                 component_type);
    3096     9791380 :             TxSize    txSize = context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr];
    3097     9791380 :             chromaShift = (MAX_TX_SCALE - av1_get_tx_scale(txSize)) * 2;
    3098     9790880 :             tuFullDistortion[1][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(tuFullDistortion[1][DIST_CALC_RESIDUAL], chromaShift);
    3099     9790880 :             tuFullDistortion[1][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(tuFullDistortion[1][DIST_CALC_PREDICTION], chromaShift);
    3100     9790880 :             tuFullDistortion[2][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(tuFullDistortion[2][DIST_CALC_RESIDUAL], chromaShift);
    3101     9790880 :             tuFullDistortion[2][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(tuFullDistortion[2][DIST_CALC_PREDICTION], chromaShift);
    3102             :             }
    3103             :             //CHROMA-ONLY
    3104    17212600 :             av1_tu_estimate_coeff_bits(
    3105             :                 context_ptr,
    3106             :                 0,//allow_update_cdf,
    3107             :                 NULL,//FRAME_CONTEXT *ec_ctx,
    3108             :                 picture_control_set_ptr,
    3109             :                 candidate_buffer,
    3110             :                 tu_origin_index,
    3111             :                 tu_chroma_origin_index,
    3112             :                 context_ptr->coeff_est_entropy_coder_ptr,
    3113             :                 candidate_buffer->residual_quant_coeff_ptr,
    3114             :                 count_non_zero_coeffs[0][currentTuIndex],
    3115    17212600 :                 count_non_zero_coeffs[1][currentTuIndex],
    3116    17212600 :                 count_non_zero_coeffs[2][currentTuIndex],
    3117             :                 &y_tu_coeff_bits,
    3118             :                 &cb_tu_coeff_bits,
    3119             :                 &cr_tu_coeff_bits,
    3120    17212600 :                 context_ptr->blk_geom->txsize[tx_depth][txb_itr],
    3121    17212600 :                 context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
    3122    17212600 :                 candidate_buffer->candidate_ptr->transform_type[txb_itr],
    3123    17212600 :                 candidate_buffer->candidate_ptr->transform_type_uv,
    3124             :                 component_type);
    3125             : 
    3126             :             // OMK Useless ? We don't calculate Chroma CBF here
    3127    17194000 :             av1_tu_calc_cost(
    3128             :                 candidate_ptr,
    3129    17194000 :                 context_ptr->luma_txb_skip_context,
    3130             :                 currentTuIndex,
    3131             :                 count_non_zero_coeffs[0][currentTuIndex],
    3132    17194000 :                 count_non_zero_coeffs[1][currentTuIndex],
    3133    17194000 :                 count_non_zero_coeffs[2][currentTuIndex],
    3134             :                 tuFullDistortion[0],
    3135             :                 tuFullDistortion[1],
    3136             :                 tuFullDistortion[2],
    3137             :                 component_type,
    3138             :                 &y_tu_coeff_bits,
    3139             :                 &cb_tu_coeff_bits,
    3140             :                 &cr_tu_coeff_bits,
    3141    17194000 :                 context_ptr->blk_geom->txsize[tx_depth][txb_itr],
    3142    17194000 :                 context_ptr->full_lambda);
    3143             : 
    3144    17199900 :             *cb_coeff_bits += cb_tu_coeff_bits;
    3145    17199900 :             *cr_coeff_bits += cr_tu_coeff_bits;
    3146    17199900 :             cbFullDistortion[DIST_CALC_RESIDUAL] += tuFullDistortion[1][DIST_CALC_RESIDUAL];
    3147    17199900 :             crFullDistortion[DIST_CALC_RESIDUAL] += tuFullDistortion[2][DIST_CALC_RESIDUAL];
    3148    17199900 :             cbFullDistortion[DIST_CALC_PREDICTION] += tuFullDistortion[1][DIST_CALC_PREDICTION];
    3149    17199900 :             crFullDistortion[DIST_CALC_PREDICTION] += tuFullDistortion[2][DIST_CALC_PREDICTION];
    3150             :         }
    3151             : 
    3152    17208800 :         txb_1d_offset += context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr] * context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr];
    3153    17208800 :         currentTuIndex++;
    3154             : 
    3155    17208800 :         ++txb_itr;
    3156    17208800 :     } while (txb_itr < tuTotalCount);
    3157    17200300 : }
    3158             : 
    3159             : /***************************************
    3160             :  * Check merge_block algorithm
    3161             :  ***************************************/
    3162     1039640 : EbBool merge_1D_inter_block(
    3163             :     ModeDecisionContext    *context_ptr,
    3164             :     uint32_t                  sq_idx,
    3165             :     uint32_t                  nsq_idx) {
    3166     1039640 :     EbBool merge_blocks = EB_FALSE;
    3167     1039640 :     CodingUnit  *parent_cu_ptr = &context_ptr->md_cu_arr_nsq[sq_idx];
    3168     1039640 :     CodingUnit  *child_cu_ptr = &context_ptr->md_cu_arr_nsq[nsq_idx];
    3169     1039640 :     int parent_diriction = parent_cu_ptr->prediction_unit_array[0].inter_pred_direction_index;
    3170     1039640 :     int parent_mv_l0 = parent_cu_ptr->prediction_unit_array[0].mv[REF_LIST_0].mv_union;
    3171     1039640 :     int parent_mv_l1 = parent_cu_ptr->prediction_unit_array[0].mv[REF_LIST_1].mv_union;
    3172     1039640 :     int child_0_diriction = child_cu_ptr->prediction_unit_array[0].inter_pred_direction_index;
    3173     1039640 :     int child_0_mv_l0 = child_cu_ptr->prediction_unit_array[0].mv[REF_LIST_0].mv_union;
    3174     1039640 :     int child_0_mv_l1 = child_cu_ptr->prediction_unit_array[0].mv[REF_LIST_1].mv_union;
    3175     1039640 :     int child_eob = child_cu_ptr->block_has_coeff;
    3176     1039640 :     if (parent_diriction == child_0_diriction && child_eob == 0) {
    3177      607059 :         switch (parent_diriction) {
    3178       96317 :         case UNI_PRED_LIST_0:
    3179       96317 :             if (parent_mv_l0 == child_0_mv_l0)
    3180       31917 :                 merge_blocks = EB_TRUE;
    3181       96317 :             break;
    3182       25603 :         case UNI_PRED_LIST_1:
    3183       25603 :             if (parent_mv_l1 == child_0_mv_l1)
    3184        8147 :                 merge_blocks = EB_TRUE;
    3185       25603 :             break;
    3186      469675 :         case BI_PRED:
    3187      469675 :             if (parent_mv_l0 == child_0_mv_l0 &&
    3188             :                 parent_mv_l1 == child_0_mv_l1) {
    3189      133319 :                 merge_blocks = EB_TRUE;
    3190             :             }
    3191      469675 :             break;
    3192       15464 :         default:
    3193       15464 :             merge_blocks = EB_FALSE;
    3194       15464 :             break;
    3195             :         }
    3196      432581 :     }
    3197     1039640 :     return merge_blocks;
    3198             : }
    3199      924655 : void  d1_non_square_block_decision(
    3200             :     ModeDecisionContext               *context_ptr
    3201             : #if ADD_SUPPORT_TO_SKIP_PART_N
    3202             :     , uint32_t                         d1_block_itr
    3203             : #endif
    3204             : )
    3205             : {
    3206             :     //compute total cost for the whole block partition
    3207      924655 :     uint64_t tot_cost = 0;
    3208      924655 :     uint32_t first_blk_idx = context_ptr->cu_ptr->mds_idx - (context_ptr->blk_geom->totns - 1);//index of first block in this partition
    3209             :     uint32_t blk_it;
    3210      924655 :     uint32_t merge_block_cnt = 0;
    3211      924655 :     EbBool merge_block_flag = EB_FALSE;
    3212     2746620 :     for (blk_it = 0; blk_it < context_ptr->blk_geom->totns; blk_it++)
    3213             :     {
    3214     1821970 :         tot_cost += context_ptr->md_local_cu_unit[first_blk_idx + blk_it].cost;
    3215     1821970 :         if (context_ptr->blk_geom->sqi_mds != first_blk_idx + blk_it)
    3216     1412660 :             if (context_ptr->md_local_cu_unit[context_ptr->blk_geom->sqi_mds].avail_blk_flag)
    3217     1039640 :                 merge_block_cnt += merge_1D_inter_block(context_ptr, context_ptr->blk_geom->sqi_mds, first_blk_idx + blk_it);
    3218             :     }
    3219      924654 :     if (context_ptr->blk_geom->bsize > BLOCK_4X4) {
    3220      738786 :         uint64_t split_cost = 0;
    3221      738786 :         uint32_t parent_depth_idx_mds = context_ptr->blk_geom->sqi_mds;
    3222      738786 :         av1_split_flag_rate(
    3223      738786 :             context_ptr->sb_ptr->picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr,
    3224             :             context_ptr,
    3225      738786 :             &context_ptr->md_cu_arr_nsq[parent_depth_idx_mds],
    3226             :             0,
    3227      738786 :             from_shape_to_part[context_ptr->blk_geom->shape],
    3228             :             &split_cost,
    3229      738786 :             context_ptr->full_lambda,
    3230             :             context_ptr->md_rate_estimation_ptr,
    3231      738786 :             context_ptr->sb_ptr->picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->max_sb_depth);
    3232             : 
    3233      738850 :         tot_cost += split_cost;
    3234             :     }
    3235      924718 :     if (merge_block_cnt == context_ptr->blk_geom->totns) merge_block_flag = EB_TRUE;
    3236             : #if ADD_SUPPORT_TO_SKIP_PART_N
    3237      924718 :     if (d1_block_itr == 0 || (tot_cost < context_ptr->md_local_cu_unit[context_ptr->blk_geom->sqi_mds].cost && merge_block_flag == EB_FALSE))
    3238             : #else
    3239             :     if (context_ptr->blk_geom->shape == PART_N || (tot_cost < context_ptr->md_local_cu_unit[context_ptr->blk_geom->sqi_mds].cost && merge_block_flag == EB_FALSE))
    3240             : #endif
    3241             :     {
    3242             :         //store best partition cost in parent square
    3243      421493 :         context_ptr->md_local_cu_unit[context_ptr->blk_geom->sqi_mds].cost = tot_cost;
    3244      421493 :         context_ptr->md_cu_arr_nsq[context_ptr->blk_geom->sqi_mds].part = from_shape_to_part[context_ptr->blk_geom->shape];
    3245      421493 :         context_ptr->md_cu_arr_nsq[context_ptr->blk_geom->sqi_mds].best_d1_blk = first_blk_idx;
    3246             :     }
    3247      924718 : }
    3248             : 
    3249             : /// compute the cost of curr depth, and the depth above
    3250      100178 : void   compute_depth_costs(
    3251             :     ModeDecisionContext    *context_ptr,
    3252             :     SequenceControlSet     *sequence_control_set_ptr,
    3253             :     uint32_t                  curr_depth_mds,
    3254             :     uint32_t                  above_depth_mds,
    3255             :     uint32_t                  step,
    3256             :     uint64_t                 *above_depth_cost,
    3257             :     uint64_t                 *curr_depth_cost)
    3258             : {
    3259      100178 :     uint64_t       above_non_split_rate = 0;
    3260      100178 :     uint64_t       above_split_rate = 0;
    3261             : 
    3262             :     /*
    3263             :     ___________
    3264             :     |     |     |
    3265             :     |blk0 |blk1 |
    3266             :     |-----|-----|
    3267             :     |blk2 |blk3 |
    3268             :     |_____|_____|
    3269             :     */
    3270             :     // current depth blocks
    3271      100178 :     uint32_t       curr_depth_blk0_mds = curr_depth_mds - 3 * step;
    3272      100178 :     uint32_t       curr_depth_blk1_mds = curr_depth_mds - 2 * step;
    3273      100178 :     uint32_t       curr_depth_blk2_mds = curr_depth_mds - 1 * step;
    3274      100178 :     uint32_t       curr_depth_blk3_mds = curr_depth_mds;
    3275             : 
    3276             :     // Rate of not splitting the current depth (Depth != 4) in case the children were omitted by MDC
    3277      100178 :     uint64_t       curr_non_split_rate_blk0 = 0;
    3278      100178 :     uint64_t       curr_non_split_rate_blk1 = 0;
    3279      100178 :     uint64_t       curr_non_split_rate_blk2 = 0;
    3280      100178 :     uint64_t       curr_non_split_rate_blk3 = 0;
    3281             : 
    3282      100178 :     context_ptr->md_local_cu_unit[above_depth_mds].left_neighbor_mode = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].left_neighbor_mode;
    3283      100178 :     context_ptr->md_local_cu_unit[above_depth_mds].left_neighbor_depth = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].left_neighbor_depth;
    3284      100178 :     context_ptr->md_local_cu_unit[above_depth_mds].top_neighbor_mode = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].top_neighbor_mode;
    3285      100178 :     context_ptr->md_local_cu_unit[above_depth_mds].top_neighbor_depth = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].top_neighbor_depth;
    3286      100178 :     context_ptr->md_local_cu_unit[above_depth_mds].left_neighbor_partition = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].left_neighbor_partition;
    3287      100178 :     context_ptr->md_local_cu_unit[above_depth_mds].above_neighbor_partition = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].above_neighbor_partition;
    3288             : 
    3289             :     // Compute above depth  cost
    3290      100178 :     if (context_ptr->md_local_cu_unit[above_depth_mds].tested_cu_flag == EB_TRUE)
    3291             :     {
    3292       96405 :         *above_depth_cost = context_ptr->md_local_cu_unit[above_depth_mds].cost + above_non_split_rate;
    3293             :         // Compute curr depth  cost
    3294       96405 :         av1_split_flag_rate(
    3295             :             sequence_control_set_ptr,
    3296             :             context_ptr,
    3297       96405 :             &context_ptr->md_cu_arr_nsq[above_depth_mds],
    3298             :             0,
    3299             :             PARTITION_SPLIT,
    3300             :             &above_split_rate,
    3301       96405 :             context_ptr->full_lambda,
    3302             :             context_ptr->md_rate_estimation_ptr,
    3303       96405 :             sequence_control_set_ptr->max_sb_depth);
    3304             :     }
    3305             :     else
    3306        3773 :         *above_depth_cost = MAX_MODE_COST;
    3307      100178 :     if (context_ptr->blk_geom->bsize > BLOCK_4X4) {
    3308       38491 :         if (context_ptr->md_local_cu_unit[curr_depth_blk0_mds].tested_cu_flag)
    3309       38491 :             if (context_ptr->md_cu_arr_nsq[curr_depth_blk0_mds].mdc_split_flag == 0)
    3310       30293 :                 av1_split_flag_rate(
    3311             :                     sequence_control_set_ptr,
    3312             :                     context_ptr,
    3313       30293 :                     &context_ptr->md_cu_arr_nsq[curr_depth_blk0_mds],
    3314             :                     0,
    3315             :                     PARTITION_NONE,
    3316             :                     &curr_non_split_rate_blk0,
    3317       30293 :                     context_ptr->full_lambda,
    3318             :                     context_ptr->md_rate_estimation_ptr,
    3319       30293 :                     sequence_control_set_ptr->max_sb_depth);
    3320             : 
    3321       38490 :         if (context_ptr->md_local_cu_unit[curr_depth_blk1_mds].tested_cu_flag)
    3322       38486 :             if (context_ptr->md_cu_arr_nsq[curr_depth_blk1_mds].mdc_split_flag == 0)
    3323       30293 :                 av1_split_flag_rate(
    3324             :                     sequence_control_set_ptr,
    3325             :                     context_ptr,
    3326       30293 :                     &context_ptr->md_cu_arr_nsq[curr_depth_blk1_mds],
    3327             :                     0,
    3328             :                     PARTITION_NONE,
    3329             :                     &curr_non_split_rate_blk1,
    3330       30293 :                     context_ptr->full_lambda,
    3331             :                     context_ptr->md_rate_estimation_ptr,
    3332       30293 :                     sequence_control_set_ptr->max_sb_depth);
    3333             : 
    3334       38490 :         if (context_ptr->md_local_cu_unit[curr_depth_blk2_mds].tested_cu_flag)
    3335       38491 :             if (context_ptr->md_cu_arr_nsq[curr_depth_blk2_mds].mdc_split_flag == 0)
    3336       30293 :                 av1_split_flag_rate(
    3337             :                     sequence_control_set_ptr,
    3338             :                     context_ptr,
    3339       30293 :                     &context_ptr->md_cu_arr_nsq[curr_depth_blk2_mds],
    3340             :                     0,
    3341             :                     PARTITION_NONE,
    3342             :                     &curr_non_split_rate_blk2,
    3343       30293 :                     context_ptr->full_lambda,
    3344             :                     context_ptr->md_rate_estimation_ptr,
    3345       30293 :                     sequence_control_set_ptr->max_sb_depth);
    3346             : 
    3347       38490 :         if (context_ptr->md_local_cu_unit[curr_depth_blk3_mds].tested_cu_flag)
    3348       38491 :             if (context_ptr->md_cu_arr_nsq[curr_depth_blk3_mds].mdc_split_flag == 0)
    3349       30293 :                 av1_split_flag_rate(
    3350             :                     sequence_control_set_ptr,
    3351             :                     context_ptr,
    3352       30293 :                     &context_ptr->md_cu_arr_nsq[curr_depth_blk3_mds],
    3353             :                     0,
    3354             :                     PARTITION_NONE,
    3355             :                     &curr_non_split_rate_blk3,
    3356       30293 :                     context_ptr->full_lambda,
    3357             :                     context_ptr->md_rate_estimation_ptr,
    3358       30293 :                     sequence_control_set_ptr->max_sb_depth);
    3359             :     }
    3360             :     //curr_non_split_rate_344 = splitflag_mdc_344 || 4x4 ? 0 : compute;
    3361             : 
    3362      100177 :     *curr_depth_cost =
    3363      100177 :         context_ptr->md_local_cu_unit[curr_depth_mds].cost + curr_non_split_rate_blk3 +
    3364      100177 :         context_ptr->md_local_cu_unit[curr_depth_mds - 1 * step].cost + curr_non_split_rate_blk2 +
    3365      100177 :         context_ptr->md_local_cu_unit[curr_depth_mds - 2 * step].cost + curr_non_split_rate_blk1 +
    3366      100177 :         context_ptr->md_local_cu_unit[curr_depth_mds - 3 * step].cost + curr_non_split_rate_blk0 +
    3367             :         above_split_rate;
    3368      100177 : }
    3369             : 
    3370      409400 : uint32_t d2_inter_depth_block_decision( // Inter-depth decision: starting at blk_mds, walks up through parent blocks and, for each one, keeps either the parent's own cost or the current-depth cost; returns the resulting block index.
    3371             :     ModeDecisionContext          *context_ptr, // md_cu_arr_nsq[] / md_local_cu_unit[] entries of visited parents are read and updated
    3372             :     uint32_t                        blk_mds, // index of the starting block in the md_* arrays
    3373             :     LargestCodingUnit            *tb_ptr, // not used
    3374             :     uint32_t                          lcuAddr, // indexes sequence_control_set_ptr->sb_geom[] in the loop below
    3375             :     uint32_t                          tbOriginX, // not used
    3376             :     uint32_t                          tbOriginY, // not used
    3377             :     uint64_t                          full_lambda, // not used
    3378             :     MdRateEstimationContext      *md_rate_estimation_ptr, // not used
    3379             :     PictureControlSet            *picture_control_set_ptr) // supplies the sequence control set and slice_type
    3380             : {
    3381             :     UNUSED(tb_ptr);
    3382             :     UNUSED(lcuAddr); // NOTE(review): lcuAddr is in fact read below (sb_geom[lcuAddr]); this marker looks stale
    3383             :     UNUSED(tbOriginX);
    3384             :     UNUSED(tbOriginY);
    3385             :     UNUSED(full_lambda);
    3386             :     UNUSED(md_rate_estimation_ptr);
    3387             : 
    3388             :     uint32_t                  lastCuIndex, d0_idx_mds, d1_idx_mds, d2_idx_mds, top_left_idx_mds; // only lastCuIndex influences the result; d0_idx_mds and top_left_idx_mds are never assigned
    3389             :     UNUSED(top_left_idx_mds);
    3390             :     UNUSED(d2_idx_mds);
    3391             :     UNUSED(d1_idx_mds);
    3392             :     UNUSED(d0_idx_mds);
    3393      409400 :     uint64_t                    parent_depth_cost = 0, current_depth_cost = 0;
    3394      409400 :     SequenceControlSet     *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
    3395             :     EbBool                    lastDepthFlag;
    3396             :     const BlockGeom          * blk_geom;
    3397             : 
    3398      409400 :     lastDepthFlag = context_ptr->md_cu_arr_nsq[blk_mds].split_flag == EB_FALSE ? EB_TRUE : EB_FALSE; // EB_TRUE when the starting block is not flagged as split
    3399      409400 :     d1_idx_mds = blk_mds;
    3400      409400 :     d2_idx_mds = blk_mds;
    3401      409400 :     lastCuIndex = blk_mds; // default return value when no parent wins
    3402      409400 :     blk_geom = get_blk_geom_mds(blk_mds);
    3403      409366 :     uint32_t    parent_depth_idx_mds = blk_mds;
    3404      409366 :     uint32_t    current_depth_idx_mds = blk_mds;
    3405             : 
    3406      409366 :     if (lastDepthFlag) { // the upward walk only happens for a non-split starting block
    3407      413224 :         while (blk_geom->is_last_quadrant) { // presumably set for the final child of a parent, i.e. all siblings are done - confirm in BlockGeom
    3408             :             // Get parent idx: step back by a table offset selected by (SB is 128x128?, current depth)
    3409      100178 :             parent_depth_idx_mds = current_depth_idx_mds - parent_depth_offset[sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128][blk_geom->depth];
    3410      100178 :             if (picture_control_set_ptr->slice_type == I_SLICE && parent_depth_idx_mds == 0 && sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128) // I slice, 128x128 SB, parent index 0: parent is given the maximum cost
    3411           0 :                 parent_depth_cost = MAX_MODE_COST; // hit count 0 in this report. NOTE(review): current_depth_cost is not refreshed on this path, so the comparison below uses 0 or the previous iteration's value
    3412             :             else
    3413      100178 :                 compute_depth_costs(context_ptr, sequence_control_set_ptr, current_depth_idx_mds, parent_depth_idx_mds, ns_depth_offset[sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128][blk_geom->depth], &parent_depth_cost, &current_depth_cost); // both cost variables are passed as output pointers
    3414      100185 :             if (!sequence_control_set_ptr->sb_geom[lcuAddr].block_is_allowed[parent_depth_idx_mds]) // a parent block that is not allowed in this SB can never win
    3415       20400 :                 parent_depth_cost = MAX_MODE_COST;
    3416      100185 :             if (parent_depth_cost <= current_depth_cost) { // ties go to the parent
    3417       73988 :                 context_ptr->md_cu_arr_nsq[parent_depth_idx_mds].split_flag = EB_FALSE;
    3418       73988 :                 context_ptr->md_local_cu_unit[parent_depth_idx_mds].cost = parent_depth_cost;
    3419       73988 :                 lastCuIndex = parent_depth_idx_mds; // remember the most recent parent that won
    3420             :             }
    3421             :             else { // current depth wins: the parent inherits its cost and is marked PARTITION_SPLIT
    3422       26197 :                 context_ptr->md_local_cu_unit[parent_depth_idx_mds].cost = current_depth_cost;
    3423       26197 :                 context_ptr->md_cu_arr_nsq[parent_depth_idx_mds].part = PARTITION_SPLIT;
    3424             :             }
    3425             : 
    3426             :             // Set up the next iteration: the parent becomes the current block, one depth up
    3427      100185 :             blk_geom = get_blk_geom_mds(parent_depth_idx_mds);
    3428      100181 :             current_depth_idx_mds = parent_depth_idx_mds;
    3429             :         }
    3430             :     }
    3431             : 
    3432      409369 :     return lastCuIndex; // blk_mds, or the index of the most recent parent whose cost was <= the current-depth cost
    3433             : }
    3434      299913 : void   compute_depth_costs_md_skip( // Computes the cost of the above-depth block and the accumulated cost of the current-depth blocks that precede context_ptr->blk_geom.
    3435             :     ModeDecisionContext *context_ptr, // blk_geom, md_local_cu_unit[] and md_cu_arr_nsq[] are read; neighbor fields of the above-depth entry are written
    3436             :     SequenceControlSet  *sequence_control_set_ptr, // forwarded to av1_split_flag_rate(); max_sb_depth is read
    3437             :     uint32_t             above_depth_mds, // index of the above-depth block
    3438             :     uint32_t             step, // index distance between consecutive blocks of the current depth
    3439             :     uint64_t            *above_depth_cost, // out: above-depth cost, or MAX_MODE_COST if that block was not tested
    3440             :     uint64_t            *curr_depth_cost) // out: summed costs (plus rates) of the preceding current-depth blocks, plus the above split rate
    3441             : {
    3442      299913 :     uint64_t       above_non_split_rate = 0; // never modified in this function, so it always contributes 0
    3443      299913 :     uint64_t       above_split_rate = 0;
    3444      299913 :     *curr_depth_cost = 0;
    3445             :     // Sum the previous blocks: i starts at 1, so the block at sqi_mds itself is not included
    3446      895123 :     for (int i = 1; i < context_ptr->blk_geom->quadi + 1; i++) {
    3447      595150 :         uint32_t curr_depth_cur_blk_mds = context_ptr->blk_geom->sqi_mds - i * step;
    3448      595150 :         uint64_t       curr_non_split_rate_blk = 0;
    3449      595150 :         if (context_ptr->blk_geom->bsize > BLOCK_4X4) { // for BLOCK_4X4 the rate stays 0
    3450      316368 :             if (context_ptr->md_local_cu_unit[curr_depth_cur_blk_mds].tested_cu_flag)
    3451      316340 :                 if (context_ptr->md_cu_arr_nsq[curr_depth_cur_blk_mds].mdc_split_flag == 0)
    3452      184382 :                     av1_split_flag_rate( // presumably writes the rate for the given partition type into the output argument - confirm against its definition
    3453             :                         sequence_control_set_ptr,
    3454             :                         context_ptr,
    3455      184382 :                         &context_ptr->md_cu_arr_nsq[curr_depth_cur_blk_mds],
    3456             :                         0,
    3457             :                         PARTITION_NONE,
    3458             :                         &curr_non_split_rate_blk,
    3459      184382 :                         context_ptr->full_lambda,
    3460             :                         context_ptr->md_rate_estimation_ptr,
    3461      184382 :                         sequence_control_set_ptr->max_sb_depth);
    3462             :         }
    3463      595210 :         *curr_depth_cost +=
    3464      595210 :             context_ptr->md_local_cu_unit[curr_depth_cur_blk_mds].cost + curr_non_split_rate_blk;
    3465             :     }
    3466             :     /*
    3467             :     ___________
    3468             :     |     |     |
    3469             :     |blk0 |blk1 |
    3470             :     |-----|-----|
    3471             :     |blk2 |blk3 |
    3472             :     |_____|_____|
    3473             :     */
    3474             :     // Index of blk0 at the current depth: quadi steps back from sqi_mds
    3475      299973 :     uint32_t       curr_depth_blk0_mds = context_ptr->blk_geom->sqi_mds - context_ptr->blk_geom->quadi * step;
    3476             : 
    3477      299973 :     context_ptr->md_local_cu_unit[above_depth_mds].left_neighbor_mode = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].left_neighbor_mode; // this and the next five lines copy blk0's neighbor fields to the above-depth entry
    3478      299973 :     context_ptr->md_local_cu_unit[above_depth_mds].left_neighbor_depth = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].left_neighbor_depth;
    3479      299973 :     context_ptr->md_local_cu_unit[above_depth_mds].top_neighbor_mode = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].top_neighbor_mode;
    3480      299973 :     context_ptr->md_local_cu_unit[above_depth_mds].top_neighbor_depth = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].top_neighbor_depth;
    3481      299973 :     context_ptr->md_local_cu_unit[above_depth_mds].left_neighbor_partition = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].left_neighbor_partition;
    3482      299973 :     context_ptr->md_local_cu_unit[above_depth_mds].above_neighbor_partition = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].above_neighbor_partition;
    3483             : 
    3484             :     // Compute the above-depth cost (only meaningful if that block was actually tested)
    3485      299973 :     if (context_ptr->md_local_cu_unit[above_depth_mds].tested_cu_flag == EB_TRUE)
    3486             :     {
    3487      288051 :         *above_depth_cost = context_ptr->md_local_cu_unit[above_depth_mds].cost + above_non_split_rate;
    3488             :         // Rate obtained for PARTITION_SPLIT on the above-depth block; it is added to *curr_depth_cost at the end
    3489      288051 :         av1_split_flag_rate(
    3490             :             sequence_control_set_ptr,
    3491             :             context_ptr,
    3492      288051 :             &context_ptr->md_cu_arr_nsq[above_depth_mds],
    3493             :             0,
    3494             :             PARTITION_SPLIT,
    3495             :             &above_split_rate,
    3496      288051 :             context_ptr->full_lambda,
    3497             :             context_ptr->md_rate_estimation_ptr,
    3498      288051 :             sequence_control_set_ptr->max_sb_depth);
    3499             :     }
    3500             :     else
    3501       11922 :         *above_depth_cost = MAX_MODE_COST; // untested above block: above_split_rate stays 0 on this path
    3502             : 
    3503             : 
    3504      299953 :     *curr_depth_cost +=
    3505             :         above_split_rate;
    3506      299953 : }

Generated by: LCOV version 1.14