LCOV - code coverage report
Current view: top level - Codec - EbCodingLoop.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1004 1617 62.1 %
Date: 2019-11-25 17:38:06 Functions: 10 13 76.9 %

          Line data    Source code
       1             : /*
       2             : * Copyright(c) 2019 Intel Corporation
       3             : * SPDX-License-Identifier: BSD-2-Clause-Patent
       4             : */
       5             : 
       6             : /*
       7             : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       8             : *
       9             : * This source code is subject to the terms of the BSD 2 Clause License and
      10             : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      11             : * was not distributed with this source code in the LICENSE file, you can
      12             : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      13             : * Media Patent License 1.0 was not distributed with this source code in the
      14             : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      15             : */
      16             : #include <string.h>
      17             : 
      18             : #include "EbDefinitions.h"
      19             : #include "EbUtility.h"
      20             : #include "EbTransformUnit.h"
      21             : #include "EbRateDistortionCost.h"
      22             : #include "EbDeblockingFilter.h"
      23             : #include "EbPictureOperators.h"
      24             : 
      25             : #include "EbSegmentation.h"
      26             : #include "EbModeDecisionProcess.h"
      27             : #include "EbEncDecProcess.h"
      28             : #include "EbSvtAv1ErrorCodes.h"
      29             : #include "EbTransforms.h"
      30             : #include "EbModeDecisionConfigurationProcess.h"
      31             : #include "EbIntraPrediction.h"
      32             : #include "aom_dsp_rtcd.h"
      33             : #include "EbCodingLoop.h"
      34             : 
      35             : void av1_set_ref_frame(MvReferenceFrame *rf,
      36             :     int8_t ref_frame_type);
      37             : 
      38             : /*******************************************
      39             : * set Penalize Skip Flag
      40             : *
      41             : * Summary: Set the penalize_skipflag to true
      42             : * When there is luminance/chrominance change
      43             : * or in noisy clip with low motion at medium
      44             : * variance area
      45             : *
      46             : *******************************************/
      47             : 
      48             : #define S32 32*32
      49             : #define S16 16*16
      50             : #define S8  8*8
      51             : #define S4  4*4
      52             : 
      53             : static EB_AV1_INTER_PREDICTION_FUNC_PTR   av1_inter_prediction_function_table[2] =
      54             : {
      55             :     av1_inter_prediction,
      56             :     av1_inter_prediction_hbd
      57             : };
      58             : 
      59             : typedef void(*EB_AV1_ENCODE_LOOP_FUNC_PTR)(
      60             :     PictureControlSet    *picture_control_set_ptr,
      61             :     EncDecContext       *context_ptr,
      62             :     LargestCodingUnit   *sb_ptr,
      63             :     uint32_t                 origin_x,
      64             :     uint32_t                 origin_y,
      65             :     uint32_t                 cb_qp,
      66             :     EbPictureBufferDesc *predSamples,             // no basis/offset
      67             :     EbPictureBufferDesc *coeffSamplesTB,          // lcu based
      68             :     EbPictureBufferDesc *residual16bit,           // no basis/offset
      69             :     EbPictureBufferDesc *transform16bit,          // no basis/offset
      70             :     EbPictureBufferDesc *inverse_quant_buffer,
      71             :     int16_t                *transformScratchBuffer,
      72             :     uint32_t                  *count_non_zero_coeffs,
      73             :     uint32_t                 component_mask,
      74             :     uint32_t                 dZoffset,
      75             :     uint16_t                 *eob,
      76             :     MacroblockPlane       *candidate_plane);
      77             : 
      78             : typedef void(*EB_AV1_GENERATE_RECON_FUNC_PTR)(
      79             :     EncDecContext       *context_ptr,
      80             :     uint32_t                 origin_x,
      81             :     uint32_t                 origin_y,
      82             :     EbPictureBufferDesc *predSamples,     // no basis/offset
      83             :     EbPictureBufferDesc *residual16bit,    // no basis/offset
      84             :     int16_t                *transformScratchBuffer,
      85             :     uint32_t                 component_mask,
      86             :     uint16_t                *eob);
      87             : 
      88             : 
      89             : /*******************************************
      90             : * Residual Kernel 8-16bit
      91             :     Computes the residual data
      92             : *******************************************/
      93    62253500 : void residual_kernel(
      94             :     uint8_t   *input,
      95             :     uint32_t   input_offset,
      96             :     uint32_t   input_stride,
      97             :     uint8_t   *pred,
      98             :     uint32_t   pred_offset,
      99             :     uint32_t   pred_stride,
     100             :     int16_t   *residual,
     101             :     uint32_t   residual_offset,
     102             :     uint32_t   residual_stride,
     103             :     EbBool     hbd,
     104             :     uint32_t   area_width,
     105             :     uint32_t   area_height)
     106             : {
     107             : 
     108    62253500 :     if (hbd) {
     109           0 :         residual_kernel16bit(
     110           0 :             ((uint16_t*)input) + input_offset,
     111             :             input_stride,
     112           0 :             ((uint16_t*)pred) + pred_offset,
     113             :             pred_stride,
     114           0 :             residual + residual_offset,
     115             :             residual_stride,
     116             :             area_width,
     117             :             area_height);
     118             :     } else {
     119    62253500 :         residual_kernel8bit(
     120             :             &(input[input_offset]),
     121             :             input_stride,
     122             :             &(pred[pred_offset]),
     123             :             pred_stride,
     124    62253500 :             residual + residual_offset,
     125             :             residual_stride,
     126             :             area_width,
     127             :             area_height);
     128             :     }
     129    62242200 : }
     130             : 
     131             : /***************************************************
     132             : * Update Intra Mode Neighbor Arrays
     133             : ***************************************************/
     134        6418 : static void EncodePassUpdateIntraModeNeighborArrays(
     135             :     NeighborArrayUnit *mode_type_neighbor_array,
     136             :     NeighborArrayUnit *intra_luma_mode_neighbor_array,
     137             :     NeighborArrayUnit *intra_chroma_mode_neighbor_array,
     138             :     uint8_t            luma_mode,
     139             :     uint8_t            chroma_mode,
     140             :     uint32_t           origin_x,
     141             :     uint32_t           origin_y,
     142             :     uint32_t           width,
     143             :     uint32_t           height,
     144             :     uint32_t           width_uv,
     145             :     uint32_t           height_uv,
     146             :     uint32_t           component_mask)
     147             : {
     148        6418 :     uint8_t modeType = INTRA_MODE;
     149             : 
     150        6418 :     if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) {
     151             :         // Mode Type Update
     152        6418 :         neighbor_array_unit_mode_write(
     153             :             mode_type_neighbor_array,
     154             :             &modeType,
     155             :             origin_x,
     156             :             origin_y,
     157             :             width,
     158             :             height,
     159             :             NEIGHBOR_ARRAY_UNIT_FULL_MASK);
     160             : 
     161             :         // Intra Luma Mode Update
     162        6418 :         neighbor_array_unit_mode_write(
     163             :             intra_luma_mode_neighbor_array,
     164             :             &luma_mode,
     165             :             origin_x,
     166             :             origin_y,
     167             :             width,
     168             :             height,
     169             :             NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
     170             :     }
     171        6418 :     if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
     172             :         // Intra Chroma Mode Update
     173        5347 :         neighbor_array_unit_mode_write(
     174             :             intra_chroma_mode_neighbor_array,
     175             :             &chroma_mode,
     176        5347 :             ((origin_x >> 3) << 3) / 2,
     177        5347 :             ((origin_y >> 3) << 3) / 2,
     178             :             width_uv,
     179             :             height_uv,
     180             :             NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
     181             :     }
     182             : 
     183        6418 :     return;
     184             : }
     185             : 
     186             : /***************************************************
     187             : * Update Inter Mode Neighbor Arrays
     188             : ***************************************************/
     189       27426 : static void EncodePassUpdateInterModeNeighborArrays(
     190             :     NeighborArrayUnit *mode_type_neighbor_array,
     191             :     NeighborArrayUnit *mv_neighbor_array,
     192             :     NeighborArrayUnit *skipNeighborArray,
     193             :     MvUnit            *mv_unit,
     194             :     uint8_t           *skip_flag,
     195             :     uint32_t           origin_x,
     196             :     uint32_t           origin_y,
     197             :     uint32_t           bwidth,
     198             :     uint32_t           bheight)
     199             : {
     200       27426 :     uint8_t modeType = INTER_MODE;
     201             : 
     202             :     // Mode Type Update
     203       27426 :     neighbor_array_unit_mode_write(
     204             :         mode_type_neighbor_array,
     205             :         &modeType,
     206             :         origin_x,
     207             :         origin_y,
     208             :         bwidth,
     209             :         bheight,
     210             :         NEIGHBOR_ARRAY_UNIT_FULL_MASK);
     211             : 
     212             :     // Motion Vector Unit
     213       27424 :     neighbor_array_unit_mode_write(
     214             :         mv_neighbor_array,
     215             :         (uint8_t*)mv_unit,
     216             :         origin_x,
     217             :         origin_y,
     218             :         bwidth,
     219             :         bheight,
     220             :         NEIGHBOR_ARRAY_UNIT_FULL_MASK);
     221             : 
     222             :     // Skip Flag
     223       27426 :     neighbor_array_unit_mode_write(
     224             :         skipNeighborArray,
     225             :         skip_flag,
     226             :         origin_x,
     227             :         origin_y,
     228             :         bwidth,
     229             :         bheight,
     230             :         NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
     231             : 
     232       27426 :     return;
     233             : }
     234             : 
     235             : /***************************************************
     236             : * Update Recon Samples Neighbor Arrays
     237             : ***************************************************/
     238       41783 : static void EncodePassUpdateReconSampleNeighborArrays(
     239             :     NeighborArrayUnit     *lumaReconSampleNeighborArray,
     240             :     NeighborArrayUnit     *cbReconSampleNeighborArray,
     241             :     NeighborArrayUnit     *crReconSampleNeighborArray,
     242             :     EbPictureBufferDesc   *recon_buffer,
     243             :     uint32_t                   origin_x,
     244             :     uint32_t                   origin_y,
     245             :     uint32_t                   width,
     246             :     uint32_t                   height,
     247             :     uint32_t                   bwidth_uv,
     248             :     uint32_t                   bheight_uv,
     249             :     uint32_t                   component_mask,
     250             :     EbBool                  is16bit)
     251             : {
     252       41783 :     uint32_t                 round_origin_x = (origin_x >> 3) << 3;// for Chroma blocks with size of 4
     253       41783 :     uint32_t                 round_origin_y = (origin_y >> 3) << 3;// for Chroma blocks with size of 4
     254             : 
     255       41783 :     if (is16bit == EB_TRUE) {
     256           0 :         if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK)
     257             :         {
     258             :             // Recon Samples - Luma
     259           0 :             neighbor_array_unit16bit_sample_write(
     260             :                 lumaReconSampleNeighborArray,
     261           0 :                 (uint16_t*)(recon_buffer->buffer_y),
     262           0 :                 recon_buffer->stride_y,
     263           0 :                 recon_buffer->origin_x + origin_x,
     264           0 :                 recon_buffer->origin_y + origin_y,
     265             :                 origin_x,
     266             :                 origin_y,
     267             :                 width,
     268             :                 height,
     269             :                 NEIGHBOR_ARRAY_UNIT_FULL_MASK);
     270             :         }
     271             : 
     272           0 :         if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
     273             :             // Recon Samples - Cb
     274           0 :             neighbor_array_unit16bit_sample_write(
     275             :                 cbReconSampleNeighborArray,
     276           0 :                 (uint16_t*)(recon_buffer->buffer_cb),
     277           0 :                 recon_buffer->stride_cb,
     278           0 :                 (recon_buffer->origin_x + round_origin_x) >> 1,
     279           0 :                 (recon_buffer->origin_y + round_origin_y) >> 1,
     280             :                 round_origin_x >> 1,
     281             :                 round_origin_y >> 1,
     282             :                 bwidth_uv,
     283             :                 bheight_uv,
     284             :                 NEIGHBOR_ARRAY_UNIT_FULL_MASK);
     285             : 
     286             :             // Recon Samples - Cr
     287           0 :             neighbor_array_unit16bit_sample_write(
     288             :                 crReconSampleNeighborArray,
     289           0 :                 (uint16_t*)(recon_buffer->buffer_cr),
     290           0 :                 recon_buffer->stride_cr,
     291           0 :                 (recon_buffer->origin_x + round_origin_x) >> 1,
     292           0 :                 (recon_buffer->origin_y + round_origin_y) >> 1,
     293             :                 round_origin_x >> 1,
     294             :                 round_origin_y >> 1,
     295             :                 bwidth_uv,
     296             :                 bheight_uv,
     297             :                 NEIGHBOR_ARRAY_UNIT_FULL_MASK);
     298             :         }
     299             :     }
     300             :     else {
     301       41783 :         if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK)
     302             :         {
     303             :             // Recon Samples - Luma
     304       36437 :             neighbor_array_unit_sample_write(
     305             :                 lumaReconSampleNeighborArray,
     306             :                 recon_buffer->buffer_y,
     307       36437 :                 recon_buffer->stride_y,
     308       36437 :                 recon_buffer->origin_x + origin_x,
     309       36437 :                 recon_buffer->origin_y + origin_y,
     310             :                 origin_x,
     311             :                 origin_y,
     312             :                 width,
     313             :                 height,
     314             :                 NEIGHBOR_ARRAY_UNIT_FULL_MASK);
     315             :         }
     316             : 
     317       41786 :         if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
     318             :             // Recon Samples - Cb
     319       32502 :             neighbor_array_unit_sample_write(
     320             :                 cbReconSampleNeighborArray,
     321             :                 recon_buffer->buffer_cb,
     322       32502 :                 recon_buffer->stride_cb,
     323       32502 :                 (recon_buffer->origin_x + round_origin_x) >> 1,
     324       32502 :                 (recon_buffer->origin_y + round_origin_y) >> 1,
     325             :                 round_origin_x >> 1,
     326             :                 round_origin_y >> 1,
     327             :                 bwidth_uv,
     328             :                 bheight_uv,
     329             :                 NEIGHBOR_ARRAY_UNIT_FULL_MASK);
     330             : 
     331             :             // Recon Samples - Cr
     332       32499 :             neighbor_array_unit_sample_write(
     333             :                 crReconSampleNeighborArray,
     334             :                 recon_buffer->buffer_cr,
     335       32499 :                 recon_buffer->stride_cr,
     336       32499 :                 (recon_buffer->origin_x + round_origin_x) >> 1,
     337       32499 :                 (recon_buffer->origin_y + round_origin_y) >> 1,
     338             :                 round_origin_x >> 1,
     339             :                 round_origin_y >> 1,
     340             :                 bwidth_uv,
     341             :                 bheight_uv,
     342             :                 NEIGHBOR_ARRAY_UNIT_FULL_MASK);
     343             :         }
     344             :     }
     345             : 
     346       41785 :     return;
     347             : }
     348             : 
     349             : /************************************************************
     350             : * Update Intra Luma Neighbor Modes
     351             : ************************************************************/
     352        6418 : void GeneratePuIntraLumaNeighborModes(
     353             :     CodingUnit            *cu_ptr,
     354             :     uint32_t                   pu_origin_x,
     355             :     uint32_t                   pu_origin_y,
     356             :     uint32_t                   sb_sz,
     357             :     NeighborArrayUnit     *intraLumaNeighborArray,
     358             :     NeighborArrayUnit     *intraChromaNeighborArray,
     359             :     NeighborArrayUnit     *mode_type_neighbor_array)
     360             : {
     361             :     (void)sb_sz;
     362             : 
     363        6418 :     uint32_t modeTypeLeftNeighborIndex = get_neighbor_array_unit_left_index(
     364             :         mode_type_neighbor_array,
     365             :         pu_origin_y);
     366        6418 :     uint32_t modeTypeTopNeighborIndex = get_neighbor_array_unit_top_index(
     367             :         mode_type_neighbor_array,
     368             :         pu_origin_x);
     369        6418 :     uint32_t intraLumaModeLeftNeighborIndex = get_neighbor_array_unit_left_index(
     370             :         intraLumaNeighborArray,
     371             :         pu_origin_y);
     372        6418 :     uint32_t intraLumaModeTopNeighborIndex = get_neighbor_array_unit_top_index(
     373             :         intraLumaNeighborArray,
     374             :         pu_origin_x);
     375             : 
     376        6417 :     uint32_t puOriginX_round = (pu_origin_x >> 3) << 3;
     377        6417 :     uint32_t puOriginY_round = (pu_origin_y >> 3) << 3;
     378             : 
     379        6417 :     uint32_t intraChromaModeLeftNeighborIndex = get_neighbor_array_unit_left_index(
     380             :         intraChromaNeighborArray,
     381             :         puOriginY_round >> 1);
     382        6417 :     uint32_t intraChromaModeTopNeighborIndex = get_neighbor_array_unit_top_index(
     383             :         intraChromaNeighborArray,
     384             :         puOriginX_round >> 1);
     385             : 
     386         671 :     (&cu_ptr->prediction_unit_array[0])->intra_luma_left_mode = (uint32_t)(
     387        6417 :         (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] != INTRA_MODE) ? DC_PRED/*EB_INTRA_DC*/ :
     388        5746 :         (uint32_t)intraLumaNeighborArray->left_array[intraLumaModeLeftNeighborIndex]);
     389             : 
     390         689 :     (&cu_ptr->prediction_unit_array[0])->intra_luma_top_mode = (uint32_t)(
     391        6417 :         (mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] != INTRA_MODE) ? DC_PRED/*EB_INTRA_DC*/ :
     392        5728 :         (uint32_t)intraLumaNeighborArray->top_array[intraLumaModeTopNeighborIndex]);       //   use DC. This seems like we could use a LCU-width
     393             : 
     394        6417 :     uint32_t modeTypeLeftNeighborIndex_round = get_neighbor_array_unit_left_index(
     395             :         mode_type_neighbor_array,
     396             :         puOriginY_round);
     397        6417 :     uint32_t modeTypeTopNeighborIndex_round = get_neighbor_array_unit_top_index(
     398             :         mode_type_neighbor_array,
     399             :         puOriginX_round);
     400             : 
     401         639 :     (&cu_ptr->prediction_unit_array[0])->intra_chroma_left_mode = (uint32_t)(
     402        6417 :         (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex_round] != INTRA_MODE) ? UV_DC_PRED :
     403        5778 :         (uint32_t)intraChromaNeighborArray->left_array[intraChromaModeLeftNeighborIndex]);
     404             : 
     405         656 :     (&cu_ptr->prediction_unit_array[0])->intra_chroma_top_mode = (uint32_t)(
     406        6417 :         (mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex_round] != INTRA_MODE) ? UV_DC_PRED :
     407        5761 :         (uint32_t)intraChromaNeighborArray->top_array[intraChromaModeTopNeighborIndex]);       //   use DC. This seems like we could use a LCU-width
     408             : 
     409        6417 :     return;
     410             : }
     411             : 
     412             : void encode_pass_tx_search(
     413             :     PictureControlSet            *picture_control_set_ptr,
     414             :     EncDecContext                *context_ptr,
     415             :     LargestCodingUnit            *sb_ptr,
     416             :     uint32_t                       cb_qp,
     417             :     EbPictureBufferDesc          *coeffSamplesTB,
     418             :     EbPictureBufferDesc          *residual16bit,
     419             :     EbPictureBufferDesc          *transform16bit,
     420             :     EbPictureBufferDesc          *inverse_quant_buffer,
     421             :     int16_t                        *transformScratchBuffer,
     422             :     uint32_t                       *count_non_zero_coeffs,
     423             :     uint32_t                       component_mask,
     424             :     uint32_t                       dZoffset,
     425             :     uint16_t                       *eob,
     426             :     MacroblockPlane                *candidate_plane);
     427             : 
     428             : /**********************************************************
     429             : * Encode Loop
     430             : *
     431             : * Summary: Performs an AV1 conformant
     432             : *   Transform, Quantization  and Inverse Quantization of a TU.
     433             : *
     434             : * Inputs:
     435             : *   origin_x
     436             : *   origin_y
     437             : *   txb_size
     438             : *   sb_sz
     439             : *   input - input samples (position sensitive)
     440             : *   pred - prediction samples (position independent)
     441             : *
     442             : * Outputs:
     443             : *   Inverse quantized coeff - quantization indices (position sensitive)
     444             : *
     445             : **********************************************************/
     446       39406 : static void Av1EncodeLoop(
     447             :     PictureControlSet    *picture_control_set_ptr,
     448             :     EncDecContext       *context_ptr,
     449             :     LargestCodingUnit   *sb_ptr,
     450             :     uint32_t                 origin_x,   //pic based tx org x
     451             :     uint32_t                 origin_y,   //pic based tx org y
     452             :     uint32_t                 cb_qp,
     453             :     EbPictureBufferDesc *predSamples,             // no basis/offset
     454             :     EbPictureBufferDesc *coeffSamplesTB,          // lcu based
     455             :     EbPictureBufferDesc *residual16bit,           // no basis/offset
     456             :     EbPictureBufferDesc *transform16bit,          // no basis/offset
     457             :     EbPictureBufferDesc *inverse_quant_buffer,
     458             :     int16_t                *transformScratchBuffer,
     459             :     uint32_t                  *count_non_zero_coeffs,
     460             :     uint32_t                 component_mask,
     461             :     uint32_t                 dZoffset,
     462             :     uint16_t                 *eob,
     463             :     MacroblockPlane       *candidate_plane){
     464             :     (void)dZoffset;
     465             :     (void)cb_qp;
     466             : 
     467             :     //    uint32_t                 chroma_qp = cb_qp;
     468       39406 :     CodingUnit          *cu_ptr = context_ptr->cu_ptr;
     469       39406 :     TransformUnit       *txb_ptr = &cu_ptr->transform_unit_array[context_ptr->txb_itr];
     470             :     //    EB_SLICE               slice_type = sb_ptr->picture_control_set_ptr->slice_type;
     471             :     //    uint32_t                 temporal_layer_index = sb_ptr->picture_control_set_ptr->temporal_layer_index;
     472       39406 :     uint32_t                 qp = cu_ptr->qp;
     473       39406 :     EbPictureBufferDesc  *input_samples = context_ptr->input_samples;
     474             : 
     475       39406 :     uint32_t                 round_origin_x = (origin_x >> 3) << 3;// for Chroma blocks with size of 4
     476       39406 :     uint32_t                 round_origin_y = (origin_y >> 3) << 3;// for Chroma blocks with size of 4
     477             : 
     478       39406 :     const uint32_t input_luma_offset = ((origin_y + input_samples->origin_y)          * input_samples->stride_y) + (origin_x + input_samples->origin_x);
     479       39406 :     const uint32_t input_cb_offset = (((round_origin_y + input_samples->origin_y) >> 1)    * input_samples->stride_cb) + ((round_origin_x + input_samples->origin_x) >> 1);
     480       39406 :     const uint32_t input_cr_offset = (((round_origin_y + input_samples->origin_y) >> 1)    * input_samples->stride_cr) + ((round_origin_x + input_samples->origin_x) >> 1);
     481       39406 :     const uint32_t pred_luma_offset = ((predSamples->origin_y + origin_y)        * predSamples->stride_y) + (predSamples->origin_x + origin_x);
     482       39406 :     const uint32_t pred_cb_offset = (((predSamples->origin_y + round_origin_y) >> 1)  * predSamples->stride_cb) + ((predSamples->origin_x + round_origin_x) >> 1);
     483       39406 :     const uint32_t pred_cr_offset = (((predSamples->origin_y + round_origin_y) >> 1)  * predSamples->stride_cr) + ((predSamples->origin_x + round_origin_x) >> 1);
     484             : 
     485       39406 :     const uint32_t scratch_luma_offset = context_ptr->blk_geom->tx_org_x[cu_ptr->tx_depth][context_ptr->txb_itr] + context_ptr->blk_geom->tx_org_y[cu_ptr->tx_depth][context_ptr->txb_itr] * SB_STRIDE_Y;
     486       39406 :     const uint32_t scratch_cb_offset = ROUND_UV(context_ptr->blk_geom->tx_org_x[cu_ptr->tx_depth][context_ptr->txb_itr]) / 2 + ROUND_UV(context_ptr->blk_geom->tx_org_y[cu_ptr->tx_depth][context_ptr->txb_itr]) / 2 * SB_STRIDE_UV;
     487       39406 :     const uint32_t scratch_cr_offset = ROUND_UV(context_ptr->blk_geom->tx_org_x[cu_ptr->tx_depth][context_ptr->txb_itr]) / 2 + ROUND_UV(context_ptr->blk_geom->tx_org_y[cu_ptr->tx_depth][context_ptr->txb_itr]) / 2 * SB_STRIDE_UV;
     488             : 
     489       39406 :     const uint32_t coeff1dOffset = context_ptr->coded_area_sb;
     490             : 
     491       39406 :     const uint32_t coeff1dOffsetChroma = context_ptr->coded_area_sb_uv;
     492             :     UNUSED(coeff1dOffsetChroma);
     493             : 
     494       39406 :     context_ptr->three_quad_energy = 0;
     495             :     //**********************************
     496             :     // Luma
     497             :     //**********************************
     498       39406 :     if (component_mask == PICTURE_BUFFER_DESC_FULL_MASK || component_mask == PICTURE_BUFFER_DESC_LUMA_MASK)
     499             :     {
     500       34059 :         residual_kernel8bit(
     501       34059 :             input_samples->buffer_y + input_luma_offset,
     502       34059 :             input_samples->stride_y,
     503       34059 :             predSamples->buffer_y + pred_luma_offset,
     504       34059 :             predSamples->stride_y,
     505       34059 :             ((int16_t*)residual16bit->buffer_y) + scratch_luma_offset,
     506       34059 :             residual16bit->stride_y,
     507       34059 :             context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr],
     508       34059 :             context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr]);
     509       34061 :         uint8_t  tx_search_skip_flag = (picture_control_set_ptr->parent_pcs_ptr->tx_search_level == TX_SEARCH_ENC_DEC && (picture_control_set_ptr->parent_pcs_ptr->atb_mode == 0 || cu_ptr->prediction_mode_flag == INTER_MODE)) ? get_skip_tx_search_flag(
     510       16775 :             context_ptr->blk_geom->sq_size,
     511             :             MAX_MODE_COST,
     512             :             0,
     513             :             1) : 1;
     514             : 
     515       34061 :         if (!tx_search_skip_flag) {
     516       16775 :                 encode_pass_tx_search(
     517             :                     picture_control_set_ptr,
     518             :                     context_ptr,
     519             :                     sb_ptr,
     520             :                     cb_qp,
     521             :                     coeffSamplesTB,
     522             :                     residual16bit,
     523             :                     transform16bit,
     524             :                     inverse_quant_buffer,
     525             :                     transformScratchBuffer,
     526             :                     count_non_zero_coeffs,
     527             :                     component_mask,
     528             :                     dZoffset,
     529             :                     eob,
     530             :                     candidate_plane);
     531             :         }
     532             : 
     533       34061 :         av1_estimate_transform(
     534       34061 :             ((int16_t*)residual16bit->buffer_y) + scratch_luma_offset,
     535       34061 :             residual16bit->stride_y,
     536       34061 :             ((TranLow*)transform16bit->buffer_y) + coeff1dOffset,
     537             :             NOT_USED_VALUE,
     538       34061 :             context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
     539             :             &context_ptr->three_quad_energy,
     540             :             transformScratchBuffer,
     541             :             BIT_INCREMENT_8BIT,
     542       34061 :             txb_ptr->transform_type[PLANE_TYPE_Y],
     543             :             PLANE_TYPE_Y,
     544             :             DEFAULT_SHAPE);
     545             : 
     546       68126 :         int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
     547       34063 :                          picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
     548             : 
     549       68123 :         cu_ptr->quantized_dc[0][context_ptr->txb_itr] = av1_quantize_inv_quantize(
     550       34063 :             sb_ptr->picture_control_set_ptr,
     551             :             context_ptr->md_context,
     552       34063 :             ((TranLow*)transform16bit->buffer_y) + coeff1dOffset,
     553             :             NOT_USED_VALUE,
     554       34063 :             ((int32_t*)coeffSamplesTB->buffer_y) + coeff1dOffset,
     555       34063 :             ((int32_t*)inverse_quant_buffer->buffer_y) + coeff1dOffset,
     556             :             qp,
     557             :             seg_qp,
     558       34063 :             context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr],
     559       34063 :             context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr],
     560       34063 :             context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
     561             :             &eob[0],
     562             :             &(count_non_zero_coeffs[0]),
     563             :             COMPONENT_LUMA,
     564             :             BIT_INCREMENT_8BIT,
     565       34063 :             txb_ptr->transform_type[PLANE_TYPE_Y],
     566       34063 :             &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
     567       34063 :             context_ptr->md_context->luma_txb_skip_context,
     568       34063 :             context_ptr->md_context->luma_dc_sign_context,
     569       34063 :             cu_ptr->pred_mode,
     570       34063 :             cu_ptr->av1xd->use_intrabc,
     571             :             EB_TRUE);
     572             : 
     573       34060 :         if (context_ptr->md_skip_blk) {
     574        7720 :             count_non_zero_coeffs[0] = 0;
     575        7720 :             eob[0] = 0;
     576             :         }
     577       34060 :         txb_ptr->y_has_coeff = count_non_zero_coeffs[0] ? EB_TRUE : EB_FALSE;
     578             : 
     579       34060 :         if (count_non_zero_coeffs[0] == 0) {
     580             :             // INTER. Chroma follows Luma in transform type
     581       20096 :             if (cu_ptr->prediction_mode_flag == INTER_MODE) {
     582       17790 :                 txb_ptr->transform_type[PLANE_TYPE_Y] = DCT_DCT;
     583       17790 :                 txb_ptr->transform_type[PLANE_TYPE_UV] = DCT_DCT;
     584             :             }
     585             :             else { // INTRA
     586        2306 :                 txb_ptr->transform_type[PLANE_TYPE_Y] = DCT_DCT;
     587             :             }
     588             :         }
     589       34060 :         txb_ptr->nz_coef_count[0] = (uint16_t)count_non_zero_coeffs[0];
     590             :     }
     591             : 
     592       39407 :     if (component_mask == PICTURE_BUFFER_DESC_FULL_MASK || component_mask == PICTURE_BUFFER_DESC_CHROMA_MASK) {
     593       28333 :         if (cu_ptr->prediction_mode_flag == INTRA_MODE && (context_ptr->evaluate_cfl_ep || cu_ptr->prediction_unit_array->intra_chroma_mode == UV_CFL_PRED)) {
     594        3737 :             EbPictureBufferDesc *reconSamples = predSamples;
     595        3737 :             uint32_t reconLumaOffset = (reconSamples->origin_y + round_origin_y) * reconSamples->stride_y + (reconSamples->origin_x + round_origin_x);
     596             : 
     597             :             // Down sample Luma
     598       18685 :             cfl_luma_subsampling_420_lbd_c(
     599        3737 :                 reconSamples->buffer_y + reconLumaOffset,
     600        3737 :                 reconSamples->stride_y,
     601        3737 :                 context_ptr->md_context->pred_buf_q3,
     602        3737 :                 context_ptr->blk_geom->bwidth_uv == context_ptr->blk_geom->bwidth ? (context_ptr->blk_geom->bwidth_uv << 1) : context_ptr->blk_geom->bwidth,
     603        3737 :                 context_ptr->blk_geom->bheight_uv == context_ptr->blk_geom->bheight ? (context_ptr->blk_geom->bheight_uv << 1) : context_ptr->blk_geom->bheight);
     604        3737 :             int32_t round_offset = ((context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr])*(context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr])) / 2;
     605             : 
     606        7473 :             eb_subtract_average(
     607        3736 :                 context_ptr->md_context->pred_buf_q3,
     608        3736 :                 context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
     609        3736 :                 context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
     610             :                 round_offset,
     611        3737 :                 LOG2F(context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr]) + LOG2F(context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr]));
     612             : 
     613        3737 :             if (context_ptr->evaluate_cfl_ep)
     614             :             {
     615             :                 // 3: Loop over alphas and find the best or choose DC
     616             :                 // Use the 1st spot of the candidate buffer to hold cfl settings: (1) to use same kernel as MD for CFL evaluation: cfl_rd_pick_alpha() (toward unification), (2) to avoid dedicated buffers for CFL evaluation @ EP (toward less memory)
     617        2682 :                 ModeDecisionCandidateBuffer  *candidate_buffer = &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]);
     618             : 
     619             :                 // Input(s)
     620        2682 :                 candidate_buffer->candidate_ptr->type = INTRA_MODE;
     621        2682 :                 candidate_buffer->candidate_ptr->intra_luma_mode = cu_ptr->pred_mode;
     622        2682 :                 candidate_buffer->candidate_ptr->cfl_alpha_signs = 0;
     623        2682 :                 candidate_buffer->candidate_ptr->cfl_alpha_idx = 0;
     624        2682 :                 context_ptr->md_context->blk_geom = context_ptr->blk_geom;
     625             : 
     626             :                 EbByte src_pred_ptr;
     627             :                 EbByte dst_pred_ptr;
     628             : 
     629             :                 // Copy Cb pred samples from ep buffer to md buffer
     630        2682 :                 src_pred_ptr = predSamples->buffer_cb + pred_cb_offset;
     631        2682 :                 dst_pred_ptr = &(candidate_buffer->prediction_ptr->buffer_cb[scratch_cb_offset]);
     632       17294 :                 for (int i = 0; i < context_ptr->blk_geom->bheight_uv; i++) {
     633       14612 :                     memcpy(dst_pred_ptr, src_pred_ptr, context_ptr->blk_geom->bwidth_uv);
     634       14612 :                     src_pred_ptr += predSamples->stride_cb;
     635       14612 :                     dst_pred_ptr += candidate_buffer->prediction_ptr->stride_cb;
     636             :                 }
     637             : 
     638             :                 // Copy Cr pred samples from ep buffer to md buffer
     639        2682 :                 src_pred_ptr = predSamples->buffer_cr + pred_cr_offset;
     640        2682 :                 dst_pred_ptr = &(candidate_buffer->prediction_ptr->buffer_cr[scratch_cr_offset]);
     641       17294 :                 for (int i = 0; i < context_ptr->blk_geom->bheight_uv; i++) {
     642       14612 :                     memcpy(dst_pred_ptr, src_pred_ptr, context_ptr->blk_geom->bwidth_uv);
     643       14612 :                     src_pred_ptr += predSamples->stride_cr;
     644       14612 :                     dst_pred_ptr += candidate_buffer->prediction_ptr->stride_cr;
     645             :                 }
     646             : 
     647        2682 :                 cfl_rd_pick_alpha(
     648             :                     picture_control_set_ptr,
     649             :                     candidate_buffer,
     650             :                     sb_ptr,
     651             :                     context_ptr->md_context,
     652             :                     input_samples,
     653             :                     input_cb_offset,
     654             :                     scratch_cb_offset);
     655             : 
     656             :                 // Output(s)
     657        2682 :                 if (candidate_buffer->candidate_ptr->intra_chroma_mode == UV_CFL_PRED) {
     658        1003 :                     cu_ptr->prediction_unit_array->intra_chroma_mode = UV_CFL_PRED;
     659        1003 :                     cu_ptr->prediction_unit_array->cfl_alpha_idx = candidate_buffer->candidate_ptr->cfl_alpha_idx;
     660        1003 :                     cu_ptr->prediction_unit_array->cfl_alpha_signs = candidate_buffer->candidate_ptr->cfl_alpha_signs;
     661        1003 :                     cu_ptr->prediction_unit_array->is_directional_chroma_mode_flag = EB_FALSE;
     662             :                 }
     663             :             }
     664             : 
     665        3737 :             if (cu_ptr->prediction_unit_array->intra_chroma_mode == UV_CFL_PRED) {
     666             :                 int32_t alpha_q3 =
     667        2058 :                     cfl_idx_to_alpha(cu_ptr->prediction_unit_array->cfl_alpha_idx, cu_ptr->prediction_unit_array->cfl_alpha_signs, CFL_PRED_U); // once for U, once for V
     668             : 
     669             :                 //TOCHANGE
     670             :                 //assert(chroma_size * CFL_BUF_LINE + chroma_size <= CFL_BUF_SQUARE);
     671             : 
     672        2058 :                 eb_cfl_predict_lbd(
     673        2058 :                     context_ptr->md_context->pred_buf_q3,
     674        2058 :                     predSamples->buffer_cb + pred_cb_offset,
     675        2058 :                     predSamples->stride_cb,
     676        2058 :                     predSamples->buffer_cb + pred_cb_offset,
     677        2058 :                     predSamples->stride_cb,
     678             :                     alpha_q3,
     679             :                     8,
     680        2058 :                     context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
     681        2058 :                     context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr]);
     682             :                 alpha_q3 =
     683        2058 :                     cfl_idx_to_alpha(cu_ptr->prediction_unit_array->cfl_alpha_idx, cu_ptr->prediction_unit_array->cfl_alpha_signs, CFL_PRED_V); // once for U, once for V
     684             : 
     685             :                 //TOCHANGE
     686             :                 //assert(chroma_size * CFL_BUF_LINE + chroma_size <= CFL_BUF_SQUARE);
     687             : 
     688        2058 :                 eb_cfl_predict_lbd(
     689        2058 :                     context_ptr->md_context->pred_buf_q3,
     690        2058 :                     predSamples->buffer_cr + pred_cr_offset,
     691        2058 :                     predSamples->stride_cr,
     692        2058 :                     predSamples->buffer_cr + pred_cr_offset,
     693        2058 :                     predSamples->stride_cr,
     694             :                     alpha_q3,
     695             :                     8,
     696        2058 :                     context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
     697        2058 :                     context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr]);
     698             :             }
     699             :         }
     700             : 
     701             :         //**********************************
     702             :         // Cb
     703             :         //**********************************
     704             : 
     705       28333 :         residual_kernel8bit(
     706       28333 :             input_samples->buffer_cb + input_cb_offset,
     707       28333 :             input_samples->stride_cb,
     708       28333 :             predSamples->buffer_cb + pred_cb_offset,
     709       28333 :             predSamples->stride_cb,
     710       28333 :             ((int16_t*)residual16bit->buffer_cb) + scratch_cb_offset,
     711       28333 :             residual16bit->stride_cb,
     712       28333 :             context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
     713       28333 :             context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr]);
     714             : 
     715       28331 :         residual_kernel8bit(
     716       28331 :             input_samples->buffer_cr + input_cr_offset,
     717       28331 :             input_samples->stride_cr,
     718       28331 :             predSamples->buffer_cr + pred_cr_offset,
     719       28331 :             predSamples->stride_cr,
     720       28331 :             ((int16_t*)residual16bit->buffer_cr) + scratch_cr_offset,
     721       28331 :             residual16bit->stride_cr,
     722       28331 :             context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
     723       28331 :             context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr]);
     724             : 
     725       28335 :         av1_estimate_transform(
     726       28335 :             ((int16_t*)residual16bit->buffer_cb) + scratch_cb_offset,
     727       28335 :             residual16bit->stride_cb,
     728       28335 :             ((TranLow*)transform16bit->buffer_cb) + context_ptr->coded_area_sb_uv,
     729             :             NOT_USED_VALUE,
     730       28335 :             context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
     731             :             &context_ptr->three_quad_energy,
     732             :             transformScratchBuffer,
     733             :             BIT_INCREMENT_8BIT,
     734       28335 :             txb_ptr->transform_type[PLANE_TYPE_UV],
     735             :             PLANE_TYPE_UV,
     736             :             DEFAULT_SHAPE);
     737             : 
     738       56670 :         int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
     739       28335 :                          picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
     740             : 
     741       56670 :         cu_ptr->quantized_dc[1][context_ptr->txb_itr] = av1_quantize_inv_quantize(
     742       28335 :             sb_ptr->picture_control_set_ptr,
     743             :             context_ptr->md_context,
     744       28335 :             ((TranLow*)transform16bit->buffer_cb) + context_ptr->coded_area_sb_uv,
     745             :             NOT_USED_VALUE,
     746       28335 :             ((int32_t*)coeffSamplesTB->buffer_cb) + context_ptr->coded_area_sb_uv,
     747       28335 :             ((int32_t*)inverse_quant_buffer->buffer_cb) + context_ptr->coded_area_sb_uv,
     748             :             qp,
     749             :             seg_qp,
     750       28335 :             context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
     751       28335 :             context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
     752       28335 :             context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
     753             :             &eob[1],
     754             :             &(count_non_zero_coeffs[1]),
     755             :             COMPONENT_CHROMA_CB,
     756             :             BIT_INCREMENT_8BIT,
     757       28335 :             txb_ptr->transform_type[PLANE_TYPE_UV],
     758       28335 :             &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
     759       28335 :             context_ptr->md_context->cb_txb_skip_context,
     760       28335 :             context_ptr->md_context->cb_dc_sign_context,
     761       28335 :             cu_ptr->pred_mode,
     762       28335 :             cu_ptr->av1xd->use_intrabc,
     763             :             EB_TRUE);
     764             : 
     765       28335 :         if (context_ptr->md_skip_blk) {
     766        6743 :             count_non_zero_coeffs[1] = 0;
     767        6743 :             eob[1] = 0;
     768             :         }
     769       28335 :         txb_ptr->u_has_coeff = count_non_zero_coeffs[1] ? EB_TRUE : EB_FALSE;
     770             : 
     771             :         //**********************************
     772             :         // Cr
     773             :         //**********************************
     774             : 
     775       28335 :         av1_estimate_transform(
     776       28335 :             ((int16_t*)residual16bit->buffer_cr) + scratch_cb_offset,
     777       28335 :             residual16bit->stride_cr,
     778       28335 :             ((TranLow*)transform16bit->buffer_cr) + context_ptr->coded_area_sb_uv,
     779             :             NOT_USED_VALUE,
     780       28335 :             context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
     781             :             &context_ptr->three_quad_energy,
     782             :             transformScratchBuffer,
     783             :             BIT_INCREMENT_8BIT,
     784       28335 :             txb_ptr->transform_type[PLANE_TYPE_UV],
     785             :             PLANE_TYPE_UV,
     786             :             DEFAULT_SHAPE);
     787       56669 :         cu_ptr->quantized_dc[2][context_ptr->txb_itr] = av1_quantize_inv_quantize(
     788       28335 :             sb_ptr->picture_control_set_ptr,
     789             :             context_ptr->md_context,
     790       28335 :             ((TranLow*)transform16bit->buffer_cr) + context_ptr->coded_area_sb_uv,
     791             :             NOT_USED_VALUE,
     792       28335 :             ((int32_t*)coeffSamplesTB->buffer_cr) + context_ptr->coded_area_sb_uv,
     793       28335 :             ((TranLow*)inverse_quant_buffer->buffer_cr) + context_ptr->coded_area_sb_uv,
     794             :             qp,
     795             :             seg_qp,
     796       28335 :             context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
     797       28335 :             context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
     798       28335 :             context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
     799             :             &eob[2],
     800             :             &(count_non_zero_coeffs[2]),
     801             :             COMPONENT_CHROMA_CR,
     802             :             BIT_INCREMENT_8BIT,
     803       28335 :             txb_ptr->transform_type[PLANE_TYPE_UV],
     804       28335 :             &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
     805       28335 :             context_ptr->md_context->cr_txb_skip_context,
     806       28335 :             context_ptr->md_context->cr_dc_sign_context,
     807       28335 :             cu_ptr->pred_mode,
     808       28335 :             cu_ptr->av1xd->use_intrabc,
     809             :             EB_TRUE);
     810       28334 :         if (context_ptr->md_skip_blk) {
     811        6743 :             count_non_zero_coeffs[2] = 0;
     812        6743 :             eob[2] = 0;
     813             :         }
     814       28334 :         txb_ptr->v_has_coeff = count_non_zero_coeffs[2] ? EB_TRUE : EB_FALSE;
     815             : 
     816       28334 :         txb_ptr->nz_coef_count[1] = (uint16_t)count_non_zero_coeffs[1];
     817       28334 :         txb_ptr->nz_coef_count[2] = (uint16_t)count_non_zero_coeffs[2];
     818             :     }
     819       39408 :     return;
     820             : }
     821             : 
     822             : void encode_pass_tx_search_hbd( // Forward declaration: Av1EncodeLoop16bit (below) calls this on its luma path when tx_search_skip_flag is 0, forwarding its own arguments unchanged.
     823             :     PictureControlSet            *picture_control_set_ptr,
     824             :     EncDecContext                *context_ptr,
     825             :     LargestCodingUnit            *sb_ptr,
     826             :     uint32_t                       cb_qp,
     827             :     EbPictureBufferDesc          *coeffSamplesTB,
     828             :     EbPictureBufferDesc          *residual16bit,
     829             :     EbPictureBufferDesc          *transform16bit,
     830             :     EbPictureBufferDesc          *inverse_quant_buffer,
     831             :     int16_t                        *transformScratchBuffer,
     832             :     uint32_t                       *count_non_zero_coeffs,
     833             :     uint32_t                       component_mask,
     834             :     uint32_t                       dZoffset,
     835             :     uint16_t                       *eob,
     836             :     MacroblockPlane                *candidate_plane);
     837             : 
     838             : /**********************************************************
     839             : * Encode Loop
     840             : *
     841             : * Summary: Performs an AV1-conformant
     842             : *   Transform, Quantization and Inverse Quantization of a TU (16-bit sample variant).
     843             : *
     844             : * Inputs:
     845             : *   origin_x
     846             : *   origin_y
     847             : *   txb_size
     848             : *   sb_sz
     849             : *   input - input samples (position sensitive)
     850             : *   pred - prediction samples (position independent)
     851             : *
     852             : * Outputs:
     853             : *   Inverse quantized coeff - quantization indices (position sensitive)
     854             : *
     855             : **********************************************************/
     856           0 : static void Av1EncodeLoop16bit(
     857             :     PictureControlSet    *picture_control_set_ptr,
     858             :     EncDecContext       *context_ptr,
     859             :     LargestCodingUnit   *sb_ptr,
     860             :     uint32_t                 origin_x,
     861             :     uint32_t                 origin_y,
     862             :     uint32_t                 cb_qp,
     863             :     EbPictureBufferDesc *predSamples,         // no basis/offset
     864             :     EbPictureBufferDesc *coeffSamplesTB,      // lcu based
     865             :     EbPictureBufferDesc *residual16bit,       // no basis/offset
     866             :     EbPictureBufferDesc *transform16bit,      // no basis/offset
     867             :     EbPictureBufferDesc *inverse_quant_buffer,
     868             :     int16_t                *transformScratchBuffer,
     869             :     uint32_t                  *count_non_zero_coeffs,
     870             :     uint32_t                 component_mask,
     871             :     uint32_t                 dZoffset,
     872             :     uint16_t                 *eob,
     873             :     MacroblockPlane       *candidate_plane)
     874             : 
     875             : {
     876             :     (void)dZoffset;
     877             :     (void)cb_qp;
     878             : 
     879           0 :     CodingUnit          *cu_ptr = context_ptr->cu_ptr;
     880           0 :     TransformUnit       *txb_ptr = &cu_ptr->transform_unit_array[context_ptr->txb_itr];
     881             :     //    EB_SLICE               slice_type = sb_ptr->picture_control_set_ptr->slice_type;
     882             :     //    uint32_t                 temporal_layer_index = sb_ptr->picture_control_set_ptr->temporal_layer_index;
     883           0 :     uint32_t                 qp = cu_ptr->qp;
     884             : 
     885           0 :     EbPictureBufferDesc *inputSamples16bit = context_ptr->input_sample16bit_buffer;
     886           0 :     EbPictureBufferDesc *predSamples16bit = predSamples;
     887           0 :     uint32_t round_origin_x = (origin_x >> 3) << 3;// for Chroma blocks with size of 4
     888           0 :     uint32_t round_origin_y = (origin_y >> 3) << 3;// for Chroma blocks with size of 4
     889           0 :     const uint32_t input_luma_offset = context_ptr->blk_geom->tx_org_x[cu_ptr->tx_depth][context_ptr->txb_itr] + context_ptr->blk_geom->tx_org_y[cu_ptr->tx_depth][context_ptr->txb_itr] * SB_STRIDE_Y;
     890           0 :     const uint32_t input_cb_offset = ROUND_UV(context_ptr->blk_geom->tx_org_x[cu_ptr->tx_depth][context_ptr->txb_itr]) / 2 + ROUND_UV(context_ptr->blk_geom->tx_org_y[cu_ptr->tx_depth][context_ptr->txb_itr]) / 2 * SB_STRIDE_UV;
     891           0 :     const uint32_t input_cr_offset = ROUND_UV(context_ptr->blk_geom->tx_org_x[cu_ptr->tx_depth][context_ptr->txb_itr]) / 2 + ROUND_UV(context_ptr->blk_geom->tx_org_y[cu_ptr->tx_depth][context_ptr->txb_itr]) / 2 * SB_STRIDE_UV;
     892           0 :     const uint32_t pred_luma_offset = ((predSamples16bit->origin_y + origin_y)        * predSamples16bit->stride_y) + (predSamples16bit->origin_x + origin_x);
     893           0 :     const uint32_t pred_cb_offset = (((predSamples16bit->origin_y + round_origin_y) >> 1)  * predSamples16bit->stride_cb) + ((predSamples16bit->origin_x + round_origin_x) >> 1);
     894           0 :     const uint32_t pred_cr_offset = (((predSamples16bit->origin_y + round_origin_y) >> 1)  * predSamples16bit->stride_cr) + ((predSamples16bit->origin_x + round_origin_x) >> 1);
     895           0 :     const uint32_t scratch_luma_offset = context_ptr->blk_geom->origin_x + context_ptr->blk_geom->origin_y * SB_STRIDE_Y;
     896           0 :     const uint32_t scratch_cb_offset = ROUND_UV(context_ptr->blk_geom->origin_x) / 2 + ROUND_UV(context_ptr->blk_geom->origin_y) / 2 * SB_STRIDE_UV;
     897           0 :     const uint32_t scratch_cr_offset = ROUND_UV(context_ptr->blk_geom->origin_x) / 2 + ROUND_UV(context_ptr->blk_geom->origin_y) / 2 * SB_STRIDE_UV;
     898           0 :     const uint32_t coeff1dOffset = context_ptr->coded_area_sb;
     899           0 :     const uint32_t coeff1dOffsetChroma = context_ptr->coded_area_sb_uv;
     900             :     UNUSED(coeff1dOffsetChroma);
     901             : 
     902             :     {
     903             :         //**********************************
     904             :         // Luma
     905             :         //**********************************
     906           0 :         if (component_mask == PICTURE_BUFFER_DESC_FULL_MASK || component_mask == PICTURE_BUFFER_DESC_LUMA_MASK) {
     907           0 :             residual_kernel16bit(
     908           0 :                 ((uint16_t*)inputSamples16bit->buffer_y) + input_luma_offset,
     909           0 :                 inputSamples16bit->stride_y,
     910           0 :                 ((uint16_t*)predSamples16bit->buffer_y) + pred_luma_offset,
     911           0 :                 predSamples16bit->stride_y,
     912           0 :                 ((int16_t*)residual16bit->buffer_y) + scratch_luma_offset,
     913           0 :                 residual16bit->stride_y,
     914           0 :                 context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr],
     915           0 :                 context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr]);
     916           0 :             uint8_t  tx_search_skip_flag = (picture_control_set_ptr->parent_pcs_ptr->tx_search_level == TX_SEARCH_ENC_DEC && (picture_control_set_ptr->parent_pcs_ptr->atb_mode == 0 || cu_ptr->prediction_mode_flag == INTER_MODE)) ? get_skip_tx_search_flag(
     917           0 :                 context_ptr->blk_geom->sq_size,
     918             :                 MAX_MODE_COST,
     919             :                 0,
     920             :                 1) : 1;
     921             : 
     922           0 :             if (!tx_search_skip_flag) {
     923           0 :                     encode_pass_tx_search_hbd(
     924             :                         picture_control_set_ptr,
     925             :                         context_ptr,
     926             :                         sb_ptr,
     927             :                         cb_qp,
     928             :                         coeffSamplesTB,
     929             :                         residual16bit,
     930             :                         transform16bit,
     931             :                         inverse_quant_buffer,
     932             :                         transformScratchBuffer,
     933             :                         count_non_zero_coeffs,
     934             :                         component_mask,
     935             :                         dZoffset,
     936             :                         eob,
     937             :                         candidate_plane);
     938             :             }
     939             : 
     940           0 :             av1_estimate_transform(
     941           0 :                 ((int16_t*)residual16bit->buffer_y) + scratch_luma_offset,
     942           0 :                 residual16bit->stride_y,
     943           0 :                 ((TranLow*)transform16bit->buffer_y) + coeff1dOffset,
     944             :                 NOT_USED_VALUE,
     945           0 :                 context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
     946             :                 &context_ptr->three_quad_energy,
     947             :                 transformScratchBuffer,
     948             :                 BIT_INCREMENT_10BIT,
     949           0 :                 txb_ptr->transform_type[PLANE_TYPE_Y],
     950             :                 PLANE_TYPE_Y,
     951             :                 DEFAULT_SHAPE);
     952             : 
     953           0 :             int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
     954           0 :                              picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
     955           0 :             cu_ptr->quantized_dc[0][context_ptr->txb_itr] = av1_quantize_inv_quantize(
     956           0 :                 sb_ptr->picture_control_set_ptr,
     957             :                 context_ptr->md_context,
     958           0 :                 ((int32_t*)transform16bit->buffer_y) + coeff1dOffset,
     959             :                 NOT_USED_VALUE,
     960           0 :                 ((int32_t*)coeffSamplesTB->buffer_y) + coeff1dOffset,
     961           0 :                 ((int32_t*)inverse_quant_buffer->buffer_y) + coeff1dOffset,
     962             :                 qp,
     963             :                 seg_qp,
     964           0 :                 context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr],
     965           0 :                 context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr],
     966           0 :                 context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
     967             :                 &eob[0],
     968             :                 &(count_non_zero_coeffs[0]),
     969             :                 COMPONENT_LUMA,
     970             :                 BIT_INCREMENT_10BIT,
     971           0 :                 txb_ptr->transform_type[PLANE_TYPE_Y],
     972           0 :                 &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
     973           0 :                 context_ptr->md_context->luma_txb_skip_context,
     974           0 :                 context_ptr->md_context->luma_dc_sign_context,
     975           0 :                 cu_ptr->pred_mode,
     976           0 :                 cu_ptr->av1xd->use_intrabc,
     977             :                 EB_TRUE);
     978           0 :             if (context_ptr->md_skip_blk) {
     979           0 :                 count_non_zero_coeffs[0] = 0;
     980           0 :                 eob[0] = 0;
     981             :             }
     982           0 :             txb_ptr->y_has_coeff = count_non_zero_coeffs[0] ? EB_TRUE : EB_FALSE;
     983           0 :             if (count_non_zero_coeffs[0] == 0) {
     984             :                 // INTER. Chroma follows Luma in transform type
     985           0 :                 if (cu_ptr->prediction_mode_flag == INTER_MODE) {
     986           0 :                     txb_ptr->transform_type[PLANE_TYPE_Y] = DCT_DCT;
     987           0 :                     txb_ptr->transform_type[PLANE_TYPE_UV] = DCT_DCT;
     988             :                 }
     989             :                 else { // INTRA
     990           0 :                     txb_ptr->transform_type[PLANE_TYPE_Y] = DCT_DCT;
     991             :                 }
     992             :             }
     993             : 
     994           0 :             txb_ptr->nz_coef_count[0] = (uint16_t)count_non_zero_coeffs[0];
     995             :         }
     996             : 
     997           0 :         if (cu_ptr->prediction_mode_flag == INTRA_MODE && cu_ptr->prediction_unit_array->intra_chroma_mode == UV_CFL_PRED) {
     998           0 :             EbPictureBufferDesc *reconSamples = predSamples16bit;
     999             : #if ATB_10_BIT
    1000             : 
    1001           0 :             uint32_t reconLumaOffset = (reconSamples->origin_y + round_origin_y) * reconSamples->stride_y + (reconSamples->origin_x + round_origin_x);
    1002             : #else
    1003             :             uint32_t reconLumaOffset = (reconSamples->origin_y + origin_y)            * reconSamples->stride_y + (reconSamples->origin_x + origin_x);
    1004             :             if (txb_ptr->y_has_coeff == EB_TRUE && cu_ptr->skip_flag == EB_FALSE) {
    1005             :                 uint16_t     *predBuffer = ((uint16_t*)predSamples16bit->buffer_y) + pred_luma_offset;
    1006             :                 av1_inv_transform_recon(
    1007             :                     ((int32_t*)inverse_quant_buffer->buffer_y) + coeff1dOffset,
    1008             :                     CONVERT_TO_BYTEPTR(predBuffer), predSamples->stride_y,
    1009             :                     CONVERT_TO_BYTEPTR(predBuffer), predSamples->stride_y,
    1010             :                     context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    1011             :                     BIT_INCREMENT_10BIT,
    1012             :                     txb_ptr->transform_type[PLANE_TYPE_Y],
    1013             :                     PLANE_TYPE_Y,
    1014             :                     eob[0], 0 /*lossless*/);
    1015             :             }
    1016             :             if (context_ptr->blk_geom->has_uv) {
    1017             :                 reconLumaOffset = (reconSamples->origin_y + round_origin_y)            * reconSamples->stride_y + (reconSamples->origin_x + round_origin_x);
    1018             : #endif
    1019             :             // Down sample Luma
    1020           0 :             cfl_luma_subsampling_420_hbd_c(
    1021           0 :                 ((uint16_t*)reconSamples->buffer_y) + reconLumaOffset,
    1022           0 :                 reconSamples->stride_y,
    1023           0 :                 context_ptr->md_context->pred_buf_q3,
    1024           0 :                 context_ptr->blk_geom->bwidth_uv == context_ptr->blk_geom->bwidth ? (context_ptr->blk_geom->bwidth_uv << 1) : context_ptr->blk_geom->bwidth,
    1025           0 :                 context_ptr->blk_geom->bheight_uv == context_ptr->blk_geom->bheight ? (context_ptr->blk_geom->bheight_uv << 1) : context_ptr->blk_geom->bheight);
    1026           0 :             int32_t round_offset = ((context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr])*(context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr])) / 2;
    1027             : 
    1028           0 :             eb_subtract_average(
    1029           0 :                 context_ptr->md_context->pred_buf_q3,
    1030           0 :                 context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1031           0 :                 context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1032             :                 round_offset,
    1033           0 :                 LOG2F(context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr]) + LOG2F(context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr]));
    1034             : 
    1035             :             int32_t alpha_q3 =
    1036           0 :                 cfl_idx_to_alpha(cu_ptr->prediction_unit_array->cfl_alpha_idx, cu_ptr->prediction_unit_array->cfl_alpha_signs, CFL_PRED_U); // once for U, once for V
    1037             :             // TOCHANGE
    1038             :             // assert(chroma_size * CFL_BUF_LINE + chroma_size <=                CFL_BUF_SQUARE);
    1039             : 
    1040           0 :             eb_cfl_predict_hbd(
    1041           0 :                 context_ptr->md_context->pred_buf_q3,
    1042           0 :                 ((uint16_t*)predSamples16bit->buffer_cb) + pred_cb_offset,
    1043           0 :                 predSamples16bit->stride_cb,
    1044           0 :                 ((uint16_t*)predSamples16bit->buffer_cb) + pred_cb_offset,
    1045           0 :                 predSamples16bit->stride_cb,
    1046             :                 alpha_q3,
    1047             :                 10,
    1048           0 :                 context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1049           0 :                 context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr]);
    1050             : 
    1051             :             alpha_q3 =
    1052           0 :                 cfl_idx_to_alpha(cu_ptr->prediction_unit_array->cfl_alpha_idx, cu_ptr->prediction_unit_array->cfl_alpha_signs, CFL_PRED_V); // once for U, once for V
    1053             :             // TOCHANGE
    1054             :             //assert(chroma_size * CFL_BUF_LINE + chroma_size <=                CFL_BUF_SQUARE);
    1055             : 
    1056           0 :             eb_cfl_predict_hbd(
    1057           0 :                 context_ptr->md_context->pred_buf_q3,
    1058           0 :                 ((uint16_t*)predSamples16bit->buffer_cr) + pred_cr_offset,
    1059           0 :                 predSamples16bit->stride_cr,
    1060           0 :                 ((uint16_t*)predSamples16bit->buffer_cr) + pred_cr_offset,
    1061           0 :                 predSamples16bit->stride_cr,
    1062             :                 alpha_q3,
    1063             :                 10,
    1064           0 :                 context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1065           0 :                 context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr]);
    1066             : #if !ATB_10_BIT
    1067             :         }
    1068             : #endif
    1069             :         }
    1070             : 
    1071           0 :         if (component_mask == PICTURE_BUFFER_DESC_FULL_MASK || component_mask == PICTURE_BUFFER_DESC_CHROMA_MASK) {
    1072             :             //**********************************
    1073             :             // Cb
    1074             :             //**********************************
    1075           0 :             residual_kernel16bit(
    1076           0 :                 ((uint16_t*)inputSamples16bit->buffer_cb) + input_cb_offset,
    1077           0 :                 inputSamples16bit->stride_cb,
    1078           0 :                 ((uint16_t*)predSamples16bit->buffer_cb) + pred_cb_offset,
    1079           0 :                 predSamples16bit->stride_cb,
    1080           0 :                 ((int16_t*)residual16bit->buffer_cb) + scratch_cb_offset,
    1081           0 :                 residual16bit->stride_cb,
    1082           0 :                 context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1083           0 :                 context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr]);
    1084             : 
    1085           0 :             residual_kernel16bit(
    1086           0 :                 ((uint16_t*)inputSamples16bit->buffer_cr) + input_cr_offset,
    1087           0 :                 inputSamples16bit->stride_cr,
    1088           0 :                 ((uint16_t*)predSamples16bit->buffer_cr) + pred_cr_offset,
    1089           0 :                 predSamples16bit->stride_cr,
    1090           0 :                 ((int16_t*)residual16bit->buffer_cr) + scratch_cr_offset,
    1091           0 :                 residual16bit->stride_cr,
    1092           0 :                 context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1093           0 :                 context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr]);
    1094             : 
    1095           0 :             av1_estimate_transform(
    1096           0 :                 ((int16_t*)residual16bit->buffer_cb) + scratch_cb_offset,
    1097           0 :                 residual16bit->stride_cb,
    1098           0 :                 ((TranLow*)transform16bit->buffer_cb) + context_ptr->coded_area_sb_uv,
    1099             :                 NOT_USED_VALUE,
    1100           0 :                 context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1101             :                 &context_ptr->three_quad_energy,
    1102             :                 transformScratchBuffer,
    1103             :                 BIT_INCREMENT_10BIT,
    1104           0 :                 txb_ptr->transform_type[PLANE_TYPE_UV],
    1105             :                 PLANE_TYPE_UV,
    1106             :                 DEFAULT_SHAPE);
    1107           0 :             int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
    1108           0 :                              picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
    1109             : 
    1110           0 :             cu_ptr->quantized_dc[1][context_ptr->txb_itr] = av1_quantize_inv_quantize(
    1111           0 :                 sb_ptr->picture_control_set_ptr,
    1112             :                 context_ptr->md_context,
    1113           0 :                 ((int32_t*)transform16bit->buffer_cb) + context_ptr->coded_area_sb_uv,
    1114             :                 NOT_USED_VALUE,
    1115           0 :                 ((int32_t*)coeffSamplesTB->buffer_cb) + context_ptr->coded_area_sb_uv,
    1116           0 :                 ((int32_t*)inverse_quant_buffer->buffer_cb) + context_ptr->coded_area_sb_uv,
    1117             :                 qp,
    1118             :                 seg_qp,
    1119           0 :                 context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1120           0 :                 context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1121           0 :                 context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1122             :                 &eob[1],
    1123             :                 &(count_non_zero_coeffs[1]),
    1124             :                 COMPONENT_CHROMA_CB,
    1125             :                 BIT_INCREMENT_10BIT,
    1126           0 :                 txb_ptr->transform_type[PLANE_TYPE_UV],
    1127           0 :                 &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
    1128           0 :                 context_ptr->md_context->cb_txb_skip_context,
    1129           0 :                 context_ptr->md_context->cb_dc_sign_context,
    1130           0 :                 cu_ptr->pred_mode,
    1131           0 :                 cu_ptr->av1xd->use_intrabc,
    1132             :                 EB_TRUE);
    1133             : 
    1134           0 :             if (context_ptr->md_skip_blk) {
    1135           0 :                 count_non_zero_coeffs[1] = 0;
    1136           0 :                 eob[1] = 0;
    1137             :             }
    1138           0 :             txb_ptr->u_has_coeff = count_non_zero_coeffs[1] ? EB_TRUE : EB_FALSE;
    1139             : 
    1140             :             //**********************************
    1141             :             // Cr
    1142             :             //**********************************
    1143             : 
    1144           0 :             av1_estimate_transform(
    1145           0 :                 ((int16_t*)residual16bit->buffer_cr) + scratch_cb_offset,
    1146           0 :                 residual16bit->stride_cr,
    1147           0 :                 ((TranLow*)transform16bit->buffer_cr) + context_ptr->coded_area_sb_uv,
    1148             :                 NOT_USED_VALUE,
    1149           0 :                 context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1150             :                 &context_ptr->three_quad_energy,
    1151             :                 transformScratchBuffer,
    1152             :                 BIT_INCREMENT_10BIT,
    1153           0 :                 txb_ptr->transform_type[PLANE_TYPE_UV],
    1154             :                 PLANE_TYPE_UV,
    1155             :                 DEFAULT_SHAPE);
    1156             : 
    1157           0 :             cu_ptr->quantized_dc[2][context_ptr->txb_itr] = av1_quantize_inv_quantize(
    1158           0 :                 sb_ptr->picture_control_set_ptr,
    1159             :                 context_ptr->md_context,
    1160           0 :                 ((int32_t*)transform16bit->buffer_cr) + context_ptr->coded_area_sb_uv,
    1161             :                 NOT_USED_VALUE,
    1162           0 :                 ((int32_t*)coeffSamplesTB->buffer_cr) + context_ptr->coded_area_sb_uv,
    1163           0 :                 ((int32_t*)inverse_quant_buffer->buffer_cr) + context_ptr->coded_area_sb_uv,
    1164             :                 qp,
    1165             :                 seg_qp,
    1166           0 :                 context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1167           0 :                 context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1168           0 :                 context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1169             :                 &eob[2],
    1170             :                 &(count_non_zero_coeffs[2]),
    1171             :                 COMPONENT_CHROMA_CR,
    1172             :                 BIT_INCREMENT_10BIT,
    1173           0 :                 txb_ptr->transform_type[PLANE_TYPE_UV],
    1174           0 :                 &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
    1175           0 :                 context_ptr->md_context->cr_txb_skip_context,
    1176           0 :                 context_ptr->md_context->cr_dc_sign_context,
    1177           0 :                 cu_ptr->pred_mode,
    1178           0 :                 cu_ptr->av1xd->use_intrabc,
    1179             :                 EB_TRUE);
    1180           0 :             if (context_ptr->md_skip_blk) {
    1181           0 :                 count_non_zero_coeffs[2] = 0;
    1182           0 :                 eob[2] = 0;
    1183             :             }
    1184           0 :             txb_ptr->v_has_coeff = count_non_zero_coeffs[2] ? EB_TRUE : EB_FALSE;
    1185             : 
    1186           0 :             txb_ptr->nz_coef_count[1] = (uint16_t)count_non_zero_coeffs[1];
    1187           0 :             txb_ptr->nz_coef_count[2] = (uint16_t)count_non_zero_coeffs[2];
    1188             :         }
    1189             :     }
    1190             : 
    1191           0 :     return;
    1192             : }
    1193             : 
    1194             : /**********************************************************
    1195             : * Encode Generate Recon (8-bit sample path)
    1196             : *
    1197             : * Summary: Performs an AV1 conformant
    1198             : *   Inverse Transform and generates
    1199             : *   the reconstructed samples of a TU, in place in predSamples.
    1200             : *
    1201             : * Inputs:
    1202             : *   context_ptr - supplies cu_ptr, txb_itr, blk_geom and the coded-area offsets
    1203             : *   origin_x, origin_y - TU origin; used as-is for luma, rounded and halved for chroma
    1204             : *   predSamples - prediction samples; holds the recon on return
    1205             : *   residual16bit - Inverse Quantized Coeff, read through int32_t pointers
    1206             : *   transformScratchBuffer - unused; component_mask - luma/chroma selection bits
    1207             : *   eob - per-plane eob values: [0] luma, [1] Cb, [2] Cr
    1208             : *
    1209             : * Outputs:
    1210             : *   Recon, written over the prediction in predSamples
    1211             : *
    1212             : **********************************************************/
    1213       43570 : static void Av1EncodeGenerateRecon(
    1214             :     EncDecContext       *context_ptr,
    1215             :     uint32_t               origin_x,
    1216             :     uint32_t               origin_y,
    1217             :     EbPictureBufferDesc *predSamples,     // no basis/offset
    1218             :     EbPictureBufferDesc *residual16bit,    // no basis/offset
    1219             :     int16_t               *transformScratchBuffer,
    1220             :     uint32_t               component_mask,
    1221             :     uint16_t              *eob)
    1222             : {
    1223             :     uint32_t               pred_luma_offset;
    1224             :     uint32_t               predChromaOffset;
    1225       43570 :     CodingUnit          *cu_ptr = context_ptr->cu_ptr;
    1226       43570 :     TransformUnit       *txb_ptr = &cu_ptr->transform_unit_array[context_ptr->txb_itr]; // TU selected by the current txb_itr
    1227             : 
    1228             :     // Note: the prediction is built in-place in the Recon buffer (predSamples). A plane is overwritten
    1229             :     //   with reconstructed samples only when its *_has_coeff flag is EB_TRUE and skip_flag is EB_FALSE.
    1230             : 
    1231             :     //**********************************
    1232             :     // Luma
    1233             :     //**********************************
    1234       43570 :     if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) {
    1235             :         {
    1236       38224 :             pred_luma_offset = (predSamples->origin_y + origin_y)             * predSamples->stride_y + (predSamples->origin_x + origin_x);
    1237       38224 :             if (txb_ptr->y_has_coeff == EB_TRUE && cu_ptr->skip_flag == EB_FALSE) {
    1238             :                 (void)transformScratchBuffer; // parameter is not used by this function
    1239       13910 :                 uint8_t     *predBuffer = predSamples->buffer_y + pred_luma_offset;
    1240       13910 :                 av1_inv_transform_recon8bit(
    1241       13910 :                     ((int32_t*)residual16bit->buffer_y) + context_ptr->coded_area_sb, // luma coefficients of this TU
    1242       13910 :                     predBuffer, predSamples->stride_y, // same buffer/stride passed twice: presumably pred-in then recon-out
    1243       13910 :                     predBuffer, predSamples->stride_y,
    1244       13910 :                     context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    1245       13910 :                     txb_ptr->transform_type[PLANE_TYPE_Y],
    1246             :                     PLANE_TYPE_Y,
    1247       13910 :                     eob[0], 0 /*lossless*/
    1248             :                 );
    1249             :             }
    1250             :         }
    1251             :     }
    1252             : 
    1253       43570 :     if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK)    {
    1254             :         //**********************************
    1255             :         // Chroma
    1256             :         //**********************************
    1257             : 
    1258       32497 :         uint32_t                 round_origin_x = (origin_x >> 3) << 3;// round down to a multiple of 8, for Chroma blocks with size of 4
    1259       32497 :         uint32_t                 round_origin_y = (origin_y >> 3) << 3;// round down to a multiple of 8, for Chroma blocks with size of 4
    1260       32497 :         predChromaOffset = (((predSamples->origin_y + round_origin_y) >> 1)           * predSamples->stride_cb) + ((predSamples->origin_x + round_origin_x) >> 1); // both coordinates halved for the chroma plane
    1261             : 
    1262             :         //**********************************
    1263             :         // Cb
    1264             :         //**********************************
    1265       32497 :         if (txb_ptr->u_has_coeff == EB_TRUE && cu_ptr->skip_flag == EB_FALSE) {
    1266        3322 :             uint8_t     *predBuffer = predSamples->buffer_cb + predChromaOffset;
    1267             : 
    1268        3322 :             av1_inv_transform_recon8bit(
    1269        3322 :                 ((int32_t*)residual16bit->buffer_cb) + context_ptr->coded_area_sb_uv, // Cb coefficients of this TU
    1270        3322 :                 predBuffer, predSamples->stride_cb,
    1271        3322 :                 predBuffer, predSamples->stride_cb,
    1272        3322 :                 context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1273        3322 :                 txb_ptr->transform_type[PLANE_TYPE_UV],
    1274             :                 PLANE_TYPE_UV,
    1275        3322 :                 eob[1], 0 /*lossless*/);
    1276             :         }
    1277             : 
    1278             :         //**********************************
    1279             :         // Cr
    1280             :         //**********************************
    1281       32497 :         predChromaOffset = (((predSamples->origin_y + round_origin_y) >> 1)           * predSamples->stride_cr) + ((predSamples->origin_x + round_origin_x) >> 1); // recomputed with the Cr stride
    1282             : 
    1283       32497 :         if (txb_ptr->v_has_coeff == EB_TRUE && cu_ptr->skip_flag == EB_FALSE) {
    1284        1694 :             uint8_t     *predBuffer = predSamples->buffer_cr + predChromaOffset;
    1285             : 
    1286        1694 :             av1_inv_transform_recon8bit(
    1287        1694 :                 ((int32_t*)residual16bit->buffer_cr) + context_ptr->coded_area_sb_uv, // Cr coefficients share the Cb coded-area offset
    1288        1694 :                 predBuffer, predSamples->stride_cr,
    1289        1694 :                 predBuffer, predSamples->stride_cr,
    1290        1694 :                 context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1291        1694 :                 txb_ptr->transform_type[PLANE_TYPE_UV],
    1292             :                 PLANE_TYPE_UV,
    1293        1694 :                 eob[2], 0 /*lossless*/);
    1294             :         }
    1295             :     }
    1296             : 
    1297       43572 :     return;
    1298             : }
    1299             : 
    1300             : /**********************************************************
    1301             : * Encode Generate Recon (16-bit sample path)
    1302             : *
    1303             : * Summary: Performs an AV1 conformant
    1304             : *   Inverse Transform and generates
    1305             : *   the reconstructed samples of a TU, in place in predSamples.
    1306             : *
    1307             : * Inputs:
    1308             : *   context_ptr - supplies cu_ptr, txb_itr, blk_geom and the coded-area offsets
    1309             : *   origin_x, origin_y - TU origin; used as-is for luma, rounded and halved for chroma
    1310             : *   predSamples - prediction samples, addressed as uint16_t; holds the recon on return
    1311             : *   residual16bit - Inverse Quantized Coeff, read through int32_t pointers
    1312             : *   transformScratchBuffer - unused; component_mask - luma/chroma selection bits
    1313             : *   eob - per-plane eob values: [0] luma, [1] Cb, [2] Cr
    1314             : *
    1315             : * Outputs:
    1316             : *   Recon, written over the prediction in predSamples
    1317             : *
    1318             : **********************************************************/
    1319           0 : static void Av1EncodeGenerateRecon16bit(
    1320             :     EncDecContext         *context_ptr,
    1321             :     uint32_t               origin_x,
    1322             :     uint32_t               origin_y,
    1323             :     EbPictureBufferDesc   *predSamples,     // no basis/offset
    1324             :     EbPictureBufferDesc   *residual16bit,    // no basis/offset
    1325             :     int16_t               *transformScratchBuffer,
    1326             :     uint32_t               component_mask,
    1327             :     uint16_t              *eob)
    1328             : {
    1329             :     uint32_t pred_luma_offset;
    1330             :     uint32_t predChromaOffset;
    1331             : 
    1332           0 :     CodingUnit          *cu_ptr = context_ptr->cu_ptr;
    1333           0 :     TransformUnit       *txb_ptr = &cu_ptr->transform_unit_array[context_ptr->txb_itr]; // TU selected by the current txb_itr
    1334             : 
    1335             :     (void)transformScratchBuffer; // parameter is not used by this function
    1336             :     //**********************************
    1337             :     // Luma
    1338             :     //**********************************
    1339           0 :     if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) {
    1340             : #if !ATB_10_BIT // without ATB_10_BIT, luma recon is skipped here for intra blocks whose chroma mode is UV_CFL_PRED
    1341             :         if (cu_ptr->prediction_mode_flag != INTRA_MODE || cu_ptr->prediction_unit_array->intra_chroma_mode != UV_CFL_PRED)
    1342             : #endif
    1343             :         {
    1344           0 :             pred_luma_offset = (predSamples->origin_y + origin_y)* predSamples->stride_y + (predSamples->origin_x + origin_x); // offset in uint16_t samples
    1345           0 :             if (txb_ptr->y_has_coeff == EB_TRUE && cu_ptr->skip_flag == EB_FALSE) {
    1346           0 :                 uint16_t     *predBuffer = ((uint16_t*)predSamples->buffer_y) + pred_luma_offset;
    1347           0 :                 av1_inv_transform_recon(
    1348           0 :                     ((int32_t*)residual16bit->buffer_y) + context_ptr->coded_area_sb, // luma coefficients of this TU
    1349           0 :                     CONVERT_TO_BYTEPTR(predBuffer), predSamples->stride_y, // same buffer/stride passed twice: presumably pred-in then recon-out
    1350           0 :                     CONVERT_TO_BYTEPTR(predBuffer), predSamples->stride_y,
    1351           0 :                     context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    1352             :                     BIT_INCREMENT_10BIT,
    1353           0 :                     txb_ptr->transform_type[PLANE_TYPE_Y],
    1354             :                     PLANE_TYPE_Y,
    1355           0 :                     eob[0], 0 /*lossless*/
    1356             :                 );
    1357             :             }
    1358             :         }
    1359             :     }
    1360             : 
    1361           0 :     if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK)    {
    1362             :         //**********************************
    1363             :         // Chroma
    1364             :         //**********************************
    1365             : 
    1366             :         //**********************************
    1367             :         // Cb
    1368             :         //**********************************
    1369             : 
    1370           0 :         uint32_t                 round_origin_x = (origin_x >> 3) << 3;// round down to a multiple of 8, for Chroma blocks with size of 4
    1371           0 :         uint32_t                 round_origin_y = (origin_y >> 3) << 3;// round down to a multiple of 8, for Chroma blocks with size of 4
    1372             : 
    1373           0 :         predChromaOffset = (((predSamples->origin_y + round_origin_y) >> 1)           * predSamples->stride_cb) + ((predSamples->origin_x + round_origin_x) >> 1); // both coordinates halved for the chroma plane
    1374             : 
    1375           0 :         if (txb_ptr->u_has_coeff == EB_TRUE && cu_ptr->skip_flag == EB_FALSE) {
    1376           0 :             uint16_t     *predBuffer = ((uint16_t*)predSamples->buffer_cb) + predChromaOffset;
    1377           0 :             av1_inv_transform_recon(
    1378           0 :                 ((int32_t*)residual16bit->buffer_cb) + context_ptr->coded_area_sb_uv, // Cb coefficients of this TU
    1379           0 :                 CONVERT_TO_BYTEPTR(predBuffer), predSamples->stride_cb,
    1380           0 :                 CONVERT_TO_BYTEPTR(predBuffer), predSamples->stride_cb,
    1381           0 :                 context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1382             :                 BIT_INCREMENT_10BIT,
    1383           0 :                 txb_ptr->transform_type[PLANE_TYPE_UV],
    1384             :                 PLANE_TYPE_UV,
    1385           0 :                 eob[1], 0 /*lossless*/);
    1386             :         }
    1387             : 
    1388             :         //**********************************
    1389             :         // Cr
    1390             :         //**********************************
    1391           0 :         predChromaOffset = (((predSamples->origin_y + round_origin_y) >> 1)           * predSamples->stride_cr) + ((predSamples->origin_x + round_origin_x) >> 1); // recomputed with the Cr stride
    1392           0 :         if (txb_ptr->v_has_coeff == EB_TRUE && cu_ptr->skip_flag == EB_FALSE) {
    1393           0 :             uint16_t     *predBuffer = ((uint16_t*)predSamples->buffer_cr) + predChromaOffset;
    1394           0 :             av1_inv_transform_recon(
    1395           0 :                 ((int32_t*)residual16bit->buffer_cr) + context_ptr->coded_area_sb_uv, // Cr coefficients share the Cb coded-area offset
    1396           0 :                 CONVERT_TO_BYTEPTR(predBuffer), predSamples->stride_cr,
    1397           0 :                 CONVERT_TO_BYTEPTR(predBuffer), predSamples->stride_cr,
    1398           0 :                 context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1399             :                 BIT_INCREMENT_10BIT,
    1400           0 :                 txb_ptr->transform_type[PLANE_TYPE_UV],
    1401             :                 PLANE_TYPE_UV,
    1402           0 :                 eob[2], 0 /*lossless*/);
    1403             :         }
    1404             :     }
    1405             : 
    1406           0 :     return;
    1407             : }
    1408             : static EB_AV1_ENCODE_LOOP_FUNC_PTR   Av1EncodeLoopFunctionTable[2] = // file-local encode-loop dispatch table; perform_intra_coding_loop indexes it with the is16bit flag
    1409             : {
    1410             :     Av1EncodeLoop,      // [0]: selected when is16bit == 0
    1411             :     Av1EncodeLoop16bit  // [1]: selected when is16bit == 1
    1412             : };
    1413             : 
    1414             : EB_AV1_GENERATE_RECON_FUNC_PTR   Av1EncodeGenerateReconFunctionPtr[2] =
    1415             : {
    1416             :     Av1EncodeGenerateRecon,
    1417             :     Av1EncodeGenerateRecon16bit
    1418             : };
    1419             : 
    1420           0 : void Store16bitInputSrc( // Copies one block of 16-bit Y, Cb and Cr samples from a local buffer into picture_control_set_ptr->input_frame16bit.
    1421             :     EbPictureBufferDesc     *input_sample16bit_buffer, // source; read from the start of buffer_y / buffer_cb / buffer_cr (no origin offset is applied)
    1422             :     PictureControlSet       *picture_control_set_ptr, // owner of the destination picture, input_frame16bit
    1423             :     uint32_t                 lcuX, // luma column of the block in the destination; the destination origin_x is added to it
    1424             :     uint32_t                 lcuY, // luma row of the block in the destination; the destination origin_y is added to it
    1425             :     uint32_t                 lcuW, // block width in luma samples
    1426             :     uint32_t                 lcuH ) // block height in luma samples (number of luma rows copied)
    1427             : {
    1428             :     uint32_t rowIt;
    1429             :     uint16_t* fromPtr;
    1430             :     uint16_t* toPtr;
    1431             :     // Luma: the destination pointer is advanced to (lcuX + origin_x, lcuY + origin_y) in the picture-level buffer.
    1432           0 :     fromPtr = (uint16_t*)input_sample16bit_buffer->buffer_y;
    1433           0 :     toPtr = (uint16_t*)picture_control_set_ptr->input_frame16bit->buffer_y + (lcuX + picture_control_set_ptr->input_frame16bit->origin_x) + (lcuY + picture_control_set_ptr->input_frame16bit->origin_y)*picture_control_set_ptr->input_frame16bit->stride_y;
    1434             :     // Row-by-row copy, each side stepping with its own stride; lcuW * 2 is the byte count of lcuW uint16_t samples.
    1435           0 :     for (rowIt = 0; rowIt < lcuH; rowIt++)
    1436           0 :         memcpy(toPtr + rowIt * picture_control_set_ptr->input_frame16bit->stride_y, fromPtr + rowIt * input_sample16bit_buffer->stride_y, lcuW * 2);
    1437             :     // Chroma: block position and size are halved in both directions before the Cb and Cr copies.
    1438           0 :     lcuX = lcuX / 2;
    1439           0 :     lcuY = lcuY / 2;
    1440           0 :     lcuW = lcuW / 2;
    1441           0 :     lcuH = lcuH / 2;
    1442             :     // Cb plane; the destination origin is halved as well.
    1443           0 :     fromPtr = (uint16_t*)input_sample16bit_buffer->buffer_cb;
    1444           0 :     toPtr = (uint16_t*)picture_control_set_ptr->input_frame16bit->buffer_cb + (lcuX + picture_control_set_ptr->input_frame16bit->origin_x / 2) + (lcuY + picture_control_set_ptr->input_frame16bit->origin_y / 2)*picture_control_set_ptr->input_frame16bit->stride_cb;
    1445             : 
    1446           0 :     for (rowIt = 0; rowIt < lcuH; rowIt++)
    1447           0 :         memcpy(toPtr + rowIt * picture_control_set_ptr->input_frame16bit->stride_cb, fromPtr + rowIt * input_sample16bit_buffer->stride_cb, lcuW * 2);
    1448             :     // Cr plane; the row offset must use stride_cr so it agrees with the stride used by the Cr copy loop.
    1449           0 :     fromPtr = (uint16_t*)input_sample16bit_buffer->buffer_cr;
    1450           0 :     toPtr = (uint16_t*)picture_control_set_ptr->input_frame16bit->buffer_cr + (lcuX + picture_control_set_ptr->input_frame16bit->origin_x / 2) + (lcuY + picture_control_set_ptr->input_frame16bit->origin_y / 2)*picture_control_set_ptr->input_frame16bit->stride_cr;
    1451             : 
    1452           0 :     for (rowIt = 0; rowIt < lcuH; rowIt++)
    1453           0 :         memcpy(toPtr + rowIt * picture_control_set_ptr->input_frame16bit->stride_cr, fromPtr + rowIt * input_sample16bit_buffer->stride_cr, lcuW * 2);
    1454           0 : }
    1455             : 
    1456             : 
    1457             : 
    1458             : void update_av1_mi_map( // forward declaration only; the definition is not visible in this part of the file
    1459             :     CodingUnit        *cu_ptr,
    1460             :     uint32_t           cu_origin_x, // NOTE(review): presumably the block origin in luma samples -- confirm against the definition
    1461             :     uint32_t           cu_origin_y,
    1462             :     const BlockGeom   *blk_geom, // pointer-to-const: the callee cannot modify the geometry through this parameter
    1463             :     PictureControlSet *picture_control_set_ptr);
    1464             : 
    1465             : void move_cu_data( // forward declaration only; the definition is not visible in this part of the file
    1466             : #if PAL_SUP
    1467             :     PictureControlSet  *pcs, // part of the signature only when PAL_SUP is enabled
    1468             :     EncDecContext      *context_ptr, // part of the signature only when PAL_SUP is enabled
    1469             : #endif
    1470             :     CodingUnit *src_cu, // NOTE(review): names suggest src_cu is copied into dst_cu -- confirm against the definition
    1471             :     CodingUnit *dst_cu);
    1472             : 
    1473        6417 : void perform_intra_coding_loop(
    1474             :     PictureControlSet  *picture_control_set_ptr,
    1475             :     LargestCodingUnit  *sb_ptr,
    1476             :     uint32_t            tbAddr,
    1477             :     CodingUnit         *cu_ptr,
    1478             :     PredictionUnit     *pu_ptr,
    1479             :     EncDecContext      *context_ptr,
    1480             :     uint32_t            dZoffset) {
    1481        6417 :     EbBool                  is16bit = context_ptr->is16bit;
    1482             : 
    1483        6417 :     EbPictureBufferDesc    *recon_buffer = is16bit ? picture_control_set_ptr->recon_picture16bit_ptr : picture_control_set_ptr->recon_picture_ptr;
    1484        6417 :     EbPictureBufferDesc    *coeff_buffer_sb = sb_ptr->quantized_coeff;
    1485             : 
    1486        6417 :     NeighborArrayUnit      *ep_luma_recon_neighbor_array = is16bit ? picture_control_set_ptr->ep_luma_recon_neighbor_array16bit : picture_control_set_ptr->ep_luma_recon_neighbor_array;
    1487        6417 :     NeighborArrayUnit      *ep_cb_recon_neighbor_array = is16bit ? picture_control_set_ptr->ep_cb_recon_neighbor_array16bit : picture_control_set_ptr->ep_cb_recon_neighbor_array;
    1488        6417 :     NeighborArrayUnit      *ep_cr_recon_neighbor_array = is16bit ? picture_control_set_ptr->ep_cr_recon_neighbor_array16bit : picture_control_set_ptr->ep_cr_recon_neighbor_array;
    1489             : 
    1490        6417 :     EbPictureBufferDesc    *residual_buffer = context_ptr->residual_buffer;
    1491        6417 :     EbPictureBufferDesc    *transform_buffer = context_ptr->transform_buffer;
    1492        6417 :     EbPictureBufferDesc    *inverse_quant_buffer = context_ptr->inverse_quant_buffer;
    1493        6417 :     int16_t                *transform_inner_array_ptr = context_ptr->transform_inner_array_ptr;
    1494             : 
    1495             :     uint32_t                count_non_zero_coeffs[3];
    1496             :     MacroblockPlane         cuPlane[3];
    1497             :     uint16_t                eobs[MAX_TXB_COUNT][3];
    1498             :     uint64_t                y_tu_coeff_bits;
    1499             :     uint64_t                cb_tu_coeff_bits;
    1500             :     uint64_t                cr_tu_coeff_bits;
    1501        6417 :     EntropyCoder           *coeff_est_entropy_coder_ptr = picture_control_set_ptr->coeff_est_entropy_coder_ptr;
    1502             : 
    1503        6417 :     if (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_TRUE)
    1504             :         //get the 16bit form of the input LCU
    1505        6414 :         if (is16bit)
    1506           0 :             recon_buffer = ((EbReferenceObject*)picture_control_set_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture16bit;
    1507             :         else
    1508        6414 :             recon_buffer = ((EbReferenceObject*)picture_control_set_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture;
    1509             :     else  // non ref pictures
    1510           3 :         recon_buffer = is16bit ? picture_control_set_ptr->recon_picture16bit_ptr : picture_control_set_ptr->recon_picture_ptr;
    1511             : 
    1512        6417 :     uint32_t totTu = context_ptr->blk_geom->txb_count[cu_ptr->tx_depth];
    1513             : 
    1514             :     // Luma path
    1515       15430 :     for (context_ptr->txb_itr = 0; context_ptr->txb_itr < totTu; context_ptr->txb_itr++) {
    1516        9012 :         uint16_t txb_origin_x = context_ptr->cu_origin_x + context_ptr->blk_geom->tx_boff_x[cu_ptr->tx_depth][context_ptr->txb_itr];
    1517        9012 :         uint16_t txb_origin_y = context_ptr->cu_origin_y + context_ptr->blk_geom->tx_boff_y[cu_ptr->tx_depth][context_ptr->txb_itr];
    1518             : 
    1519        9012 :         context_ptr->md_context->luma_txb_skip_context = 0;
    1520        9012 :         context_ptr->md_context->luma_dc_sign_context = 0;
    1521        9012 :         get_txb_ctx(
    1522        9012 :             picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr,
    1523             :             COMPONENT_LUMA,
    1524             :             picture_control_set_ptr->ep_luma_dc_sign_level_coeff_neighbor_array,
    1525             :             txb_origin_x,
    1526             :             txb_origin_y,
    1527        9012 :             context_ptr->blk_geom->bsize,
    1528        9012 :             context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    1529        9012 :             &context_ptr->md_context->luma_txb_skip_context,
    1530        9012 :             &context_ptr->md_context->luma_dc_sign_context);
    1531        9012 :         if (is16bit) {
    1532             :             uint16_t    topNeighArray[64 * 2 + 1];
    1533             :             uint16_t    leftNeighArray[64 * 2 + 1];
    1534             :             PredictionMode mode;
    1535             : 
    1536           0 :             TxSize  tx_size = context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr];
    1537             : 
    1538           0 :             if (txb_origin_y != 0)
    1539           0 :                 memcpy(topNeighArray + 1, (uint16_t*)(ep_luma_recon_neighbor_array->top_array) + txb_origin_x, context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr] * 2 * sizeof(uint16_t));
    1540           0 :             if (txb_origin_x != 0)
    1541           0 :                 memcpy(leftNeighArray + 1, (uint16_t*)(ep_luma_recon_neighbor_array->left_array) + txb_origin_y, context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr] * 2 * sizeof(uint16_t));
    1542           0 :             if (txb_origin_y != 0 && txb_origin_x != 0)
    1543           0 :                 topNeighArray[0] = leftNeighArray[0] = ((uint16_t*)(ep_luma_recon_neighbor_array->top_left_array) + MAX_PICTURE_HEIGHT_SIZE + txb_origin_x - txb_origin_y)[0];
    1544             : 
    1545           0 :             mode = cu_ptr->pred_mode;
    1546             : 
    1547           0 :             eb_av1_predict_intra_block_16bit(
    1548             :                 &sb_ptr->tile_info,
    1549             :                 ED_STAGE,
    1550             :                 context_ptr->blk_geom,
    1551           0 :                 picture_control_set_ptr->parent_pcs_ptr->av1_cm,
    1552             : #if ATB_10_BIT
    1553           0 :                 context_ptr->blk_geom->bwidth,
    1554           0 :                 context_ptr->blk_geom->bheight,
    1555             : #else
    1556             :                 context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr],
    1557             :                 context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr],
    1558             : #endif
    1559             :                 tx_size,
    1560             :                 mode,
    1561             :                 pu_ptr->angle_delta[PLANE_TYPE_Y],
    1562             : #if PAL_SUP
    1563           0 :                 cu_ptr->palette_info.pmi.palette_size[0] > 0,
    1564             :                 &cu_ptr->palette_info,
    1565             : #else
    1566             :                 0,
    1567             : #endif
    1568             : #if FILTER_INTRA_FLAG
    1569           0 :                 cu_ptr->filter_intra_mode,
    1570             : #else
    1571             :                 FILTER_INTRA_MODES,
    1572             : #endif
    1573             :                 topNeighArray + 1,
    1574             :                 leftNeighArray + 1,
    1575             :                 recon_buffer,
    1576             : #if ATB_10_BIT
    1577           0 :                 context_ptr->blk_geom->tx_boff_x[cu_ptr->tx_depth][context_ptr->txb_itr] >> 2,
    1578           0 :                 context_ptr->blk_geom->tx_boff_y[cu_ptr->tx_depth][context_ptr->txb_itr] >> 2,
    1579             : #else
    1580             :                 0,
    1581             :                 0,
    1582             : #endif
    1583             :                 0,
    1584           0 :                 context_ptr->blk_geom->bsize,
    1585             :                 txb_origin_x,
    1586             :                 txb_origin_y,
    1587           0 :                 context_ptr->cu_origin_x,
    1588           0 :                 context_ptr->cu_origin_y,
    1589             :                 0,
    1590             :                 0);
    1591             :         }
    1592             :         else {
    1593             :             uint8_t    topNeighArray[64 * 2 + 1];
    1594             :             uint8_t    leftNeighArray[64 * 2 + 1];
    1595             :             PredictionMode mode;
    1596             : 
    1597        9012 :             TxSize  tx_size = context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr];
    1598             : 
    1599        9012 :             if (txb_origin_y != 0)
    1600        8711 :                 memcpy(topNeighArray + 1, ep_luma_recon_neighbor_array->top_array + txb_origin_x, context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr] * 2);
    1601             : 
    1602        9012 :             if (txb_origin_x != 0)
    1603        8650 :                 memcpy(leftNeighArray + 1, ep_luma_recon_neighbor_array->left_array + txb_origin_y, context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr] * 2);
    1604             : 
    1605        9012 :             if (txb_origin_y != 0 && txb_origin_x != 0)
    1606        8357 :                 topNeighArray[0] = leftNeighArray[0] = ep_luma_recon_neighbor_array->top_left_array[MAX_PICTURE_HEIGHT_SIZE + txb_origin_x - txb_origin_y];
    1607             : 
    1608        9012 :             mode = cu_ptr->pred_mode;
    1609             : 
    1610             :             // Hsan: if CHROMA_MODE_2, then CFL will be evaluated @ EP as no CHROMA @ MD
    1611             :             // If that's the case then you should ensure than the 1st chroma prediction uses UV_DC_PRED (that's the default configuration for CHROMA_MODE_2 if CFL applicable (set @ fast loop candidates injection) then MD assumes chroma mode always UV_DC_PRED)
    1612        9012 :             eb_av1_predict_intra_block(
    1613             :                 &sb_ptr->tile_info,
    1614             :                 ED_STAGE,
    1615             :                 context_ptr->blk_geom,
    1616        9012 :                 picture_control_set_ptr->parent_pcs_ptr->av1_cm,
    1617        9012 :                 context_ptr->blk_geom->bwidth,
    1618        9012 :                 context_ptr->blk_geom->bheight,
    1619             :                 tx_size,
    1620             :                 mode,
    1621             :                 pu_ptr->angle_delta[PLANE_TYPE_Y],
    1622             : #if PAL_SUP
    1623        9012 :                 cu_ptr->palette_info.pmi.palette_size[0] > 0,
    1624             :                 &cu_ptr->palette_info,
    1625             : #else
    1626             :                 0,
    1627             : #endif
    1628             : #if FILTER_INTRA_FLAG
    1629        9012 :                 cu_ptr->filter_intra_mode,
    1630             : #else
    1631             :                 FILTER_INTRA_MODES,
    1632             : #endif
    1633             :                 topNeighArray + 1,
    1634             :                 leftNeighArray + 1,
    1635             :                 recon_buffer,
    1636        9012 :                 context_ptr->blk_geom->tx_boff_x[cu_ptr->tx_depth][context_ptr->txb_itr] >> 2,
    1637        9012 :                 context_ptr->blk_geom->tx_boff_y[cu_ptr->tx_depth][context_ptr->txb_itr] >> 2,
    1638             :                 0,
    1639        9012 :                 context_ptr->blk_geom->bsize,
    1640             :                 txb_origin_x,
    1641             :                 txb_origin_y,
    1642        9012 :                 context_ptr->cu_origin_x,
    1643        9012 :                 context_ptr->cu_origin_y,
    1644             :                 0,
    1645             :                 0);
    1646             :         }
    1647             :         // Encode Transform Unit -INTRA-
    1648             : 
    1649        9012 :         uint16_t cb_qp = cu_ptr->qp;
    1650        9012 :         Av1EncodeLoopFunctionTable[is16bit](
    1651             :             picture_control_set_ptr,
    1652             :             context_ptr,
    1653             :             sb_ptr,
    1654             :             txb_origin_x,
    1655             :             txb_origin_y,
    1656             :             cb_qp,
    1657             :             recon_buffer,
    1658             :             coeff_buffer_sb,
    1659             :             residual_buffer,
    1660             :             transform_buffer,
    1661             :             inverse_quant_buffer,
    1662             :             transform_inner_array_ptr,
    1663             :             count_non_zero_coeffs,
    1664             :             PICTURE_BUFFER_DESC_LUMA_MASK,
    1665        9012 :             cu_ptr->delta_qp > 0 ? 0 : dZoffset,
    1666        9012 :             eobs[context_ptr->txb_itr],
    1667             :             cuPlane);
    1668             : 
    1669        9012 :         if (picture_control_set_ptr->update_cdf)
    1670             :         {
    1671        6287 :             ModeDecisionCandidateBuffer         **candidate_buffer_ptr_array_base = context_ptr->md_context->candidate_buffer_ptr_array;
    1672        6287 :             ModeDecisionCandidateBuffer         **candidate_buffer_ptr_array = &(candidate_buffer_ptr_array_base[0]);
    1673             :             ModeDecisionCandidateBuffer          *candidate_buffer;
    1674             : 
    1675             :             // Set the Candidate Buffer
    1676        6287 :             candidate_buffer = candidate_buffer_ptr_array[0];
    1677             :             // Rate estimation function uses the values from CandidatePtr. The right values are copied from cu_ptr to CandidatePtr
    1678        6287 :             candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr] = cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_Y];
    1679        6287 :             candidate_buffer->candidate_ptr->transform_type_uv = cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_UV];
    1680        6287 :             candidate_buffer->candidate_ptr->type = cu_ptr->prediction_mode_flag;
    1681        6287 :             candidate_buffer->candidate_ptr->pred_mode = cu_ptr->pred_mode;
    1682             : #if FILTER_INTRA_FLAG
    1683        6287 :             candidate_buffer->candidate_ptr->filter_intra_mode = cu_ptr->filter_intra_mode;
    1684             : #endif
    1685        6287 :             const uint32_t coeff1dOffset = context_ptr->coded_area_sb;
    1686             : 
    1687        6287 :             av1_tu_estimate_coeff_bits(
    1688        6287 :                 context_ptr->md_context,
    1689             :                 1,//allow_update_cdf,
    1690        6287 :                 &picture_control_set_ptr->ec_ctx_array[tbAddr],
    1691             :                 picture_control_set_ptr,
    1692             :                 candidate_buffer,
    1693             :                 coeff1dOffset,
    1694        6287 :                 context_ptr->coded_area_sb_uv,
    1695             :                 coeff_est_entropy_coder_ptr,
    1696             :                 coeff_buffer_sb,
    1697        6287 :                 eobs[context_ptr->txb_itr][0],
    1698        6287 :                 eobs[context_ptr->txb_itr][1],
    1699        6287 :                 eobs[context_ptr->txb_itr][2],
    1700             :                 &y_tu_coeff_bits,
    1701             :                 &cb_tu_coeff_bits,
    1702             :                 &cr_tu_coeff_bits,
    1703        6287 :                 context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    1704        6287 :                 context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1705        6287 :                 candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr],
    1706        6287 :                 candidate_buffer->candidate_ptr->transform_type_uv,
    1707             :                 COMPONENT_LUMA);
    1708             :         }
    1709             : 
    1710        9012 :         Av1EncodeGenerateReconFunctionPtr[is16bit](
    1711             :             context_ptr,
    1712             :             txb_origin_x,
    1713             :             txb_origin_y,
    1714             :             recon_buffer,
    1715             :             inverse_quant_buffer,
    1716             :             transform_inner_array_ptr,
    1717             :             PICTURE_BUFFER_DESC_LUMA_MASK,
    1718        9012 :             eobs[context_ptr->txb_itr]);
    1719             : 
    1720             :         // Update Recon Samples-INTRA-
    1721        9012 :         EncodePassUpdateReconSampleNeighborArrays(
    1722             :             ep_luma_recon_neighbor_array,
    1723             :             ep_cb_recon_neighbor_array,
    1724             :             ep_cr_recon_neighbor_array,
    1725             :             recon_buffer,
    1726             :             txb_origin_x,
    1727             :             txb_origin_y,
    1728        9012 :             context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr],
    1729        9012 :             context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr],
    1730        9012 :             context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1731        9012 :             context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1732             :             PICTURE_BUFFER_DESC_LUMA_MASK,
    1733             :             is16bit);
    1734             : 
    1735        9012 :         context_ptr->coded_area_sb += context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr] * context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr];
    1736             : 
    1737             : 
    1738             :         // Update the luma Dc Sign Level Coeff Neighbor Array
    1739             :         {
    1740        9012 :             uint8_t dcSignLevelCoeff = (uint8_t)cu_ptr->quantized_dc[0][context_ptr->txb_itr];
    1741             : 
    1742        9012 :             neighbor_array_unit_mode_write(
    1743             :                 picture_control_set_ptr->ep_luma_dc_sign_level_coeff_neighbor_array,
    1744             :                 (uint8_t*)&dcSignLevelCoeff,
    1745             :                 txb_origin_x,
    1746             :                 txb_origin_y,
    1747        9012 :                 context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr],
    1748        9012 :                 context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr],
    1749             :                 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
    1750             :         }
    1751             : 
    1752             :     } // Transform Loop
    1753             : 
    1754             :     // Chroma path
    1755             : 
    1756        6418 :     if(context_ptr->blk_geom->has_uv)
    1757             :     {
    1758        5347 :         context_ptr->txb_itr = 0;
    1759        5347 :         uint16_t txb_origin_x = context_ptr->cu_origin_x + context_ptr->blk_geom->tx_boff_x[cu_ptr->tx_depth][context_ptr->txb_itr];
    1760        5347 :         uint16_t txb_origin_y = context_ptr->cu_origin_y + context_ptr->blk_geom->tx_boff_y[cu_ptr->tx_depth][context_ptr->txb_itr];
    1761             : 
    1762        5347 :         uint32_t cu_originx_uv = (context_ptr->cu_origin_x >> 3 << 3) >> 1;
    1763        5347 :         uint32_t cu_originy_uv = (context_ptr->cu_origin_y >> 3 << 3) >> 1;
    1764             : 
    1765        5347 :         context_ptr->md_context->cb_txb_skip_context = 0;
    1766        5347 :         context_ptr->md_context->cb_dc_sign_context = 0;
    1767        5347 :         get_txb_ctx(
    1768        5347 :             picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr,
    1769             :             COMPONENT_CHROMA,
    1770             :             picture_control_set_ptr->ep_cb_dc_sign_level_coeff_neighbor_array,
    1771             :             cu_originx_uv,
    1772             :             cu_originy_uv,
    1773        5347 :             context_ptr->blk_geom->bsize_uv,
    1774        5347 :             context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1775        5347 :             &context_ptr->md_context->cb_txb_skip_context,
    1776        5347 :             &context_ptr->md_context->cb_dc_sign_context);
    1777             : 
    1778        5347 :         context_ptr->md_context->cr_txb_skip_context = 0;
    1779        5347 :         context_ptr->md_context->cr_dc_sign_context = 0;
    1780        5347 :         get_txb_ctx(
    1781        5347 :             picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr,
    1782             :             COMPONENT_CHROMA,
    1783             :             picture_control_set_ptr->ep_cr_dc_sign_level_coeff_neighbor_array,
    1784             :             cu_originx_uv,
    1785             :             cu_originy_uv,
    1786        5347 :             context_ptr->blk_geom->bsize_uv,
    1787        5347 :             context_ptr->blk_geom->txsize_uv[context_ptr->cu_ptr->tx_depth][context_ptr->txb_itr],
    1788        5347 :             &context_ptr->md_context->cr_txb_skip_context,
    1789        5347 :             &context_ptr->md_context->cr_dc_sign_context);
    1790             : 
    1791        5347 :         if (is16bit) {
    1792             :             uint16_t    topNeighArray[64 * 2 + 1];
    1793             :             uint16_t    leftNeighArray[64 * 2 + 1];
    1794             :             PredictionMode mode;
    1795             : 
    1796           0 :             int32_t plane_end = 2;
    1797             : 
    1798           0 :             for (int32_t plane = 1; plane <= plane_end; ++plane) {
    1799           0 :                 TxSize  tx_size = plane ? context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr] : context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr];
    1800             : 
    1801           0 :                 if (plane == 1) {
    1802           0 :                     if (cu_originy_uv != 0)
    1803           0 :                         memcpy(topNeighArray + 1, (uint16_t*)(ep_cb_recon_neighbor_array->top_array) + cu_originx_uv, context_ptr->blk_geom->bwidth_uv * 2 * sizeof(uint16_t));
    1804           0 :                     if (cu_originx_uv != 0)
    1805           0 :                         memcpy(leftNeighArray + 1, (uint16_t*)(ep_cb_recon_neighbor_array->left_array) + cu_originy_uv, context_ptr->blk_geom->bheight_uv * 2 * sizeof(uint16_t));
    1806           0 :                     if (cu_originy_uv != 0 && cu_originx_uv != 0)
    1807           0 :                         topNeighArray[0] = leftNeighArray[0] = ((uint16_t*)(ep_cb_recon_neighbor_array->top_left_array) + MAX_PICTURE_HEIGHT_SIZE / 2 + cu_originx_uv - cu_originy_uv)[0];
    1808             :                 }
    1809           0 :                 else if (plane == 2) {
    1810           0 :                     if (cu_originy_uv != 0)
    1811           0 :                         memcpy(topNeighArray + 1, (uint16_t*)(ep_cr_recon_neighbor_array->top_array) + cu_originx_uv, context_ptr->blk_geom->bwidth_uv * 2 * sizeof(uint16_t));
    1812           0 :                     if (cu_originx_uv != 0)
    1813           0 :                         memcpy(leftNeighArray + 1, (uint16_t*)(ep_cr_recon_neighbor_array->left_array) + cu_originy_uv, context_ptr->blk_geom->bheight_uv * 2 * sizeof(uint16_t));
    1814           0 :                     if (cu_originy_uv != 0 && cu_originx_uv != 0)
    1815           0 :                         topNeighArray[0] = leftNeighArray[0] = ((uint16_t*)(ep_cr_recon_neighbor_array->top_left_array) + MAX_PICTURE_HEIGHT_SIZE / 2 + cu_originx_uv - cu_originy_uv)[0];
    1816             :                 }
    1817             : 
    1818           0 :                 mode = (pu_ptr->intra_chroma_mode == UV_CFL_PRED) ? (PredictionMode)UV_DC_PRED : (PredictionMode)pu_ptr->intra_chroma_mode;
    1819             : 
    1820           0 :                 eb_av1_predict_intra_block_16bit(
    1821             :                     &sb_ptr->tile_info,
    1822             :                     ED_STAGE,
    1823             :                     context_ptr->blk_geom,
    1824           0 :                     picture_control_set_ptr->parent_pcs_ptr->av1_cm,
    1825             : #if ATB_10_BIT
    1826           0 :                     plane ? context_ptr->blk_geom->bwidth_uv : context_ptr->blk_geom->bwidth,
    1827           0 :                     plane ? context_ptr->blk_geom->bheight_uv : context_ptr->blk_geom->bheight,
    1828             : #else
    1829             :                     plane ? context_ptr->blk_geom->bwidth_uv : context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr],
    1830             :                     plane ? context_ptr->blk_geom->bheight_uv : context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr],
    1831             : #endif
    1832             :                     tx_size,
    1833             :                     mode,
    1834             :                     plane ? pu_ptr->angle_delta[PLANE_TYPE_UV] : pu_ptr->angle_delta[PLANE_TYPE_Y],
    1835             : #if PAL_SUP
    1836             :                     0, //chroma
    1837             :                     &cu_ptr->palette_info,
    1838             : #else
    1839             :                     0,
    1840             : #endif
    1841             :                     FILTER_INTRA_MODES,
    1842             :                     topNeighArray + 1,
    1843             :                     leftNeighArray + 1,
    1844             :                     recon_buffer,
    1845             : #if ATB_10_BIT
    1846           0 :                     plane ? 0 : context_ptr->blk_geom->tx_boff_x[cu_ptr->tx_depth][context_ptr->txb_itr] >> 2,
    1847           0 :                     plane ? 0 : context_ptr->blk_geom->tx_boff_y[cu_ptr->tx_depth][context_ptr->txb_itr] >> 2,
    1848             : #else
    1849             :                     //int32_t dst_stride,
    1850             :                     0,
    1851             :                     0,
    1852             : #endif
    1853             :                     plane,
    1854           0 :                     context_ptr->blk_geom->bsize,
    1855             :                     txb_origin_x,
    1856             :                     txb_origin_y,
    1857           0 :                     plane ? context_ptr->cu_origin_x : context_ptr->cu_origin_x,
    1858           0 :                     plane ? context_ptr->cu_origin_y : context_ptr->cu_origin_y,
    1859             :                     0,
    1860             :                     0);
    1861             :             }
    1862             :         }
    1863             :         else {
    1864             :             uint8_t    topNeighArray[64 * 2 + 1];
    1865             :             uint8_t    leftNeighArray[64 * 2 + 1];
    1866             :             PredictionMode mode;
    1867             : 
    1868             :             // Partition Loop
    1869        5347 :             int32_t plane_end = 2;
    1870             : 
    1871       16041 :             for (int32_t plane = 1; plane <= plane_end; ++plane) {
    1872       10694 :                 TxSize  tx_size = plane ? context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr] : context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr];
    1873             : 
    1874       10694 :                 if (plane == 1) {
    1875        5347 :                     if (cu_originy_uv != 0)
    1876        5111 :                         memcpy(topNeighArray + 1, ep_cb_recon_neighbor_array->top_array + cu_originx_uv, context_ptr->blk_geom->bwidth_uv * 2);
    1877             : 
    1878        5347 :                     if (cu_originx_uv != 0)
    1879        5067 :                         memcpy(leftNeighArray + 1, ep_cb_recon_neighbor_array->left_array + cu_originy_uv, context_ptr->blk_geom->bheight_uv * 2);
    1880             : 
    1881        5347 :                     if (cu_originy_uv != 0 && cu_originx_uv != 0)
    1882        4839 :                         topNeighArray[0] = leftNeighArray[0] = ep_cb_recon_neighbor_array->top_left_array[MAX_PICTURE_HEIGHT_SIZE / 2 + cu_originx_uv - cu_originy_uv];
    1883             :                 }
    1884             :                 else {
    1885        5347 :                     if (cu_originy_uv != 0)
    1886        5111 :                         memcpy(topNeighArray + 1, ep_cr_recon_neighbor_array->top_array + cu_originx_uv, context_ptr->blk_geom->bwidth_uv * 2);
    1887             : 
    1888        5347 :                     if (cu_originx_uv != 0)
    1889        5067 :                         memcpy(leftNeighArray + 1, ep_cr_recon_neighbor_array->left_array + cu_originy_uv, context_ptr->blk_geom->bheight_uv * 2);
    1890             : 
    1891        5347 :                     if (cu_originy_uv != 0 && cu_originx_uv != 0)
    1892        4839 :                         topNeighArray[0] = leftNeighArray[0] = ep_cr_recon_neighbor_array->top_left_array[MAX_PICTURE_HEIGHT_SIZE / 2 + cu_originx_uv - cu_originy_uv];
    1893             :                 }
    1894             : 
    1895       10694 :                 mode = (pu_ptr->intra_chroma_mode == UV_CFL_PRED) ? (PredictionMode)UV_DC_PRED : (PredictionMode)pu_ptr->intra_chroma_mode;
    1896             : 
    1897             :                 // Hsan: if CHROMA_MODE_2, then CFL will be evaluated @ EP as no CHROMA @ MD
    1898             :                 // If that's the case, ensure that the 1st chroma prediction uses UV_DC_PRED (the default configuration for CHROMA_MODE_2 when CFL is applicable, set @ fast loop candidates injection); MD then assumes the chroma mode is always UV_DC_PRED.
    1899       21388 :                 eb_av1_predict_intra_block(
    1900             :                     &sb_ptr->tile_info,
    1901             :                     ED_STAGE,
    1902             :                     context_ptr->blk_geom,
    1903       10694 :                     picture_control_set_ptr->parent_pcs_ptr->av1_cm,
    1904       10694 :                     plane ? context_ptr->blk_geom->bwidth_uv : context_ptr->blk_geom->bwidth,
    1905       10694 :                     plane ? context_ptr->blk_geom->bheight_uv : context_ptr->blk_geom->bheight,
    1906             :                     tx_size,
    1907             :                     mode,
    1908             :                     plane ? pu_ptr->angle_delta[PLANE_TYPE_UV] : pu_ptr->angle_delta[PLANE_TYPE_Y],
    1909             : #if PAL_SUP
    1910             :                     0, //chroma
    1911             :                     &cu_ptr->palette_info,
    1912             : #else
    1913             :                     0,
    1914             : #endif
    1915             :                     FILTER_INTRA_MODES,
    1916             :                     topNeighArray + 1,
    1917             :                     leftNeighArray + 1,
    1918             :                     recon_buffer,
    1919           0 :                     plane ? 0 : context_ptr->blk_geom->tx_boff_x[cu_ptr->tx_depth][context_ptr->txb_itr] >> 2,
    1920           0 :                     plane ? 0 : context_ptr->blk_geom->tx_boff_y[cu_ptr->tx_depth][context_ptr->txb_itr] >> 2,
    1921             :                     plane,
    1922       10694 :                     context_ptr->blk_geom->bsize,
    1923             :                     txb_origin_x,
    1924             :                     txb_origin_y,
    1925       10694 :                     plane ? context_ptr->cu_origin_x : context_ptr->cu_origin_x,
    1926       10694 :                     plane ? context_ptr->cu_origin_y : context_ptr->cu_origin_y,
    1927             :                     0,
    1928             :                     0);
    1929             :             }
    1930             :         }
    1931             : 
    1932             :         // Encode Transform Unit -INTRA-
    1933        5347 :         uint16_t cb_qp = cu_ptr->qp;
    1934             : 
    1935        5347 :         Av1EncodeLoopFunctionTable[is16bit](
    1936             :             picture_control_set_ptr,
    1937             :             context_ptr,
    1938             :             sb_ptr,
    1939             :             txb_origin_x,
    1940             :             txb_origin_y,
    1941             :             cb_qp,
    1942             :             recon_buffer,
    1943             :             coeff_buffer_sb,
    1944             :             residual_buffer,
    1945             :             transform_buffer,
    1946             :             inverse_quant_buffer,
    1947             :             transform_inner_array_ptr,
    1948             :             count_non_zero_coeffs,
    1949             :             PICTURE_BUFFER_DESC_CHROMA_MASK,
    1950        5347 :             cu_ptr->delta_qp > 0 ? 0 : dZoffset,
    1951        5347 :             eobs[context_ptr->txb_itr],
    1952             :             cuPlane);
    1953             : 
    1954        5347 :         if (picture_control_set_ptr->update_cdf)
    1955             :         {
    1956        2622 :             ModeDecisionCandidateBuffer         **candidate_buffer_ptr_array_base = context_ptr->md_context->candidate_buffer_ptr_array;
    1957        2622 :             ModeDecisionCandidateBuffer         **candidate_buffer_ptr_array = &(candidate_buffer_ptr_array_base[0]);
    1958             :             ModeDecisionCandidateBuffer          *candidate_buffer;
    1959             : 
    1960             :             // Set the Candidate Buffer
    1961        2622 :             candidate_buffer = candidate_buffer_ptr_array[0];
    1962             :             // Rate estimation function uses the values from CandidatePtr. The right values are copied from cu_ptr to CandidatePtr
    1963        2622 :             candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr] = cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_Y];
    1964        2622 :             candidate_buffer->candidate_ptr->transform_type_uv = cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_UV];
    1965        2622 :             candidate_buffer->candidate_ptr->type = cu_ptr->prediction_mode_flag;
    1966        2622 :             candidate_buffer->candidate_ptr->pred_mode = cu_ptr->pred_mode;
    1967             : #if FILTER_INTRA_FLAG
    1968        2622 :             candidate_buffer->candidate_ptr->filter_intra_mode = cu_ptr->filter_intra_mode;
    1969             : #endif
    1970        2622 :             const uint32_t coeff1dOffset = context_ptr->coded_area_sb;
    1971             : 
    1972        2622 :             av1_tu_estimate_coeff_bits(
    1973        2622 :                 context_ptr->md_context,
    1974             :                 1,//allow_update_cdf,
    1975        2622 :                 &picture_control_set_ptr->ec_ctx_array[tbAddr],
    1976             :                 picture_control_set_ptr,
    1977             :                 candidate_buffer,
    1978             :                 coeff1dOffset,
    1979        2622 :                 context_ptr->coded_area_sb_uv,
    1980             :                 coeff_est_entropy_coder_ptr,
    1981             :                 coeff_buffer_sb,
    1982        2622 :                 eobs[context_ptr->txb_itr][0],
    1983        2622 :                 eobs[context_ptr->txb_itr][1],
    1984        2622 :                 eobs[context_ptr->txb_itr][2],
    1985             :                 &y_tu_coeff_bits,
    1986             :                 &cb_tu_coeff_bits,
    1987             :                 &cr_tu_coeff_bits,
    1988        2622 :                 context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    1989        2622 :                 context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    1990        2622 :                 candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr],
    1991        2622 :                 candidate_buffer->candidate_ptr->transform_type_uv,
    1992             :                 COMPONENT_CHROMA);
    1993             :         }
    1994             : 
    1995        5347 :         Av1EncodeGenerateReconFunctionPtr[is16bit](
    1996             :             context_ptr,
    1997             :             txb_origin_x,
    1998             :             txb_origin_y,
    1999             :             recon_buffer,
    2000             :             inverse_quant_buffer,
    2001             :             transform_inner_array_ptr,
    2002             :             PICTURE_BUFFER_DESC_CHROMA_MASK,
    2003        5347 :             eobs[context_ptr->txb_itr]);
    2004             : 
    2005             :         // Update Recon Samples-INTRA-
    2006        5347 :         EncodePassUpdateReconSampleNeighborArrays(
    2007             :             ep_luma_recon_neighbor_array,
    2008             :             ep_cb_recon_neighbor_array,
    2009             :             ep_cr_recon_neighbor_array,
    2010             :             recon_buffer,
    2011             :             txb_origin_x,
    2012             :             txb_origin_y,
    2013        5347 :             context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr],
    2014        5347 :             context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr],
    2015        5347 :             context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    2016        5347 :             context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    2017             :             PICTURE_BUFFER_DESC_CHROMA_MASK,
    2018             :             is16bit);
    2019             : 
    2020        5347 :         context_ptr->coded_area_sb_uv += context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr] * context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr];
    2021             : 
    2022             :         // Update the cb Dc Sign Level Coeff Neighbor Array
    2023             :         {
    2024        5347 :             uint8_t dcSignLevelCoeff = (uint8_t)cu_ptr->quantized_dc[1][context_ptr->txb_itr];
    2025        5347 :             neighbor_array_unit_mode_write(
    2026             :                 picture_control_set_ptr->ep_cb_dc_sign_level_coeff_neighbor_array,
    2027             :                 (uint8_t*)&dcSignLevelCoeff,
    2028        5347 :                 ROUND_UV(txb_origin_x) >> 1,
    2029        5347 :                 ROUND_UV(txb_origin_y) >> 1,
    2030        5347 :                 context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    2031        5347 :                 context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    2032             :                 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
    2033             : 
    2034             :         }
    2035             : 
    2036             :         // Update the cr DC Sign Level Coeff Neighbor Array
    2037             :         {
    2038        5347 :             uint8_t dcSignLevelCoeff = (uint8_t)cu_ptr->quantized_dc[2][context_ptr->txb_itr];
    2039        5347 :             neighbor_array_unit_mode_write(
    2040             :                 picture_control_set_ptr->ep_cr_dc_sign_level_coeff_neighbor_array,
    2041             :                 (uint8_t*)&dcSignLevelCoeff,
    2042        5347 :                 ROUND_UV(txb_origin_x) >> 1,
    2043        5347 :                 ROUND_UV(txb_origin_y) >> 1,
    2044        5347 :                 context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    2045        5347 :                 context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    2046             :                 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
    2047             :         }
    2048             : 
    2049             :     } // Transform Loop
    2050             : 
    2051       15430 :     for (context_ptr->txb_itr = 0; context_ptr->txb_itr < totTu; context_ptr->txb_itr++) {
    2052        9012 :         uint8_t uv_pass = cu_ptr->tx_depth && context_ptr->txb_itr ? 0 : 1;
    2053             : 
    2054        9012 :         if (context_ptr->blk_geom->has_uv && uv_pass) {
    2055        5347 :             cu_ptr->block_has_coeff = cu_ptr->block_has_coeff |
    2056        5347 :                 cu_ptr->transform_unit_array[context_ptr->txb_itr].y_has_coeff |
    2057        5347 :                 cu_ptr->transform_unit_array[context_ptr->txb_itr].u_has_coeff |
    2058        5347 :                 cu_ptr->transform_unit_array[context_ptr->txb_itr].v_has_coeff;
    2059             : 
    2060        5347 :             if (cu_ptr->transform_unit_array[context_ptr->txb_itr].u_has_coeff)
    2061        2079 :                 cu_ptr->transform_unit_array[0].u_has_coeff = EB_TRUE;
    2062        5347 :             if (cu_ptr->transform_unit_array[context_ptr->txb_itr].v_has_coeff)
    2063        1204 :                 cu_ptr->transform_unit_array[0].v_has_coeff = EB_TRUE;
    2064             :         }
    2065             :         else {
    2066        3665 :             cu_ptr->block_has_coeff = cu_ptr->block_has_coeff |
    2067        3665 :                 cu_ptr->transform_unit_array[context_ptr->txb_itr].y_has_coeff;
    2068             :         }
    2069             :     } // Transform Loop
    2070        6418 : }
    2071             : #define REFMVS_LIMIT ((1 << 12) - 1) /* = 4095: MVs with |row| or |col| above this are not stored by av1_copy_frame_mvs */
    2072             : 
                       /*******************************************
                       * av1_copy_frame_mvs
                       *
                       * Summary: Writes one block's reference frame
                       *   and motion vector into a rectangular region
                       *   of object_ptr->mvs.
                       *
                       * Inputs:
                       *   picture_control_set_ptr - only ref_frame_side[] is read
                       *   cm                      - only mi_cols is read (row stride)
                       *   mi                      - block mode info (passed by value);
                       *                             block_mi.ref_frame[0..1] and
                       *                             block_mi.mv[0..1] are read
                       *   mi_row, mi_col          - block position; shifted right by 1
                       *                             to index the MV buffer
                       *   x_mis, y_mis            - block extent; passed through
                       *                             ROUND_POWER_OF_TWO(.., 1) to get the
                       *                             number of cells written per row/column
                       *   object_ptr              - reference object whose mvs buffer
                       *                             is written
                       *
                       * Outputs:
                       *   y_mis x x_mis cells of object_ptr->mvs, each set to
                       *   NONE_FRAME / zero MV unless a reference qualifies
                       *   (see the inner loop). Every cell of the region receives
                       *   the same value, since nothing stored depends on w or h.
                       *
                       * NOTE(review): ROUND_POWER_OF_TWO is defined elsewhere;
                       *   presumably a rounded right shift (halving here) - confirm.
                       *******************************************/
    2073        9165 : void av1_copy_frame_mvs(PictureControlSet *picture_control_set_ptr, const Av1Common *const cm,
    2074             :     MbModeInfo  mi, int mi_row, int mi_col,
    2075             :     int x_mis, int y_mis, EbReferenceObject *object_ptr) {
                           // Row stride of the MV buffer, derived from the frame width in mi units.
    2076        9165 :     const int frame_mvs_stride = ROUND_POWER_OF_TWO(cm->mi_cols, 1);
                           // First cell of this block: the buffer is indexed with (mi_row >> 1, mi_col >> 1).
    2077        9165 :     MV_REF *frame_mvs = object_ptr->mvs + (mi_row >> 1) * frame_mvs_stride + (mi_col >> 1);
                           // Convert the block extent to a cell count in each direction.
    2078        9165 :     x_mis = ROUND_POWER_OF_TWO(x_mis, 1);
    2079        9165 :     y_mis = ROUND_POWER_OF_TWO(y_mis, 1);
    2080             :     int w, h;
    2081             : 
    2082       99316 :     for (h = 0; h < y_mis; h++) {
    2083       90151 :         MV_REF *mv = frame_mvs;
    2084     1499320 :         for (w = 0; w < x_mis; w++) {
                                   // Default when no reference qualifies: no reference frame, zero MV.
    2085     1409170 :             mv->ref_frame = NONE_FRAME;
    2086     1409170 :             mv->mv.as_int = 0;
    2087             : 
                                   // Examine both references of the block; when both qualify, idx 1
                                   // overwrites what idx 0 stored.
    2088     4227520 :             for (int idx = 0; idx < 2; ++idx) {
    2089     2818350 :                 MvReferenceFrame ref_frame = mi.block_mi.ref_frame[idx];
                                       // Only values above INTRA_FRAME are considered.
    2090     2818350 :                 if (ref_frame > INTRA_FRAME) {
                                           // Skip references whose ref_frame_side entry is non-zero.
                                           // NOTE(review): presumably these are references on the
                                           // opposite temporal side - confirm where ref_frame_side is set.
    2091     2488480 :                     int8_t ref_idx = picture_control_set_ptr->ref_frame_side[ref_frame];
    2092     2488480 :                     if (ref_idx) continue;
                                           // Skip MVs whose row or column magnitude exceeds REFMVS_LIMIT.
    2093     1345570 :                     if ((abs(mi.block_mi.mv[idx].as_mv.row) > REFMVS_LIMIT) ||
    2094     1345570 :                         (abs(mi.block_mi.mv[idx].as_mv.col) > REFMVS_LIMIT))
    2095           0 :                         continue;
    2096     1345570 :                     mv->ref_frame = ref_frame;
    2097     1345570 :                     mv->mv.as_int = mi.block_mi.mv[idx].as_int;
    2098             :                 }
    2099             :             }
    2100     1409170 :             mv++;
    2101             :         }
                               // Advance to the same column on the next row of the MV buffer.
    2102       90151 :         frame_mvs += frame_mvs_stride;
    2103             :     }
    2104        9165 : }
    2105             : /*******************************************
    2106             : * Encode Pass
    2107             : *
    2108             : * Summary: Performs an AV1 conformant
    2109             : *   reconstruction based on the LCU
    2110             : *   mode decision.
    2111             : *
    2112             : * Inputs:
    2113             : *   SourcePic
    2114             : *   Coding Results
    2115             : *   SB Location
    2116             : *   Sequence Control Set
    2117             : *   Picture Control Set
    2118             : *
    2119             : * Outputs:
    2120             : *   Reconstructed Samples
    2121             : *   Coefficient Samples
    2122             : *
    2123             : *******************************************/
    2124        7199 : EB_EXTERN void av1_encode_pass(
    2125             :     SequenceControlSet      *sequence_control_set_ptr,
    2126             :     PictureControlSet       *picture_control_set_ptr,
    2127             :     LargestCodingUnit       *sb_ptr,
    2128             :     uint32_t                 tbAddr,
    2129             :     uint32_t                 sb_origin_x,
    2130             :     uint32_t                 sb_origin_y,
    2131             :     EncDecContext           *context_ptr)
    2132             : {
    2133        7199 :     EbBool                    is16bit = context_ptr->is16bit;
    2134        7199 :     EbPictureBufferDesc    *recon_buffer = is16bit ? picture_control_set_ptr->recon_picture16bit_ptr : picture_control_set_ptr->recon_picture_ptr;
    2135        7199 :     EbPictureBufferDesc    *coeff_buffer_sb = sb_ptr->quantized_coeff;
    2136             :     EbPictureBufferDesc    *inputPicture;
    2137             :     ModeDecisionContext    *mdcontextPtr;
    2138        7199 :     mdcontextPtr = context_ptr->md_context;
    2139        7199 :     inputPicture = context_ptr->input_samples = (EbPictureBufferDesc*)picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
    2140             : 
    2141        7199 :     SbStat                *sb_stat_ptr = &(picture_control_set_ptr->parent_pcs_ptr->sb_stat_array[tbAddr]);
    2142             :     // SB Stats
    2143        7199 :     uint32_t                  sb_width = MIN(sequence_control_set_ptr->sb_size_pix, sequence_control_set_ptr->seq_header.max_frame_width - sb_origin_x);
    2144        7199 :     uint32_t                  sb_height = MIN(sequence_control_set_ptr->sb_size_pix, sequence_control_set_ptr->seq_header.max_frame_height - sb_origin_y);
    2145             :     // MV merge mode
    2146             :     uint32_t                  y_has_coeff;
    2147             :     uint32_t                  u_has_coeff;
    2148             :     uint32_t                  v_has_coeff;
    2149             :     uint64_t                  y_coeff_bits;
    2150             :     uint64_t                  cb_coeff_bits;
    2151             :     uint64_t                  cr_coeff_bits;
    2152             :     uint64_t                  y_full_distortion[DIST_CALC_TOTAL];
    2153             :     EB_ALIGN(16) uint64_t     yTuFullDistortion[DIST_CALC_TOTAL];
    2154             :     uint32_t                  count_non_zero_coeffs[3];
    2155             :     MacroblockPlane           cuPlane[3];
    2156             :     uint16_t                  eobs[MAX_TXB_COUNT][3];
    2157             :     uint64_t                  y_tu_coeff_bits;
    2158             :     uint64_t                  cb_tu_coeff_bits;
    2159             :     uint64_t                  cr_tu_coeff_bits;
    2160             :     EncodeContext          *encode_context_ptr;
    2161             :     // Dereferencing early
    2162        7199 :     NeighborArrayUnit      *ep_mode_type_neighbor_array = picture_control_set_ptr->ep_mode_type_neighbor_array;
    2163        7199 :     NeighborArrayUnit      *ep_intra_luma_mode_neighbor_array = picture_control_set_ptr->ep_intra_luma_mode_neighbor_array;
    2164        7199 :     NeighborArrayUnit      *ep_intra_chroma_mode_neighbor_array = picture_control_set_ptr->ep_intra_chroma_mode_neighbor_array;
    2165        7199 :     NeighborArrayUnit      *ep_mv_neighbor_array = picture_control_set_ptr->ep_mv_neighbor_array;
    2166        7199 :     NeighborArrayUnit      *ep_luma_recon_neighbor_array = is16bit ? picture_control_set_ptr->ep_luma_recon_neighbor_array16bit : picture_control_set_ptr->ep_luma_recon_neighbor_array;
    2167        7199 :     NeighborArrayUnit      *ep_cb_recon_neighbor_array = is16bit ? picture_control_set_ptr->ep_cb_recon_neighbor_array16bit : picture_control_set_ptr->ep_cb_recon_neighbor_array;
    2168        7199 :     NeighborArrayUnit      *ep_cr_recon_neighbor_array = is16bit ? picture_control_set_ptr->ep_cr_recon_neighbor_array16bit : picture_control_set_ptr->ep_cr_recon_neighbor_array;
    2169        7199 :     NeighborArrayUnit      *ep_skip_flag_neighbor_array = picture_control_set_ptr->ep_skip_flag_neighbor_array;
    2170             : 
    2171        7199 :     EbBool                 constrained_intra_flag = picture_control_set_ptr->constrained_intra_flag;
    2172             : 
    2173             : 
    2174        7199 :     EbBool dlfEnableFlag = (EbBool) picture_control_set_ptr->parent_pcs_ptr->loop_filter_mode;
    2175        7199 :     const EbBool isIntraLCU = picture_control_set_ptr->limit_intra ? EB_FALSE : EB_TRUE;
    2176             : 
    2177        7199 :     EbBool doRecon = (EbBool)(
    2178           0 :         (picture_control_set_ptr->limit_intra == 0 || isIntraLCU == 1) ||
    2179           0 :         picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag ||
    2180        7199 :         sequence_control_set_ptr->static_config.recon_enabled ||
    2181           0 :         sequence_control_set_ptr->static_config.stat_report);
    2182             : 
    2183        7199 :     EntropyCoder  *coeff_est_entropy_coder_ptr = picture_control_set_ptr->coeff_est_entropy_coder_ptr;
    2184             : 
    2185        7199 :     uint32_t           dZoffset = 0;
    2186        7199 :     context_ptr->skip_qpm_flag = EB_TRUE;
    2187             : 
    2188        7199 :     encode_context_ptr = ((SequenceControlSet*)(picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr))->encode_context_ptr;
    2189             : 
    2190        7199 :     if (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_TRUE)
    2191             :         //get the 16bit form of the input LCU
    2192        4080 :         if (is16bit)
    2193           0 :             recon_buffer = ((EbReferenceObject*)picture_control_set_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture16bit;
    2194             :         else
    2195        4080 :             recon_buffer = ((EbReferenceObject*)picture_control_set_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture;
    2196             :     else  // non ref pictures
    2197        3119 :         recon_buffer = is16bit ? picture_control_set_ptr->recon_picture16bit_ptr : picture_control_set_ptr->recon_picture_ptr;
    2198             : 
    2199             : 
    2200             :     // DeriveZeroLumaCbf
    2201        7199 :     EbBool  highIntraRef = EB_FALSE;
    2202        7199 :     EbBool  checkZeroLumaCbf = EB_FALSE;
    2203             : 
    2204        7199 :     if (is16bit) {
    2205             :         //SB128_TODO change 10bit SB creation
    2206             : 
    2207           0 :         if ((sequence_control_set_ptr->static_config.ten_bit_format == 1) || (sequence_control_set_ptr->static_config.compressed_ten_bit_format == 1))
    2208           0 :         {
    2209           0 :             const uint32_t input_luma_offset = ((sb_origin_y + inputPicture->origin_y)         * inputPicture->stride_y) + (sb_origin_x + inputPicture->origin_x);
    2210           0 :             const uint32_t input_cb_offset = (((sb_origin_y + inputPicture->origin_y) >> 1)  * inputPicture->stride_cb) + ((sb_origin_x + inputPicture->origin_x) >> 1);
    2211           0 :             const uint32_t input_cr_offset = (((sb_origin_y + inputPicture->origin_y) >> 1)  * inputPicture->stride_cr) + ((sb_origin_x + inputPicture->origin_x) >> 1);
    2212           0 :             const uint16_t luma2BitWidth = inputPicture->width / 4;
    2213           0 :             const uint16_t chroma2BitWidth = inputPicture->width / 8;
    2214             : 
    2215           0 :             compressed_pack_lcu(
    2216           0 :                 inputPicture->buffer_y + input_luma_offset,
    2217           0 :                 inputPicture->stride_y,
    2218           0 :                 inputPicture->buffer_bit_inc_y + sb_origin_y * luma2BitWidth + (sb_origin_x / 4)*sb_height,
    2219             :                 sb_width / 4,
    2220           0 :                 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_y,
    2221           0 :                 context_ptr->input_sample16bit_buffer->stride_y,
    2222             :                 sb_width,
    2223             :                 sb_height);
    2224             : 
    2225           0 :             compressed_pack_lcu(
    2226           0 :                 inputPicture->buffer_cb + input_cb_offset,
    2227           0 :                 inputPicture->stride_cb,
    2228           0 :                 inputPicture->buffer_bit_inc_cb + sb_origin_y / 2 * chroma2BitWidth + (sb_origin_x / 8)*(sb_height / 2),
    2229             :                 sb_width / 8,
    2230           0 :                 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cb,
    2231           0 :                 context_ptr->input_sample16bit_buffer->stride_cb,
    2232             :                 sb_width >> 1,
    2233             :                 sb_height >> 1);
    2234             : 
    2235           0 :             compressed_pack_lcu(
    2236           0 :                 inputPicture->buffer_cr + input_cr_offset,
    2237           0 :                 inputPicture->stride_cr,
    2238           0 :                 inputPicture->buffer_bit_inc_cr + sb_origin_y / 2 * chroma2BitWidth + (sb_origin_x / 8)*(sb_height / 2),
    2239             :                 sb_width / 8,
    2240           0 :                 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cr,
    2241           0 :                 context_ptr->input_sample16bit_buffer->stride_cr,
    2242             :                 sb_width >> 1,
    2243             :                 sb_height >> 1);
    2244             :         }
    2245             :         else {
    2246           0 :             const uint32_t input_luma_offset = ((sb_origin_y + inputPicture->origin_y)         * inputPicture->stride_y) + (sb_origin_x + inputPicture->origin_x);
    2247           0 :             const uint32_t inputBitIncLumaOffset = ((sb_origin_y + inputPicture->origin_y)         * inputPicture->stride_bit_inc_y) + (sb_origin_x + inputPicture->origin_x);
    2248           0 :             const uint32_t input_cb_offset = (((sb_origin_y + inputPicture->origin_y) >> 1)  * inputPicture->stride_cb) + ((sb_origin_x + inputPicture->origin_x) >> 1);
    2249           0 :             const uint32_t inputBitIncCbOffset = (((sb_origin_y + inputPicture->origin_y) >> 1)  * inputPicture->stride_bit_inc_cb) + ((sb_origin_x + inputPicture->origin_x) >> 1);
    2250           0 :             const uint32_t input_cr_offset = (((sb_origin_y + inputPicture->origin_y) >> 1)  * inputPicture->stride_cr) + ((sb_origin_x + inputPicture->origin_x) >> 1);
    2251           0 :             const uint32_t inputBitIncCrOffset = (((sb_origin_y + inputPicture->origin_y) >> 1)  * inputPicture->stride_bit_inc_cr) + ((sb_origin_x + inputPicture->origin_x) >> 1);
    2252             : 
    2253           0 :             pack2d_src(
    2254           0 :                 inputPicture->buffer_y + input_luma_offset,
    2255           0 :                 inputPicture->stride_y,
    2256           0 :                 inputPicture->buffer_bit_inc_y + inputBitIncLumaOffset,
    2257           0 :                 inputPicture->stride_bit_inc_y,
    2258           0 :                 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_y,
    2259           0 :                 context_ptr->input_sample16bit_buffer->stride_y,
    2260             :                 sb_width,
    2261             :                 sb_height);
    2262             : 
    2263           0 :             pack2d_src(
    2264           0 :                 inputPicture->buffer_cb + input_cb_offset,
    2265           0 :                 inputPicture->stride_cr,
    2266           0 :                 inputPicture->buffer_bit_inc_cb + inputBitIncCbOffset,
    2267           0 :                 inputPicture->stride_bit_inc_cr,
    2268           0 :                 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cb,
    2269           0 :                 context_ptr->input_sample16bit_buffer->stride_cb,
    2270             :                 sb_width >> 1,
    2271             :                 sb_height >> 1);
    2272             : 
    2273           0 :             pack2d_src(
    2274           0 :                 inputPicture->buffer_cr + input_cr_offset,
    2275           0 :                 inputPicture->stride_cr,
    2276           0 :                 inputPicture->buffer_bit_inc_cr + inputBitIncCrOffset,
    2277           0 :                 inputPicture->stride_bit_inc_cr,
    2278           0 :                 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cr,
    2279           0 :                 context_ptr->input_sample16bit_buffer->stride_cr,
    2280             :                 sb_width >> 1,
    2281             :                 sb_height >> 1);
    2282             :         }
    2283             : 
    2284           0 :         if (context_ptr->md_context->hbd_mode_decision == 0)
    2285           0 :             Store16bitInputSrc(context_ptr->input_sample16bit_buffer, picture_control_set_ptr, sb_origin_x, sb_origin_y, sb_width, sb_height);
    2286             :     }
    2287             : 
    2288        7199 :     if ((sequence_control_set_ptr->input_resolution == INPUT_SIZE_4K_RANGE) && !picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) {
    2289           0 :         if (!((sb_stat_ptr->stationary_edge_over_time_flag) || (picture_control_set_ptr->parent_pcs_ptr->logo_pic_flag)))
    2290             :         {
    2291           0 :             if (picture_control_set_ptr->slice_type == B_SLICE) {
    2292           0 :                 EbReferenceObject  *refObjL0 = (EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[REF_LIST_0][0]->object_ptr;
    2293           0 :                 EbReferenceObject  *refObjL1 = (EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[REF_LIST_1][0]->object_ptr;
    2294           0 :                 uint32_t const TH = (sequence_control_set_ptr->static_config.frame_rate >> 16) < 50 ? 25 : 30;
    2295             : 
    2296           0 :                 if ((refObjL0->tmp_layer_idx == 2 && refObjL0->intra_coded_area > TH) || (refObjL1->tmp_layer_idx == 2 && refObjL1->intra_coded_area > TH))
    2297           0 :                     highIntraRef = EB_TRUE;
    2298             :             }
    2299           0 :             if (highIntraRef == EB_FALSE)
    2300           0 :                 checkZeroLumaCbf = EB_TRUE;
    2301             :         }
    2302             :     }
    2303        7199 :     context_ptr->intra_coded_area_sb[tbAddr] = 0;
    2304        7199 :     context_ptr->coded_area_sb = 0;
    2305        7199 :     context_ptr->coded_area_sb_uv = 0;
    2306             : 
    2307             : #if AV1_LF
    2308        7199 :     if (dlfEnableFlag && picture_control_set_ptr->parent_pcs_ptr->loop_filter_mode == 1){
    2309        1800 :         if (tbAddr == 0) {
    2310          30 :             eb_av1_loop_filter_init(picture_control_set_ptr);
    2311             : 
    2312          30 :             eb_av1_pick_filter_level(
    2313             :                 0,
    2314          30 :                 (EbPictureBufferDesc*)picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr,
    2315             :                 picture_control_set_ptr,
    2316             :                 LPF_PICK_FROM_Q);
    2317             : 
    2318          30 :             eb_av1_loop_filter_frame_init(
    2319          30 :                 &picture_control_set_ptr->parent_pcs_ptr->frm_hdr,
    2320          30 :                 &picture_control_set_ptr->parent_pcs_ptr->lf_info, 0, 3);
    2321             :         }
    2322             :     }
    2323             : #endif
    2324             : 
    2325        7199 :     uint8_t allow_update_cdf = picture_control_set_ptr->update_cdf;
    2326             : 
    2327        7199 :     uint32_t final_cu_itr = 0;
    2328             : 
    2329             :     // CU Loop
    2330             : 
    2331        7199 :     uint32_t    blk_it = 0;
    2332             : 
    2333      134297 :     while (blk_it < sequence_control_set_ptr->max_block_cnt) {
    2334      127110 :         CodingUnit  *cu_ptr = context_ptr->cu_ptr = &context_ptr->md_context->md_cu_arr_nsq[blk_it];
    2335      127110 :         PartitionType part = cu_ptr->part;
    2336             : 
    2337      127110 :         const BlockGeom * blk_geom = context_ptr->blk_geom = get_blk_geom_mds(blk_it);
    2338             :         UNUSED(blk_geom);
    2339      127101 :         sb_ptr->cu_partition_array[blk_it] = context_ptr->md_context->md_cu_arr_nsq[blk_it].part;
    2340      127101 :         if (part != PARTITION_SPLIT && sequence_control_set_ptr->sb_geom[tbAddr].block_is_allowed[blk_it]) {
    2341       29201 :             int32_t offset_d1 = ns_blk_offset[(int32_t)part]; //cu_ptr->best_d1_blk; // TOCKECK
    2342       29201 :             int32_t num_d1_block = ns_blk_num[(int32_t)part]; // context_ptr->blk_geom->totns; // TOCKECK
    2343             : 
    2344             :            // for (int32_t d1_itr = blk_it; d1_itr < blk_it + num_d1_block; d1_itr++) {
    2345       63039 :             for (int32_t d1_itr = (int32_t)blk_it + offset_d1; d1_itr < (int32_t)blk_it + offset_d1 + num_d1_block; d1_itr++) {
    2346       33841 :                 const BlockGeom * blk_geom = context_ptr->blk_geom = get_blk_geom_mds(d1_itr);
    2347             : 
    2348             :                 // PU Stack variables
    2349       33841 :                 PredictionUnit        *pu_ptr = (PredictionUnit *)EB_NULL; //  done
    2350       33841 :                 EbPictureBufferDesc   *residual_buffer = context_ptr->residual_buffer;
    2351       33841 :                 EbPictureBufferDesc   *transform_buffer = context_ptr->transform_buffer;
    2352             : 
    2353       33841 :                 EbPictureBufferDesc   *inverse_quant_buffer = context_ptr->inverse_quant_buffer;
    2354             : 
    2355       33841 :                 int16_t                  *transform_inner_array_ptr = context_ptr->transform_inner_array_ptr;
    2356             : 
    2357       33841 :                 CodingUnit            *cu_ptr = context_ptr->cu_ptr = &context_ptr->md_context->md_cu_arr_nsq[d1_itr];
    2358             : 
    2359       33841 :                 context_ptr->cu_origin_x = (uint16_t)(sb_origin_x + blk_geom->origin_x);
    2360       33841 :                 context_ptr->cu_origin_y = (uint16_t)(sb_origin_y + blk_geom->origin_y);
    2361       33841 :                 cu_ptr->delta_qp = 0;
    2362       33841 :                 context_ptr->md_skip_blk = context_ptr->md_context->blk_skip_decision ? ((cu_ptr->prediction_mode_flag == INTRA_MODE || cu_ptr->block_has_coeff) ? 0 : 1) : 0;
    2363       33841 :                 cu_ptr->block_has_coeff = 0;
    2364             : 
    2365             :                 // if(picture_control_set_ptr->picture_number==4 && context_ptr->cu_origin_x==0 && context_ptr->cu_origin_y==0)
    2366             :                 //     printf("CHEDD");
    2367       33841 :                 uint32_t  coded_area_org = context_ptr->coded_area_sb;
    2368       33841 :                 uint32_t  coded_area_org_uv = context_ptr->coded_area_sb_uv;
    2369             : 
    2370             :                 // Derive disable_cfl_flag as evaluate_cfl_ep = f(disable_cfl_flag)
    2371       95822 :                 EbBool disable_cfl_flag = (context_ptr->blk_geom->sq_size > 32 ||
    2372       28140 :                     context_ptr->blk_geom->bwidth == 4 ||
    2373       61981 :                     context_ptr->blk_geom->bheight == 4) ? EB_TRUE : EB_FALSE;
    2374             :                 // Evaluate cfl @ EP if applicable, and not done @ MD
    2375       33841 :                 context_ptr->evaluate_cfl_ep = (disable_cfl_flag == EB_FALSE && context_ptr->md_context->chroma_level == CHROMA_MODE_2);
    2376             :                 // for now, segmentation independent of sharpness/delta QP.
    2377       33841 :                 if (picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled) {
    2378           0 :                     apply_segmentation_based_quantization(
    2379             :                         blk_geom,
    2380             :                         picture_control_set_ptr,
    2381             :                         sb_ptr,
    2382             :                         cu_ptr);
    2383             : 
    2384           0 :                     sb_ptr->qp = cu_ptr->qp;
    2385             :                 }
    2386             :                 else {
    2387       33841 :                     cu_ptr->qp = sb_ptr->qp;
    2388       33841 :                     cu_ptr->delta_qp = sb_ptr->delta_qp;
    2389             :                 }
    2390             : 
    2391             : 
    2392       33841 :                 if (cu_ptr->prediction_mode_flag == INTRA_MODE) {
    2393        6418 :                     context_ptr->is_inter = cu_ptr->av1xd->use_intrabc;
    2394        6418 :                     context_ptr->tot_intra_coded_area += blk_geom->bwidth* blk_geom->bheight;
    2395        6418 :                     if (picture_control_set_ptr->slice_type != I_SLICE)
    2396        1542 :                         context_ptr->intra_coded_area_sb[tbAddr] += blk_geom->bwidth* blk_geom->bheight;
    2397             : 
    2398             : 
    2399             : 
    2400             : #if PAL_SUP
    2401        6418 :                     if (sequence_control_set_ptr->static_config.encoder_bit_depth > EB_8BIT && picture_control_set_ptr->hbd_mode_decision==0 &&
    2402           0 :                         cu_ptr->palette_info.pmi.palette_size[0] > 0){
    2403             :                         //MD was done on 8bit, scale  palette colors to 10bit
    2404           0 :                         for (uint8_t col = 0; col < cu_ptr->palette_info.pmi.palette_size[0]; col++)
    2405           0 :                             cu_ptr->palette_info.pmi.palette_colors[col] *= 4;
    2406             :                     }
    2407             : #endif
    2408             :                     // *Note - Transforms are the same size as predictions
    2409             :                     // Partition Loop
    2410        6418 :                     context_ptr->txb_itr = 0;
    2411             :                     // Transform partitioning path (INTRA Luma/Chroma)
    2412             : #if ATB_10_BIT
    2413        6418 :                     if ( cu_ptr->av1xd->use_intrabc == 0) {
    2414             : #else
    2415             :                     if (sequence_control_set_ptr->static_config.encoder_bit_depth == EB_8BIT && cu_ptr->av1xd->use_intrabc == 0) {
    2416             : #endif
    2417             :                         // Set the PU Loop Variables
    2418        6418 :                         pu_ptr = cu_ptr->prediction_unit_array;
    2419             :                         // Generate Intra Luma Neighbor Modes
    2420        6418 :                         GeneratePuIntraLumaNeighborModes(
    2421             :                             cu_ptr,
    2422        6418 :                             context_ptr->cu_origin_x,
    2423        6418 :                             context_ptr->cu_origin_y,
    2424             :                             BLOCK_SIZE_64,
    2425             :                             ep_intra_luma_mode_neighbor_array,
    2426             :                             ep_intra_chroma_mode_neighbor_array,
    2427             :                             ep_mode_type_neighbor_array);
    2428             : 
    2429        6417 :                         perform_intra_coding_loop(
    2430             :                             picture_control_set_ptr,
    2431             :                             sb_ptr,
    2432             :                             tbAddr,
    2433             :                             cu_ptr,
    2434             :                             pu_ptr,
    2435             :                             context_ptr,
    2436             :                             dZoffset);
    2437             : 
    2438             :                         // Update the Intra-specific Neighbor Arrays
    2439        6418 :                         EncodePassUpdateIntraModeNeighborArrays(
    2440             :                             ep_mode_type_neighbor_array,
    2441             :                             ep_intra_luma_mode_neighbor_array,
    2442             :                             ep_intra_chroma_mode_neighbor_array,
    2443        6418 :                             (uint8_t)cu_ptr->pred_mode,
    2444        6418 :                             (uint8_t)pu_ptr->intra_chroma_mode,
    2445        6418 :                             context_ptr->cu_origin_x,
    2446        6418 :                             context_ptr->cu_origin_y,
    2447        6418 :                             context_ptr->blk_geom->bwidth,
    2448        6418 :                             context_ptr->blk_geom->bheight,
    2449        6418 :                             context_ptr->blk_geom->bwidth_uv,
    2450        6418 :                             context_ptr->blk_geom->bheight_uv,
    2451        6418 :                             blk_geom->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK);
    2452             : 
    2453             :                     }
    2454             :                     // Transform-partitioning-free path (except the 128x128 case)
    2455             :                     else
    2456             :                     {
    2457             :                         // Set the PU Loop Variables
    2458           0 :                         pu_ptr = cu_ptr->prediction_unit_array;
    2459             :                         // Generate Intra Luma Neighbor Modes
    2460           0 :                         GeneratePuIntraLumaNeighborModes( // HT done
    2461             :                             cu_ptr,
    2462           0 :                             context_ptr->cu_origin_x,
    2463           0 :                             context_ptr->cu_origin_y,
    2464             :                             BLOCK_SIZE_64,
    2465             :                             ep_intra_luma_mode_neighbor_array,
    2466             :                             ep_intra_chroma_mode_neighbor_array,
    2467             :                             ep_mode_type_neighbor_array);
    2468             : 
    2469             :                         {
    2470           0 :                            uint32_t cu_originy_uv = (context_ptr->cu_origin_y >> 3 << 3) >> 1;
    2471           0 :                            uint32_t cu_originx_uv = (context_ptr->cu_origin_x >> 3 << 3) >> 1;
    2472             : 
    2473           0 :                            context_ptr->md_context->luma_txb_skip_context = 0;
    2474           0 :                            context_ptr->md_context->luma_dc_sign_context = 0;
    2475           0 :                            get_txb_ctx(
    2476           0 :                                picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr,
    2477             :                                COMPONENT_LUMA,
    2478             :                                picture_control_set_ptr->ep_luma_dc_sign_level_coeff_neighbor_array,
    2479           0 :                                context_ptr->cu_origin_x,
    2480           0 :                                context_ptr->cu_origin_y,
    2481           0 :                                context_ptr->blk_geom->bsize,
    2482           0 :                                context_ptr->blk_geom->txsize[0][0],
    2483           0 :                                &context_ptr->md_context->luma_txb_skip_context,
    2484           0 :                                &context_ptr->md_context->luma_dc_sign_context);
    2485             : 
    2486           0 :                            if (context_ptr->blk_geom->has_uv) {
    2487           0 :                                context_ptr->md_context->cb_txb_skip_context = 0;
    2488           0 :                                context_ptr->md_context->cb_dc_sign_context = 0;
    2489           0 :                                get_txb_ctx(
    2490           0 :                                    picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr,
    2491             :                                    COMPONENT_CHROMA,
    2492             :                                    picture_control_set_ptr->ep_cb_dc_sign_level_coeff_neighbor_array,
    2493             :                                    cu_originx_uv,
    2494             :                                    cu_originy_uv,
    2495           0 :                                    context_ptr->blk_geom->bsize_uv,
    2496           0 :                                    context_ptr->blk_geom->txsize_uv[0][0],
    2497           0 :                                    &context_ptr->md_context->cb_txb_skip_context,
    2498           0 :                                    &context_ptr->md_context->cb_dc_sign_context);
    2499             : 
    2500             : 
    2501           0 :                                context_ptr->md_context->cr_txb_skip_context = 0;
    2502           0 :                                context_ptr->md_context->cr_dc_sign_context = 0;
    2503           0 :                                get_txb_ctx(
    2504           0 :                                    picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr,
    2505             :                                    COMPONENT_CHROMA,
    2506             :                                    picture_control_set_ptr->ep_cr_dc_sign_level_coeff_neighbor_array,
    2507             :                                    cu_originx_uv,
    2508             :                                    cu_originy_uv,
    2509           0 :                                    context_ptr->blk_geom->bsize_uv,
    2510           0 :                                    context_ptr->blk_geom->txsize_uv[0][0],
    2511           0 :                                    &context_ptr->md_context->cr_txb_skip_context,
    2512           0 :                                    &context_ptr->md_context->cr_dc_sign_context);
    2513             :                            }
    2514             : #if !ATB_10_BIT
    2515             :                             if (cu_ptr->av1xd->use_intrabc)
    2516             : #endif
    2517             :                             {
    2518           0 :                                 MvReferenceFrame ref_frame = INTRA_FRAME;
    2519           0 :                                 generate_av1_mvp_table(
    2520             :                                     &sb_ptr->tile_info,
    2521           0 :                                     context_ptr->md_context,
    2522             :                                     cu_ptr,
    2523             :                                     context_ptr->blk_geom,
    2524           0 :                                     context_ptr->cu_origin_x,
    2525           0 :                                     context_ptr->cu_origin_y,
    2526             :                                     &ref_frame,
    2527             :                                     1,
    2528             :                                     picture_control_set_ptr);
    2529             : 
    2530             :                                 IntMv nearestmv, nearmv;
    2531           0 :                                 eb_av1_find_best_ref_mvs_from_stack(0, context_ptr->md_context->md_local_cu_unit[blk_geom->blkidx_mds].ed_ref_mv_stack, cu_ptr->av1xd, ref_frame, &nearestmv, &nearmv,
    2532             :                                     0);
    2533             : 
    2534           0 :                                 if (nearestmv.as_int == INVALID_MV)
    2535           0 :                                     nearestmv.as_int = 0;
    2536           0 :                                 if (nearmv.as_int == INVALID_MV)
    2537           0 :                                     nearmv.as_int = 0;
    2538           0 :                                 IntMv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
    2539           0 :                                 if (dv_ref.as_int == 0)
    2540           0 :                                     av1_find_ref_dv(&dv_ref, &cu_ptr->av1xd->tile, sequence_control_set_ptr->seq_header.sb_mi_size, context_ptr->cu_origin_y >> MI_SIZE_LOG2, context_ptr->cu_origin_x >> MI_SIZE_LOG2);
    2541             :                                 // Ref DV should not have sub-pel.
    2542           0 :                                 assert((dv_ref.as_mv.col & 7) == 0);
    2543           0 :                                 assert((dv_ref.as_mv.row & 7) == 0);
    2544           0 :                                 context_ptr->md_context->md_local_cu_unit[blk_geom->blkidx_mds].ed_ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
    2545           0 :                                 cu_ptr->predmv[0] = dv_ref;
    2546             : 
    2547             :                                 // keep the final useful MVP stack for entropy coding
    2548           0 :                                 memcpy(cu_ptr->av1xd->final_ref_mv_stack,
    2549           0 :                                     context_ptr->md_context->md_local_cu_unit[context_ptr->blk_geom->blkidx_mds].ed_ref_mv_stack[cu_ptr->prediction_unit_array[0].ref_frame_type],
    2550             :                                     sizeof(CandidateMv)*MAX_REF_MV_STACK_SIZE);
    2551             : 
    2552           0 :                                 pu_ptr = cu_ptr->prediction_unit_array;
    2553             :                                 // Set MvUnit
    2554           0 :                                 context_ptr->mv_unit.pred_direction = (uint8_t)pu_ptr->inter_pred_direction_index;
    2555           0 :                                 context_ptr->mv_unit.mv[REF_LIST_0].mv_union = pu_ptr->mv[REF_LIST_0].mv_union;
    2556           0 :                                 context_ptr->mv_unit.mv[REF_LIST_1].mv_union = pu_ptr->mv[REF_LIST_1].mv_union;
    2557             : 
    2558           0 :                                 EbPictureBufferDesc * ref_pic_list0 = ((EbReferenceObject*)picture_control_set_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture;
    2559             : 
    2560           0 :                                 if (is16bit)
    2561           0 :                                     ref_pic_list0 = ((EbReferenceObject*)picture_control_set_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture16bit;
    2562             : 
    2563           0 :                                 av1_inter_prediction_function_table[is16bit](
    2564             :                                     picture_control_set_ptr,
    2565             :                                     cu_ptr->interp_filters,
    2566             :                                     cu_ptr,
    2567           0 :                                     cu_ptr->prediction_unit_array->ref_frame_type,
    2568             :                                     &context_ptr->mv_unit,
    2569             :                                     1,// use_intrabc,
    2570             : #if OBMC_FLAG
    2571             :                                     SIMPLE_TRANSLATION,
    2572             :                                     0,
    2573             :                                     0,
    2574             : #endif
    2575             :                                     1,
    2576             :                                     &cu_ptr->interinter_comp,
    2577             : #if II_COMP_FLAG
    2578             :                                     &sb_ptr->tile_info,
    2579             :                                     ep_luma_recon_neighbor_array,
    2580             :                                     ep_cb_recon_neighbor_array ,
    2581             :                                     ep_cr_recon_neighbor_array ,
    2582           0 :                                     cu_ptr->is_interintra_used,
    2583           0 :                                     cu_ptr->interintra_mode,
    2584           0 :                                     cu_ptr->use_wedge_interintra,
    2585             :                                     cu_ptr->interintra_wedge_index,
    2586             : 
    2587             : #endif
    2588           0 :                                     context_ptr->cu_origin_x,
    2589           0 :                                     context_ptr->cu_origin_y,
    2590           0 :                                     blk_geom->bwidth,
    2591           0 :                                     blk_geom->bheight,
    2592             :                                     ref_pic_list0,
    2593             :                                     0,
    2594             :                                     recon_buffer,
    2595           0 :                                     context_ptr->cu_origin_x,
    2596           0 :                                     context_ptr->cu_origin_y,
    2597             :                                     EB_TRUE,
    2598           0 :                                     (uint8_t)sequence_control_set_ptr->static_config.encoder_bit_depth);
    2599             :                             }
    2600             : #if !ATB_10_BIT
    2601             :                             else
    2602             :                             {
    2603             :                                 if (is16bit) {
    2604             :                                     uint16_t    topNeighArray[64 * 2 + 1];
    2605             :                                     uint16_t    leftNeighArray[64 * 2 + 1];
    2606             :                                     PredictionMode mode;
    2607             : 
    2608             :                                 int32_t plane_end = blk_geom->has_uv ? 2 : 0;
    2609             : 
    2610             :                                 for (int32_t plane = 0; plane <= plane_end; ++plane) {
    2611             :                                     TxSize  tx_size = plane ? blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr] : blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr];
    2612             :                                     if (plane == 0) {
    2613             :                                         if (context_ptr->cu_origin_y != 0)
    2614             :                                             memcpy(topNeighArray + 1, (uint16_t*)(ep_luma_recon_neighbor_array->top_array) + context_ptr->cu_origin_x, blk_geom->bwidth * 2 * sizeof(uint16_t));
    2615             :                                         if (context_ptr->cu_origin_x != 0)
    2616             :                                             memcpy(leftNeighArray + 1, (uint16_t*)(ep_luma_recon_neighbor_array->left_array) + context_ptr->cu_origin_y, blk_geom->bheight * 2 * sizeof(uint16_t));
    2617             :                                         if (context_ptr->cu_origin_y != 0 && context_ptr->cu_origin_x != 0)
    2618             :                                             topNeighArray[0] = leftNeighArray[0] = ((uint16_t*)(ep_luma_recon_neighbor_array->top_left_array) + MAX_PICTURE_HEIGHT_SIZE + context_ptr->cu_origin_x - context_ptr->cu_origin_y)[0];
    2619             :                                     }
    2620             : 
    2621             :                                     else if (plane == 1) {
    2622             :                                         if (cu_originy_uv != 0)
    2623             :                                             memcpy(topNeighArray + 1, (uint16_t*)(ep_cb_recon_neighbor_array->top_array) + cu_originx_uv, blk_geom->bwidth_uv * 2 * sizeof(uint16_t));
    2624             :                                         if (cu_originx_uv != 0)
    2625             :                                             memcpy(leftNeighArray + 1, (uint16_t*)(ep_cb_recon_neighbor_array->left_array) + cu_originy_uv, blk_geom->bheight_uv * 2 * sizeof(uint16_t));
    2626             :                                         if (cu_originy_uv != 0 && cu_originx_uv != 0)
    2627             :                                             topNeighArray[0] = leftNeighArray[0] = ((uint16_t*)(ep_cb_recon_neighbor_array->top_left_array) + MAX_PICTURE_HEIGHT_SIZE / 2 + cu_originx_uv - cu_originy_uv)[0];
    2628             :                                     }
    2629             :                                     else {
    2630             :                                         if (cu_originy_uv != 0)
    2631             :                                             memcpy(topNeighArray + 1, (uint16_t*)(ep_cr_recon_neighbor_array->top_array) + cu_originx_uv, blk_geom->bwidth_uv * 2 * sizeof(uint16_t));
    2632             :                                         if (cu_originx_uv != 0)
    2633             :                                             memcpy(leftNeighArray + 1, (uint16_t*)(ep_cr_recon_neighbor_array->left_array) + cu_originy_uv, blk_geom->bheight_uv * 2 * sizeof(uint16_t));
    2634             :                                         if (cu_originy_uv != 0 && cu_originx_uv != 0)
    2635             :                                             topNeighArray[0] = leftNeighArray[0] = ((uint16_t*)(ep_cr_recon_neighbor_array->top_left_array) + MAX_PICTURE_HEIGHT_SIZE / 2 + cu_originx_uv - cu_originy_uv)[0];
    2636             :                                     }
    2637             : 
    2638             :                                     if (plane)
    2639             :                                         mode = (pu_ptr->intra_chroma_mode == UV_CFL_PRED) ? (PredictionMode)UV_DC_PRED : (PredictionMode)pu_ptr->intra_chroma_mode;
    2640             :                                     else
    2641             :                                         mode = cu_ptr->pred_mode; //PredictionMode mode,
    2642             :                                     eb_av1_predict_intra_block_16bit(
    2643             :                                         &sb_ptr->tile_info,
    2644             :                                         ED_STAGE,
    2645             :                                         context_ptr->blk_geom,
    2646             :                                         picture_control_set_ptr->parent_pcs_ptr->av1_cm,                  //const Av1Common *cm,
    2647             :                                         plane ? blk_geom->bwidth_uv : blk_geom->bwidth,                  //int32_t wpx,
    2648             :                                         plane ? blk_geom->bheight_uv : blk_geom->bheight,                  //int32_t hpx,
    2649             :                                         tx_size,
    2650             :                                         mode,                                                       //PredictionMode mode,
    2651             :                                         plane ? pu_ptr->angle_delta[PLANE_TYPE_UV] : pu_ptr->angle_delta[PLANE_TYPE_Y],
    2652             : #if PAL_SUP
    2653             :                                         plane ? 0    : cu_ptr->palette_info.pmi.palette_size[0] > 0,
    2654             :                                         plane ? NULL : &cu_ptr->palette_info,
    2655             : #else
    2656             :                                         0,                                                          //int32_t use_palette,
    2657             : #endif
    2658             : 
    2659             : #if FILTER_INTRA_FLAG
    2660             :                                         plane ? FILTER_INTRA_MODES : cu_ptr->filter_intra_mode,
    2661             : #else
    2662             :                                         FILTER_INTRA_MODES,                                         //CHKN FilterIntraMode filter_intra_mode,
    2663             : #endif
    2664             :                                         topNeighArray + 1,
    2665             :                                         leftNeighArray + 1,
    2666             :                                         recon_buffer,                                                //uint8_t *dst,
    2667             :                                         //int32_t dst_stride,
    2668             :                                         0,                                                          //int32_t col_off,
    2669             :                                         0,                                                          //int32_t row_off,
    2670             :                                         plane,                                                      //int32_t plane,
    2671             :                                         blk_geom->bsize,                  //uint32_t puSize,
    2672             :                                         context_ptr->cu_origin_x,
    2673             :                                         context_ptr->cu_origin_y,
    2674             :                                         context_ptr->cu_origin_x,  //uint32_t cuOrgX,
    2675             :                                         context_ptr->cu_origin_y,
    2676             :                                         0,                          // MD ONLY - NOT USED BY ENCDEC
    2677             :                                         0);                         //uint32_t cuOrgY
    2678             :                                 }
    2679             :                             }
    2680             :                             else {
    2681             :                                 uint8_t    topNeighArray[64 * 2 + 1];
    2682             :                                 uint8_t    leftNeighArray[64 * 2 + 1];
    2683             :                                 PredictionMode mode;
    2684             :                                 // Partition Loop
    2685             :                                 int32_t plane_end = blk_geom->has_uv ? 2 : 0;
    2686             : 
    2687             :                                 for (int32_t plane = 0; plane <= plane_end; ++plane) {
    2688             :                                     TxSize  tx_size = plane ? blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr] : blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr];
    2689             :                                     if (plane == 0) {
    2690             :                                         if (context_ptr->cu_origin_y != 0)
    2691             :                                             memcpy(topNeighArray + 1, ep_luma_recon_neighbor_array->top_array + context_ptr->cu_origin_x, blk_geom->bwidth * 2);
    2692             : 
    2693             :                                         if (context_ptr->cu_origin_x != 0)
    2694             :                                             memcpy(leftNeighArray + 1, ep_luma_recon_neighbor_array->left_array + context_ptr->cu_origin_y, blk_geom->bheight * 2);
    2695             : 
    2696             :                                         if (context_ptr->cu_origin_y != 0 && context_ptr->cu_origin_x != 0)
    2697             :                                             topNeighArray[0] = leftNeighArray[0] = ep_luma_recon_neighbor_array->top_left_array[MAX_PICTURE_HEIGHT_SIZE + context_ptr->cu_origin_x - context_ptr->cu_origin_y];
    2698             :                                     }
    2699             : 
    2700             :                                     else if (plane == 1) {
    2701             :                                         if (cu_originy_uv != 0)
    2702             :                                             memcpy(topNeighArray + 1, ep_cb_recon_neighbor_array->top_array + cu_originx_uv, blk_geom->bwidth_uv * 2);
    2703             : 
    2704             :                                         if (cu_originx_uv != 0)
    2705             :                                             memcpy(leftNeighArray + 1, ep_cb_recon_neighbor_array->left_array + cu_originy_uv, blk_geom->bheight_uv * 2);
    2706             : 
    2707             :                                         if (cu_originy_uv != 0 && cu_originx_uv != 0)
    2708             :                                             topNeighArray[0] = leftNeighArray[0] = ep_cb_recon_neighbor_array->top_left_array[MAX_PICTURE_HEIGHT_SIZE / 2 + cu_originx_uv - cu_originy_uv];
    2709             :                                     }
    2710             :                                     else {
    2711             :                                         if (cu_originy_uv != 0)
    2712             :                                             memcpy(topNeighArray + 1, ep_cr_recon_neighbor_array->top_array + cu_originx_uv, blk_geom->bwidth_uv * 2);
    2713             : 
    2714             :                                         if (cu_originx_uv != 0)
    2715             :                                             memcpy(leftNeighArray + 1, ep_cr_recon_neighbor_array->left_array + cu_originy_uv, blk_geom->bheight_uv * 2);
    2716             : 
    2717             :                                         if (cu_originy_uv != 0 && cu_originx_uv != 0)
    2718             :                                             topNeighArray[0] = leftNeighArray[0] = ep_cr_recon_neighbor_array->top_left_array[MAX_PICTURE_HEIGHT_SIZE / 2 + cu_originx_uv - cu_originy_uv];
    2719             :                                     }
    2720             : 
    2721             :                                     if (plane)
    2722             :                                         mode = (pu_ptr->intra_chroma_mode == UV_CFL_PRED) ? (PredictionMode)UV_DC_PRED : (PredictionMode)pu_ptr->intra_chroma_mode;
    2723             :                                     else
    2724             :                                         mode = cu_ptr->pred_mode; //PredictionMode mode,
    2725             :                                     // Hsan: if CHROMA_MODE_2, then CFL will be evaluated @ EP as no CHROMA @ MD
    2726             :                                     // If that's the case then you should ensure that the 1st chroma prediction uses UV_DC_PRED (that's the default configuration for CHROMA_MODE_2 if CFL applicable (set @ fast loop candidates injection) then MD assumes chroma mode always UV_DC_PRED)
    2727             :                                     eb_av1_predict_intra_block(
    2728             :                                         &sb_ptr->tile_info,
    2729             :                                         ED_STAGE,
    2730             :                                         context_ptr->blk_geom,
    2731             :                                         picture_control_set_ptr->parent_pcs_ptr->av1_cm,                  //const Av1Common *cm,
    2732             :                                         plane ? blk_geom->bwidth_uv : blk_geom->bwidth,                   //int32_t wpx,
    2733             :                                         plane ? blk_geom->bheight_uv : blk_geom->bheight,                  //int32_t hpx,
    2734             :                                         tx_size,
    2735             :                                         mode,                                                       //PredictionMode mode,
    2736             :                                         plane ? pu_ptr->angle_delta[PLANE_TYPE_UV] : pu_ptr->angle_delta[PLANE_TYPE_Y],
    2737             : #if PAL_SUP
    2738             :                                         1,
    2739             :                                         NULL, //coz we should not get here ?
    2740             : #else
    2741             :                                         0,                                                          //int32_t use_palette,
    2742             : #endif
    2743             :                                         FILTER_INTRA_MODES,                                         //CHKN FilterIntraMode filter_intra_mode,
    2744             :                                         topNeighArray + 1,
    2745             :                                         leftNeighArray + 1,
    2746             :                                         recon_buffer,                                                //uint8_t *dst,
    2747             :                                         //int32_t dst_stride,
    2748             :                                         0,                                                          //int32_t col_off,
    2749             :                                         0,                                                          //int32_t row_off,
    2750             :                                         plane,                                                      //int32_t plane,
    2751             :                                         blk_geom->bsize,                  //uint32_t puSize,
    2752             :                                         context_ptr->cu_origin_x,
    2753             :                                         context_ptr->cu_origin_y,
    2754             :                                         context_ptr->cu_origin_x,
    2755             :                                         context_ptr->cu_origin_y,
    2756             :                                         0,  // MD ONLY - NOT USED BY ENCDEC
    2757             :                                         0);
    2758             :                                 }
    2759             :                                 }
    2760             :                             }
    2761             : #endif
    2762             :                             // Encode Transform Unit -INTRA-
    2763             :                             {
    2764           0 :                                 uint16_t             cb_qp = cu_ptr->qp;
    2765             : 
    2766           0 :                                 Av1EncodeLoopFunctionTable[is16bit](
    2767             :                                     picture_control_set_ptr,
    2768             :                                     context_ptr,
    2769             :                                     sb_ptr,
    2770           0 :                                     context_ptr->cu_origin_x,
    2771           0 :                                     context_ptr->cu_origin_y,
    2772             :                                     cb_qp,
    2773             :                                     recon_buffer,
    2774             :                                     coeff_buffer_sb,
    2775             :                                     residual_buffer,
    2776             :                                     transform_buffer,
    2777             :                                     inverse_quant_buffer,
    2778             :                                     transform_inner_array_ptr,
    2779             :                                     count_non_zero_coeffs,
    2780           0 :                                     blk_geom->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK,
    2781           0 :                                     cu_ptr->delta_qp > 0 ? 0 : dZoffset,
    2782           0 :                                     eobs[context_ptr->txb_itr],
    2783             :                                     cuPlane);
    2784             : 
    2785           0 :                                 if(allow_update_cdf)
    2786             :                                 {
    2787           0 :                                     ModeDecisionCandidateBuffer         **candidate_buffer_ptr_array_base = context_ptr->md_context->candidate_buffer_ptr_array;
    2788           0 :                                     ModeDecisionCandidateBuffer         **candidate_buffer_ptr_array = &(candidate_buffer_ptr_array_base[0]);
    2789             :                                     ModeDecisionCandidateBuffer          *candidate_buffer;
    2790             : 
    2791             :                                     // Set the Candidate Buffer
    2792           0 :                                     candidate_buffer = candidate_buffer_ptr_array[0];
    2793             :                                     // Rate estimation function uses the values from CandidatePtr. The right values are copied from cu_ptr to CandidatePtr
    2794           0 :                                     candidate_buffer->candidate_ptr->type = cu_ptr->prediction_mode_flag;
    2795           0 :                                     candidate_buffer->candidate_ptr->pred_mode = cu_ptr->pred_mode;
    2796             : 
    2797             : #if FILTER_INTRA_FLAG
    2798           0 :                                     candidate_buffer->candidate_ptr->filter_intra_mode = cu_ptr->filter_intra_mode;
    2799             : #endif
    2800           0 :                                     const uint32_t coeff1dOffset = context_ptr->coded_area_sb;
    2801             : 
    2802           0 :                                     av1_tu_estimate_coeff_bits(
    2803           0 :                                         context_ptr->md_context,
    2804             :                                         1,//allow_update_cdf,
    2805           0 :                                         &picture_control_set_ptr->ec_ctx_array[tbAddr],
    2806             :                                         picture_control_set_ptr,
    2807             :                                         candidate_buffer,
    2808             :                                         coeff1dOffset,
    2809           0 :                                         context_ptr->coded_area_sb_uv,
    2810             :                                         coeff_est_entropy_coder_ptr,
    2811             :                                         coeff_buffer_sb,
    2812           0 :                                         eobs[context_ptr->txb_itr][0],
    2813           0 :                                         eobs[context_ptr->txb_itr][1],
    2814           0 :                                         eobs[context_ptr->txb_itr][2],
    2815             :                                         &y_tu_coeff_bits,
    2816             :                                         &cb_tu_coeff_bits,
    2817             :                                         &cr_tu_coeff_bits,
    2818           0 :                                         context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    2819           0 :                                         context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    2820           0 :                                         cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_Y],
    2821           0 :                                         cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_UV],
    2822           0 :                                         context_ptr->blk_geom->has_uv ? COMPONENT_ALL : COMPONENT_LUMA);
    2823             :                                 }
    2824             :                                 //intra mode
    2825           0 :                                 Av1EncodeGenerateReconFunctionPtr[is16bit](
    2826             :                                     context_ptr,
    2827           0 :                                     context_ptr->cu_origin_x,
    2828           0 :                                     context_ptr->cu_origin_y,
    2829             :                                     recon_buffer,
    2830             :                                     inverse_quant_buffer,
    2831             :                                     transform_inner_array_ptr,
    2832           0 :                                     blk_geom->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK,
    2833           0 :                                     eobs[context_ptr->txb_itr]);
    2834             :                             }
    2835             : 
    2836             :                             // Update the Intra-specific Neighbor Arrays
    2837           0 :                             EncodePassUpdateIntraModeNeighborArrays(
    2838             :                                 ep_mode_type_neighbor_array,
    2839             :                                 ep_intra_luma_mode_neighbor_array,
    2840             :                                 ep_intra_chroma_mode_neighbor_array,
    2841           0 :                                 (uint8_t)cu_ptr->pred_mode,
    2842           0 :                                 (uint8_t)pu_ptr->intra_chroma_mode,
    2843           0 :                                 context_ptr->cu_origin_x,
    2844           0 :                                 context_ptr->cu_origin_y,
    2845           0 :                                 context_ptr->blk_geom->bwidth,
    2846           0 :                                 context_ptr->blk_geom->bheight,
    2847           0 :                                 context_ptr->blk_geom->bwidth_uv,
    2848           0 :                                 context_ptr->blk_geom->bheight_uv,
    2849           0 :                                 blk_geom->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK);
    2850             : 
    2851             :                             // Update Recon Samples-INTRA-
    2852           0 :                             EncodePassUpdateReconSampleNeighborArrays(
    2853             :                                 ep_luma_recon_neighbor_array,
    2854             :                                 ep_cb_recon_neighbor_array,
    2855             :                                 ep_cr_recon_neighbor_array,
    2856             :                                 recon_buffer,
    2857           0 :                                 context_ptr->cu_origin_x,
    2858           0 :                                 context_ptr->cu_origin_y,
    2859           0 :                                 context_ptr->blk_geom->bwidth,
    2860           0 :                                 context_ptr->blk_geom->bheight,
    2861           0 :                                 context_ptr->blk_geom->bwidth_uv,
    2862           0 :                                 context_ptr->blk_geom->bheight_uv,
    2863           0 :                                 blk_geom->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK,
    2864             :                                 is16bit);
    2865             : 
    2866             : 
    2867             :                             // Update the luma Dc Sign Level Coeff Neighbor Array
    2868             :                             {
    2869           0 :                                 uint8_t dcSignLevelCoeff = (uint8_t)cu_ptr->quantized_dc[0][context_ptr->txb_itr];
    2870           0 :                                 neighbor_array_unit_mode_write(
    2871             :                                     picture_control_set_ptr->ep_luma_dc_sign_level_coeff_neighbor_array,
    2872             :                                     (uint8_t*)&dcSignLevelCoeff,
    2873           0 :                                     context_ptr->cu_origin_x,
    2874           0 :                                     context_ptr->cu_origin_y,
    2875           0 :                                     context_ptr->blk_geom->bwidth,
    2876           0 :                                     context_ptr->blk_geom->bheight,
    2877             :                                     NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
    2878             :                             }
    2879             : 
    2880             :                             // Update the cb Dc Sign Level Coeff Neighbor Array
    2881           0 :                             if (context_ptr->blk_geom->has_uv) {
    2882           0 :                                 uint8_t dcSignLevelCoeff = (uint8_t)cu_ptr->quantized_dc[1][context_ptr->txb_itr];
    2883           0 :                                 neighbor_array_unit_mode_write(
    2884             :                                     picture_control_set_ptr->ep_cb_dc_sign_level_coeff_neighbor_array,
    2885             :                                     (uint8_t*)&dcSignLevelCoeff,
    2886           0 :                                     ROUND_UV(context_ptr->cu_origin_x) >> 1,
    2887           0 :                                     ROUND_UV(context_ptr->cu_origin_y) >> 1,
    2888           0 :                                     context_ptr->blk_geom->bwidth_uv,
    2889           0 :                                     context_ptr->blk_geom->bheight_uv,
    2890             :                                     NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
    2891             : 
    2892             :                             }
    2893             : 
    2894             :                             // Update the cr DC Sign Level Coeff Neighbor Array
    2895           0 :                             if (context_ptr->blk_geom->has_uv) {
    2896           0 :                                 uint8_t dcSignLevelCoeff = (uint8_t)cu_ptr->quantized_dc[2][context_ptr->txb_itr];
    2897           0 :                                 neighbor_array_unit_mode_write(
    2898             :                                     picture_control_set_ptr->ep_cr_dc_sign_level_coeff_neighbor_array,
    2899             :                                     (uint8_t*)&dcSignLevelCoeff,
    2900           0 :                                     ROUND_UV(context_ptr->cu_origin_x) >> 1,
    2901           0 :                                     ROUND_UV(context_ptr->cu_origin_y) >> 1,
    2902           0 :                                     context_ptr->blk_geom->bwidth_uv,
    2903           0 :                                     context_ptr->blk_geom->bheight_uv,
    2904             :                                     NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
    2905             :                             }
    2906             : 
    2907           0 :                             if (context_ptr->blk_geom->has_uv) {
    2908           0 :                                 cu_ptr->block_has_coeff = cu_ptr->block_has_coeff |
    2909           0 :                                     cu_ptr->transform_unit_array[context_ptr->txb_itr].y_has_coeff |
    2910           0 :                                     cu_ptr->transform_unit_array[context_ptr->txb_itr].u_has_coeff |
    2911           0 :                                     cu_ptr->transform_unit_array[context_ptr->txb_itr].v_has_coeff;
    2912             : 
    2913           0 :                                 if (cu_ptr->transform_unit_array[context_ptr->txb_itr].u_has_coeff)
    2914           0 :                                     cu_ptr->transform_unit_array[0].u_has_coeff = EB_TRUE;
    2915           0 :                                 if (cu_ptr->transform_unit_array[context_ptr->txb_itr].v_has_coeff)
    2916           0 :                                     cu_ptr->transform_unit_array[0].v_has_coeff = EB_TRUE;
    2917             :                             }
    2918             :                             else {
    2919           0 :                                 cu_ptr->block_has_coeff = cu_ptr->block_has_coeff |
    2920           0 :                                     cu_ptr->transform_unit_array[context_ptr->txb_itr].y_has_coeff;
    2921             :                             }
    2922             :                         } // Transform Loop
    2923           0 :                     context_ptr->coded_area_sb += blk_geom->bwidth * blk_geom->bheight;
    2924           0 :                     if (blk_geom->has_uv)
    2925           0 :                         context_ptr->coded_area_sb_uv += blk_geom->bwidth_uv * blk_geom->bheight_uv;
    2926             :                     }
    2927             :                 }
    2928             : 
    2929             :                 // Inter
    2930       27423 :                 else if (cu_ptr->prediction_mode_flag == INTER_MODE) {
    2931       27422 :                     context_ptr->is_inter = 1;
    2932       27422 :                     int8_t ref_idx_l0 = (&cu_ptr->prediction_unit_array[0])->ref_frame_index_l0;
    2933       27422 :                     int8_t ref_idx_l1 = (&cu_ptr->prediction_unit_array[0])->ref_frame_index_l1;
    2934             :                     MvReferenceFrame rf[2];
    2935       27422 :                     av1_set_ref_frame(rf, (&cu_ptr->prediction_unit_array[0])->ref_frame_type);
    2936             :                     uint8_t list_idx0, list_idx1;
    2937       27423 :                     list_idx0 = get_list_idx(rf[0]);
    2938       27422 :                     if (rf[1] == NONE_FRAME)
    2939       12303 :                         list_idx1 = get_list_idx(rf[0]);
    2940             :                     else
    2941       15119 :                         list_idx1 = get_list_idx(rf[1]);
    2942       27420 :                     EbReferenceObject* refObj0 = ref_idx_l0 >= 0 ? (EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[list_idx0][ref_idx_l0]->object_ptr : (EbReferenceObject*)EB_NULL;
    2943       27420 :                     EbReferenceObject* refObj1 = ref_idx_l1 >= 0 ? (EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[list_idx1][ref_idx_l1]->object_ptr : (EbReferenceObject*)EB_NULL;
    2944             :                     uint16_t  txb_origin_x;
    2945             :                     uint16_t  txb_origin_y;
    2946       27420 :                     EbBool isCuSkip = EB_FALSE;
    2947             : 
    2948             :                     //********************************
    2949             :                     //        INTER
    2950             :                     //********************************
    2951             : 
    2952       27420 :                     EbBool  zeroLumaCbfMD = EB_FALSE;
    2953             :                     //EbBool doLumaMC = EB_TRUE;
    2954       27420 :                     EbBool doMVpred = EB_TRUE;
    2955             :                     //if QP M and Segments are used, First Cu in SB row should have at least one coeff.
    2956       27420 :                     EbBool isFirstCUinRow = EB_FALSE;
    2957       27420 :                     zeroLumaCbfMD = (EbBool)(checkZeroLumaCbf && ((&cu_ptr->prediction_unit_array[0])->merge_flag == EB_FALSE && cu_ptr->block_has_coeff == 0 && isFirstCUinRow == EB_FALSE));
    2958       27420 :                     zeroLumaCbfMD = EB_FALSE;
    2959             : 
    2960             :                     //Motion Compensation could be avoided in the case below
    2961       27420 :                     EbBool doMC = EB_TRUE;
    2962             : 
    2963             :                     // Perform Merge/Skip Decision if the mode coming from MD is merge. for the First CU in Row merge will remain as is.
    2964       27420 :                     if (cu_ptr->prediction_unit_array[0].merge_flag == EB_TRUE)
    2965             :                     {
    2966        8273 :                         if (isFirstCUinRow == EB_FALSE)
    2967        8274 :                             isCuSkip = mdcontextPtr->md_ep_pipe_sb[cu_ptr->mds_idx].skip_cost <= mdcontextPtr->md_ep_pipe_sb[cu_ptr->mds_idx].merge_cost ? 1 : 0;
    2968             :                     }
    2969             : 
    2970             :                     //MC could be avoided in some cases below
    2971       27420 :                     if (isFirstCUinRow == EB_FALSE) {
    2972       27419 :                         if (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_FALSE && constrained_intra_flag == EB_TRUE &&
    2973           0 :                             cu_ptr->prediction_unit_array[0].merge_flag == EB_TRUE)
    2974             :                         {
    2975           0 :                             if (isCuSkip)
    2976             :                             {
    2977             :                                 //here merge is decided to be skip in nonRef frame.
    2978           0 :                                 doMC = EB_FALSE;
    2979           0 :                                 doMVpred = EB_FALSE;
    2980             :                             }
    2981             :                         }
    2982       27419 :                         else if (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_FALSE && constrained_intra_flag == EB_TRUE &&
    2983             :                             zeroLumaCbfMD == EB_TRUE)
    2984             :                         {
    2985             :                             //MV mode with no Coeff  in nonRef frame.
    2986           0 :                             doMC = EB_FALSE;
    2987             :                         }
    2988             : 
    2989       27419 :                         else if (picture_control_set_ptr->limit_intra && isIntraLCU == EB_FALSE)
    2990             :                         {
    2991           0 :                             if (isCuSkip)
    2992             :                             {
    2993           0 :                                 doMC = EB_FALSE;
    2994           0 :                                 doMVpred = EB_FALSE;
    2995             :                             }
    2996             :                         }
    2997             :                     }
    2998             : 
    2999       27420 :                     doMC = (EbBool)(doRecon | doMC);
    3000             : 
    3001       27420 :                     doMVpred = (EbBool)(doRecon | doMVpred);
    3002             : 
    3003             :                     //IntMv  predmv[2];
    3004       27420 :                     enc_pass_av1_mv_pred(
    3005             :                         &sb_ptr->tile_info,
    3006       27420 :                          context_ptr->md_context,
    3007             :                         cu_ptr,
    3008             :                         blk_geom,
    3009       27420 :                         context_ptr->cu_origin_x,
    3010       27420 :                         context_ptr->cu_origin_y,
    3011             :                         picture_control_set_ptr,
    3012       27420 :                         cu_ptr->prediction_unit_array[0].ref_frame_type,
    3013       27420 :                         cu_ptr->prediction_unit_array[0].is_compound,
    3014       27420 :                         cu_ptr->pred_mode,
    3015       27420 :                         cu_ptr->predmv);
    3016             :                     //out1:  predmv
    3017             :                     //out2:   cu_ptr->inter_mode_ctx[ cu_ptr->prediction_unit_array[0].ref_frame_type ]
    3018             : 
    3019             :                     //keep final useful mvp for entropy
    3020       27425 :                     memcpy(cu_ptr->av1xd->final_ref_mv_stack,
    3021       27425 :                        context_ptr->md_context->md_local_cu_unit[context_ptr->blk_geom->blkidx_mds].ed_ref_mv_stack[cu_ptr->prediction_unit_array[0].ref_frame_type],
    3022             :                         sizeof(CandidateMv)*MAX_REF_MV_STACK_SIZE);
    3023             : 
    3024             :                     {
    3025             :                         // 1st Partition Loop
    3026       27425 :                         pu_ptr = cu_ptr->prediction_unit_array;
    3027             : 
    3028             :                         // Set MvUnit
    3029       27425 :                         context_ptr->mv_unit.pred_direction = (uint8_t)pu_ptr->inter_pred_direction_index;
    3030       27425 :                         context_ptr->mv_unit.mv[REF_LIST_0].mv_union = pu_ptr->mv[REF_LIST_0].mv_union;
    3031       27425 :                         context_ptr->mv_unit.mv[REF_LIST_1].mv_union = pu_ptr->mv[REF_LIST_1].mv_union;
    3032             : 
    3033             :                         // Inter Prediction
    3034       27425 :                         if (doMC &&
    3035       27421 :                             pu_ptr->motion_mode == WARPED_CAUSAL)
    3036             :                         {
    3037        4019 :                             warped_motion_prediction(
    3038             :                                 picture_control_set_ptr,
    3039             :                                 &context_ptr->mv_unit,
    3040        4019 :                                 cu_ptr->prediction_unit_array[0].ref_frame_type,
    3041        4019 :                                 cu_ptr->prediction_unit_array[0].compound_idx,
    3042             :                                 &cu_ptr->prediction_unit_array[0].interinter_comp,
    3043        4019 :                                 context_ptr->cu_origin_x,
    3044        4019 :                                 context_ptr->cu_origin_y,
    3045             :                                 cu_ptr,
    3046             :                                 blk_geom,
    3047             :                                 is16bit ? refObj0->reference_picture16bit : refObj0->reference_picture,
    3048        2460 :                                 ref_idx_l1 >= 0 ? is16bit ? refObj1->reference_picture16bit : refObj1->reference_picture : NULL,
    3049             :                                 recon_buffer,
    3050        4019 :                                 context_ptr->cu_origin_x,
    3051        4019 :                                 context_ptr->cu_origin_y,
    3052             :                                 &cu_ptr->prediction_unit_array[0].wm_params_l0,
    3053             :                                 &cu_ptr->prediction_unit_array[0].wm_params_l1,
    3054        4019 :                                 (uint8_t) sequence_control_set_ptr->static_config.encoder_bit_depth,
    3055             :                                 EB_TRUE);
    3056             :                         }
    3057             : 
    3058       27425 :                         if (doMC &&
    3059       27421 :                             pu_ptr->motion_mode != WARPED_CAUSAL)
    3060             :                         {
    3061             :                             EbPictureBufferDesc             *ref_pic_list0;
    3062             :                             EbPictureBufferDesc             *ref_pic_list1;
    3063             : 
    3064       23403 :                             if (!is16bit) {
    3065       23403 :                                 ref_pic_list0 = cu_ptr->prediction_unit_array->ref_frame_index_l0 >= 0 ? refObj0->reference_picture : (EbPictureBufferDesc*)EB_NULL;
    3066       23403 :                                 ref_pic_list1 = cu_ptr->prediction_unit_array->ref_frame_index_l1 >= 0 ? refObj1->reference_picture : (EbPictureBufferDesc*)EB_NULL;
    3067             :                             }
    3068             :                             else {
    3069           0 :                                 ref_pic_list0  = cu_ptr->prediction_unit_array->ref_frame_index_l0 >= 0 ? refObj0->reference_picture16bit : (EbPictureBufferDesc*)EB_NULL;
    3070           0 :                                 ref_pic_list1  = cu_ptr->prediction_unit_array->ref_frame_index_l1 >= 0 ? refObj1->reference_picture16bit : (EbPictureBufferDesc*)EB_NULL;
    3071             :                             }
    3072             : 
    3073       23403 :                             av1_inter_prediction_function_table[is16bit](
    3074             :                                 picture_control_set_ptr,
    3075             :                                 cu_ptr->interp_filters,
    3076             :                                 cu_ptr,
    3077       23403 :                                 cu_ptr->prediction_unit_array->ref_frame_type,
    3078             :                                 &context_ptr->mv_unit,
    3079             :                                 0,//use_intrabc,
    3080             : #if OBMC_FLAG
    3081       23403 :                                 cu_ptr->prediction_unit_array->motion_mode,
    3082             :                                 0,//use_precomputed_obmc,
    3083             :                                 0,
    3084             : #endif
    3085             : #if INTER_INTER_HBD
    3086       23403 :                                 cu_ptr->compound_idx,
    3087             :                                 &cu_ptr->interinter_comp,
    3088             : #endif
    3089             : #if INTER_INTRA_HBD
    3090             :                                 &sb_ptr->tile_info,
    3091             :                                 ep_luma_recon_neighbor_array,
    3092             :                                 ep_cb_recon_neighbor_array ,
    3093             :                                 ep_cr_recon_neighbor_array ,
    3094       23403 :                                 cu_ptr->is_interintra_used,
    3095       23403 :                                 cu_ptr->interintra_mode,
    3096       23403 :                                 cu_ptr->use_wedge_interintra,
    3097             :                                 cu_ptr->interintra_wedge_index,
    3098             : 
    3099             : #endif
    3100       23403 :                                 context_ptr->cu_origin_x,
    3101       23403 :                                 context_ptr->cu_origin_y,
    3102       23403 :                                 blk_geom->bwidth,
    3103       23403 :                                 blk_geom->bheight,
    3104             :                                 ref_pic_list0,
    3105             :                                 ref_pic_list1,
    3106             :                                 recon_buffer,
    3107       23403 :                                 context_ptr->cu_origin_x,
    3108       23403 :                                 context_ptr->cu_origin_y,
    3109             :                                 EB_TRUE,
    3110       23403 :                                 (uint8_t)sequence_control_set_ptr->static_config.encoder_bit_depth);
    3111             : 
    3112             :                         }
    3113             :                     }
    3114             : 
    3115       27425 :                     context_ptr->txb_itr = 0;
    3116             :                     // Transform Loop
    3117       27425 :                     cu_ptr->transform_unit_array[0].y_has_coeff = EB_FALSE;
    3118       27425 :                     cu_ptr->transform_unit_array[0].u_has_coeff = EB_FALSE;
    3119       27425 :                     cu_ptr->transform_unit_array[0].v_has_coeff = EB_FALSE;
    3120             : 
    3121             :                     // initialize TU Split
    3122       27425 :                     y_full_distortion[DIST_CALC_RESIDUAL] = 0;
    3123       27425 :                     y_full_distortion[DIST_CALC_PREDICTION] = 0;
    3124             : 
    3125       27425 :                     y_coeff_bits = 0;
    3126       27425 :                     cb_coeff_bits = 0;
    3127       27425 :                     cr_coeff_bits = 0;
    3128             : 
    3129       27425 :                     uint32_t totTu = context_ptr->blk_geom->txb_count[cu_ptr->tx_depth];
    3130             :                     uint8_t   tuIt;
    3131       27425 :                     uint16_t   cb_qp = cu_ptr->qp;
    3132       27425 :                     uint32_t  component_mask = context_ptr->blk_geom->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK;
    3133             : 
    3134       27425 :                     if (cu_ptr->prediction_unit_array[0].merge_flag == EB_FALSE) {
    3135       40089 :                         for (uint8_t tuIt = 0; tuIt < totTu; tuIt++) {
    3136       20940 :                             context_ptr->txb_itr = tuIt;
    3137       20940 :                             uint8_t uv_pass = cu_ptr->tx_depth && tuIt ? 0 : 1; //NM: 128x128 exception
    3138       20940 :                             txb_origin_x = context_ptr->cu_origin_x + context_ptr->blk_geom->tx_boff_x[cu_ptr->tx_depth][tuIt];
    3139       20940 :                             txb_origin_y = context_ptr->cu_origin_y + context_ptr->blk_geom->tx_boff_y[cu_ptr->tx_depth][tuIt];
    3140             : 
    3141       20940 :                             context_ptr->md_context->luma_txb_skip_context = 0;
    3142       20940 :                             context_ptr->md_context->luma_dc_sign_context = 0;
    3143       20940 :                             get_txb_ctx(
    3144       20940 :                                 picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr,
    3145             :                                 COMPONENT_LUMA,
    3146             :                                 picture_control_set_ptr->ep_luma_dc_sign_level_coeff_neighbor_array,
    3147             :                                 txb_origin_x,
    3148             :                                 txb_origin_y,
    3149       20940 :                                 context_ptr->blk_geom->bsize,
    3150       20940 :                                 context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    3151       20940 :                                 &context_ptr->md_context->luma_txb_skip_context,
    3152       20940 :                                 &context_ptr->md_context->luma_dc_sign_context);
    3153             : 
    3154       20941 :                             if (context_ptr->blk_geom->has_uv && uv_pass) {
    3155       18878 :                                 context_ptr->md_context->cb_txb_skip_context = 0;
    3156       18878 :                                 context_ptr->md_context->cb_dc_sign_context = 0;
    3157       18878 :                                 get_txb_ctx(
    3158       18878 :                                     picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr,
    3159             :                                     COMPONENT_CHROMA,
    3160             :                                     picture_control_set_ptr->ep_cb_dc_sign_level_coeff_neighbor_array,
    3161       18878 :                                     ROUND_UV(txb_origin_x) >> 1,
    3162       18878 :                                     ROUND_UV(txb_origin_y) >> 1,
    3163       18878 :                                     context_ptr->blk_geom->bsize_uv,
    3164       18878 :                                     context_ptr->blk_geom->txsize_uv[context_ptr->cu_ptr->tx_depth][context_ptr->txb_itr],
    3165       18878 :                                     &context_ptr->md_context->cb_txb_skip_context,
    3166       18878 :                                     &context_ptr->md_context->cb_dc_sign_context);
    3167             : 
    3168       18876 :                                 context_ptr->md_context->cr_txb_skip_context = 0;
    3169       18876 :                                 context_ptr->md_context->cr_dc_sign_context = 0;
    3170       18876 :                                 get_txb_ctx(
    3171       18876 :                                     picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr,
    3172             :                                     COMPONENT_CHROMA,
    3173             :                                     picture_control_set_ptr->ep_cr_dc_sign_level_coeff_neighbor_array,
    3174       18876 :                                     ROUND_UV(txb_origin_x) >> 1,
    3175       18876 :                                     ROUND_UV(txb_origin_y) >> 1,
    3176       18876 :                                     context_ptr->blk_geom->bsize_uv,
    3177       18876 :                                     context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    3178       18876 :                                     &context_ptr->md_context->cr_txb_skip_context,
    3179       18876 :                                     &context_ptr->md_context->cr_dc_sign_context);
    3180             :                             }
    3181       20940 :                             if (!zeroLumaCbfMD)
    3182             :                                 //inter mode  1
    3183       41880 :                                 Av1EncodeLoopFunctionTable[is16bit](
    3184             :                                     picture_control_set_ptr,
    3185             :                                     context_ptr,
    3186             :                                     sb_ptr,
    3187             :                                     txb_origin_x,   //pic org
    3188             :                                     txb_origin_y,
    3189             :                                     cb_qp,
    3190             :                                     recon_buffer,
    3191             :                                     coeff_buffer_sb,
    3192             :                                     residual_buffer,
    3193             :                                     transform_buffer,
    3194             :                                     inverse_quant_buffer,
    3195             :                                     transform_inner_array_ptr,
    3196             :                                     count_non_zero_coeffs,
    3197       20940 :                                     context_ptr->blk_geom->has_uv && uv_pass ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK,
    3198       20940 :                                     cu_ptr->delta_qp > 0 ? 0 : dZoffset,
    3199       20940 :                                     eobs[context_ptr->txb_itr],
    3200             :                                     cuPlane);
    3201             : 
    3202             :                             // SKIP the CBF zero mode for DC path. There are problems with cost calculations
    3203             :                             {
    3204             :                                 // Compute Tu distortion
    3205       20940 :                                 if (!zeroLumaCbfMD)
    3206             : 
    3207             :                                     // LUMA DISTORTION
    3208       20940 :                                     picture_full_distortion32_bits(
    3209             :                                         transform_buffer,
    3210       20940 :                                         context_ptr->coded_area_sb,
    3211             :                                         0,
    3212             :                                         inverse_quant_buffer,
    3213       20940 :                                         context_ptr->coded_area_sb,
    3214             :                                         0,
    3215       20940 :                                         blk_geom->tx_width[cu_ptr->tx_depth][tuIt],
    3216       20940 :                                         blk_geom->tx_height[cu_ptr->tx_depth][tuIt],
    3217       20940 :                                         context_ptr->blk_geom->bwidth_uv,
    3218       20940 :                                         context_ptr->blk_geom->bheight_uv,
    3219             :                                         yTuFullDistortion,
    3220             :                                         yTuFullDistortion,
    3221             :                                         yTuFullDistortion,
    3222       20940 :                                         eobs[context_ptr->txb_itr][0],
    3223             :                                         0,
    3224             :                                         0,
    3225             :                                         COMPONENT_LUMA);
    3226       20941 :                                 TxSize  txSize = blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr];
    3227       20941 :                                 int32_t shift = (MAX_TX_SCALE - av1_get_tx_scale(txSize)) * 2;
    3228       20941 :                                 yTuFullDistortion[DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(yTuFullDistortion[DIST_CALC_RESIDUAL], shift);
    3229       20941 :                                 yTuFullDistortion[DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(yTuFullDistortion[DIST_CALC_PREDICTION], shift);
    3230             : 
    3231       20941 :                                 y_tu_coeff_bits = 0;
    3232       20941 :                                 cb_tu_coeff_bits = 0;
    3233       20941 :                                 cr_tu_coeff_bits = 0;
    3234             : 
    3235       20941 :                                 if (!zeroLumaCbfMD) {
    3236       20941 :                                     ModeDecisionCandidateBuffer         **candidate_buffer_ptr_array_base = context_ptr->md_context->candidate_buffer_ptr_array;
    3237       20941 :                                     ModeDecisionCandidateBuffer         **candidate_buffer_ptr_array = &(candidate_buffer_ptr_array_base[0]);
    3238             :                                     ModeDecisionCandidateBuffer          *candidate_buffer;
    3239             : 
    3240             :                                     // Set the Candidate Buffer
    3241       20941 :                                     candidate_buffer = candidate_buffer_ptr_array[0];
    3242             :                                     // Rate estimation function uses the values from CandidatePtr. The right values are copied from cu_ptr to CandidatePtr
    3243       20941 :                                     candidate_buffer->candidate_ptr->type = cu_ptr->prediction_mode_flag;
    3244             : 
    3245       20941 :                                     const uint32_t coeff1dOffset = context_ptr->coded_area_sb;
    3246             : 
    3247       20941 :                                     av1_tu_estimate_coeff_bits(
    3248       20941 :                                         context_ptr->md_context,
    3249             :                                         0,//allow_update_cdf,
    3250             :                                         NULL,
    3251             :                                         picture_control_set_ptr,
    3252             :                                         candidate_buffer,
    3253             :                                         coeff1dOffset,
    3254       20941 :                                         context_ptr->coded_area_sb_uv,
    3255             :                                         coeff_est_entropy_coder_ptr,
    3256             :                                         coeff_buffer_sb,
    3257       20941 :                                         eobs[context_ptr->txb_itr][0],
    3258       20941 :                                         eobs[context_ptr->txb_itr][1],
    3259       20941 :                                         eobs[context_ptr->txb_itr][2],
    3260             :                                         &y_tu_coeff_bits,
    3261             :                                         &cb_tu_coeff_bits,
    3262             :                                         &cr_tu_coeff_bits,
    3263       20941 :                                         context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    3264       20941 :                                         context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    3265       20941 :                                         cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_Y],
    3266       20941 :                                         cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_UV],
    3267       20941 :                                         context_ptr->blk_geom->has_uv && uv_pass ? COMPONENT_ALL : COMPONENT_LUMA);
    3268             :                                 }
    3269             : 
    3270             :                                 // CBF Tu decision
    3271       20940 :                                 if (zeroLumaCbfMD == EB_FALSE)
    3272             : 
    3273       20940 :                                     av1_encode_tu_calc_cost(
    3274             :                                         context_ptr,
    3275             :                                         count_non_zero_coeffs,
    3276             :                                         yTuFullDistortion,
    3277             :                                         &y_tu_coeff_bits,
    3278             :                                         component_mask);
    3279             : 
    3280             :                                 else {
    3281           0 :                                     cu_ptr->transform_unit_array[context_ptr->txb_itr].y_has_coeff = 0;
    3282           0 :                                     cu_ptr->transform_unit_array[context_ptr->txb_itr].u_has_coeff = 0;
    3283           0 :                                     cu_ptr->transform_unit_array[context_ptr->txb_itr].v_has_coeff = 0;
    3284             :                                 }
    3285             :                                 // Update count_non_zero_coeffs after CBF decision
    3286       20940 :                                 if (cu_ptr->transform_unit_array[context_ptr->txb_itr].y_has_coeff == EB_FALSE)
    3287       14036 :                                     count_non_zero_coeffs[0] = 0;
    3288       20940 :                                 if (context_ptr->blk_geom->has_uv && uv_pass) {
    3289       18877 :                                     if (cu_ptr->transform_unit_array[context_ptr->txb_itr].u_has_coeff == EB_FALSE)
    3290       17642 :                                         count_non_zero_coeffs[1] = 0;
    3291       18877 :                                     if (cu_ptr->transform_unit_array[context_ptr->txb_itr].v_has_coeff == EB_FALSE)
    3292       18391 :                                         count_non_zero_coeffs[2] = 0;
    3293             :                                 }
    3294             : 
    3295             :                                 // Update TU count_non_zero_coeffs
    3296       20940 :                                 cu_ptr->transform_unit_array[context_ptr->txb_itr].nz_coef_count[0] = (uint16_t)count_non_zero_coeffs[0];
    3297       20940 :                                 cu_ptr->transform_unit_array[context_ptr->txb_itr].nz_coef_count[1] = (uint16_t)count_non_zero_coeffs[1];
    3298       20940 :                                 cu_ptr->transform_unit_array[context_ptr->txb_itr].nz_coef_count[2] = (uint16_t)count_non_zero_coeffs[2];
    3299             : 
    3300       20940 :                                 y_coeff_bits += y_tu_coeff_bits;
    3301       20940 :                                 if (context_ptr->blk_geom->has_uv && uv_pass) {
    3302       18877 :                                     cb_coeff_bits += cb_tu_coeff_bits;
    3303       18877 :                                     cr_coeff_bits += cr_tu_coeff_bits;
    3304             :                                 }
    3305             : 
    3306       20940 :                                 y_full_distortion[DIST_CALC_RESIDUAL] += yTuFullDistortion[DIST_CALC_RESIDUAL];
    3307       20940 :                                 y_full_distortion[DIST_CALC_PREDICTION] += yTuFullDistortion[DIST_CALC_PREDICTION];
    3308             : 
    3309       20940 :                                 if (allow_update_cdf) {
    3310       10869 :                                     ModeDecisionCandidateBuffer         **candidate_buffer_ptr_array_base = context_ptr->md_context->candidate_buffer_ptr_array;
    3311       10869 :                                     ModeDecisionCandidateBuffer         **candidate_buffer_ptr_array = &(candidate_buffer_ptr_array_base[0]);
    3312             :                                     ModeDecisionCandidateBuffer          *candidate_buffer;
    3313             : 
    3314             :                                     // Set the Candidate Buffer
    3315       10869 :                                     candidate_buffer = candidate_buffer_ptr_array[0];
    3316             :                                     // Rate estimation function uses the values from CandidatePtr. The right values are copied from cu_ptr to CandidatePtr
    3317       10869 :                                     candidate_buffer->candidate_ptr->type = cu_ptr->prediction_mode_flag;
    3318       10869 :                                     candidate_buffer->candidate_ptr->pred_mode = cu_ptr->pred_mode;
    3319             : #if FILTER_INTRA_FLAG
    3320       10869 :                                     candidate_buffer->candidate_ptr->filter_intra_mode = cu_ptr->filter_intra_mode;
    3321             : #endif
    3322       10869 :                                     const uint32_t coeff1dOffset = context_ptr->coded_area_sb;
    3323             : 
    3324             :                                     //CHKN add updating eobs[] after CBF decision
    3325       10869 :                                     if (cu_ptr->transform_unit_array[context_ptr->txb_itr].y_has_coeff == EB_FALSE)
    3326        8064 :                                         eobs[context_ptr->txb_itr][0] = 0;
    3327       10869 :                                     if (context_ptr->blk_geom->has_uv && uv_pass) {
    3328        8806 :                                         if (cu_ptr->transform_unit_array[context_ptr->txb_itr].u_has_coeff == EB_FALSE)
    3329        8365 :                                             eobs[context_ptr->txb_itr][1] = 0;
    3330        8806 :                                         if (cu_ptr->transform_unit_array[context_ptr->txb_itr].v_has_coeff == EB_FALSE)
    3331        8640 :                                             eobs[context_ptr->txb_itr][2] = 0;
    3332             :                                     }
    3333             : 
    3334       10869 :                                     av1_tu_estimate_coeff_bits(
    3335       10869 :                                         context_ptr->md_context,
    3336             :                                         1,//allow_update_cdf,
    3337       10869 :                                         &picture_control_set_ptr->ec_ctx_array[tbAddr],
    3338             :                                         picture_control_set_ptr,
    3339             :                                         candidate_buffer,
    3340             :                                         coeff1dOffset,
    3341       10869 :                                         context_ptr->coded_area_sb_uv,
    3342             :                                         coeff_est_entropy_coder_ptr,
    3343             :                                         coeff_buffer_sb,
    3344       10869 :                                         eobs[context_ptr->txb_itr][0],
    3345       10869 :                                         eobs[context_ptr->txb_itr][1],
    3346       10869 :                                         eobs[context_ptr->txb_itr][2],
    3347             :                                         &y_tu_coeff_bits,
    3348             :                                         &cb_tu_coeff_bits,
    3349             :                                         &cr_tu_coeff_bits,
    3350       10869 :                                         context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    3351       10869 :                                         context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    3352       10869 :                                         cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_Y],
    3353       10869 :                                         cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_UV],
    3354       10869 :                                         context_ptr->blk_geom->has_uv && uv_pass ? COMPONENT_ALL : COMPONENT_LUMA);
    3355             :                                 }
    3356             :                             }
    3357       20940 :                             context_ptr->coded_area_sb += blk_geom->tx_width[cu_ptr->tx_depth][tuIt] * blk_geom->tx_height[cu_ptr->tx_depth][tuIt];
    3358       20940 :                             if (context_ptr->blk_geom->has_uv && uv_pass)
    3359       18878 :                                 context_ptr->coded_area_sb_uv += blk_geom->tx_width_uv[cu_ptr->tx_depth][tuIt] * blk_geom->tx_height_uv[cu_ptr->tx_depth][tuIt];
    3360             : 
    3361             : 
    3362             :                             // Update the luma Dc Sign Level Coeff Neighbor Array
    3363             :                             {
    3364       20940 :                                 uint8_t dcSignLevelCoeff = (uint8_t)cu_ptr->quantized_dc[0][context_ptr->txb_itr];
    3365             : 
    3366       20940 :                                 neighbor_array_unit_mode_write(
    3367             :                                     picture_control_set_ptr->ep_luma_dc_sign_level_coeff_neighbor_array,
    3368             :                                     (uint8_t*)&dcSignLevelCoeff,
    3369             :                                     txb_origin_x,
    3370             :                                     txb_origin_y,
    3371       20940 :                                     context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr],
    3372       20940 :                                     context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr],
    3373             :                                     NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
    3374             :                             }
    3375             : 
    3376             : 
    3377             :                             // Update the cb Dc Sign Level Coeff Neighbor Array
    3378       20939 :                             if (context_ptr->blk_geom->has_uv && uv_pass)
    3379             :                             {
    3380       18876 :                                 uint8_t dcSignLevelCoeff = (uint8_t)cu_ptr->quantized_dc[1][context_ptr->txb_itr];
    3381       18876 :                                 neighbor_array_unit_mode_write(
    3382             :                                     picture_control_set_ptr->ep_cb_dc_sign_level_coeff_neighbor_array,
    3383             :                                     (uint8_t*)&dcSignLevelCoeff,
    3384       18876 :                                     ROUND_UV(txb_origin_x) >> 1,
    3385       18876 :                                     ROUND_UV(txb_origin_y) >> 1,
    3386       18876 :                                     context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    3387       18876 :                                     context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    3388             :                                     NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
    3389             :                             }
    3390             : 
    3391             :                             // Update the cr DC Sign Level Coeff Neighbor Array
    3392       20939 :                             if (context_ptr->blk_geom->has_uv && uv_pass)
    3393             :                             {
    3394       18876 :                                 uint8_t dcSignLevelCoeff = (uint8_t)cu_ptr->quantized_dc[2][context_ptr->txb_itr];
    3395       18876 :                                 neighbor_array_unit_mode_write(
    3396             :                                     picture_control_set_ptr->ep_cr_dc_sign_level_coeff_neighbor_array,
    3397             :                                     (uint8_t*)&dcSignLevelCoeff,
    3398       18876 :                                     ROUND_UV(txb_origin_x) >> 1,
    3399       18876 :                                     ROUND_UV(txb_origin_y) >> 1,
    3400       18876 :                                     context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    3401       18876 :                                     context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    3402             :                                     NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
    3403             :                             }
    3404             : 
    3405             :                         } // Transform Loop
    3406             :                     }
    3407             : 
    3408             :                     //Set Final CU data flags after skip/Merge decision.
    3409       27424 :                     if (isFirstCUinRow == EB_FALSE) {
    3410       27424 :                         if (cu_ptr->prediction_unit_array[0].merge_flag == EB_TRUE) {
    3411        8275 :                             cu_ptr->skip_flag = (isCuSkip) ? EB_TRUE : EB_FALSE;
    3412        8275 :                             cu_ptr->prediction_unit_array[0].merge_flag = (isCuSkip) ? EB_FALSE : EB_TRUE;
    3413             :                         }
    3414             :                     }
    3415             : 
    3416             :                     // Initialize the Transform Loop
    3417             : 
    3418       27424 :                     context_ptr->txb_itr = 0;
    3419       27424 :                     y_has_coeff = 0;
    3420       27424 :                     u_has_coeff = 0;
    3421       27424 :                     v_has_coeff = 0;
    3422       27424 :                     totTu = context_ptr->blk_geom->txb_count[cu_ptr->tx_depth];
    3423             : 
    3424             :                     //reset coeff buffer offsets at the start of a new Tx loop
    3425       27424 :                     context_ptr->coded_area_sb = coded_area_org;
    3426       27424 :                     context_ptr->coded_area_sb_uv = coded_area_org_uv;
    3427       56642 :                     for (tuIt = 0; tuIt < totTu; tuIt++)
    3428             :                     {
    3429       29214 :                         uint8_t uv_pass = cu_ptr->tx_depth && tuIt ? 0 : 1; //NM: 128x128 exeption
    3430       29214 :                         context_ptr->txb_itr = tuIt;
    3431       29214 :                         txb_origin_x = context_ptr->cu_origin_x + context_ptr->blk_geom->tx_boff_x[cu_ptr->tx_depth][tuIt];
    3432       29214 :                         txb_origin_y = context_ptr->cu_origin_y + context_ptr->blk_geom->tx_boff_y[cu_ptr->tx_depth][tuIt];
    3433             : 
    3434             : 
    3435             : 
    3436       29214 :                             context_ptr->md_context->luma_txb_skip_context = 0;
    3437       29214 :                             context_ptr->md_context->luma_dc_sign_context = 0;
    3438       29214 :                             get_txb_ctx(
    3439       29214 :                                 picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr,
    3440             :                                 COMPONENT_LUMA,
    3441             :                                 picture_control_set_ptr->ep_luma_dc_sign_level_coeff_neighbor_array,
    3442             :                                 txb_origin_x,
    3443             :                                 txb_origin_y,
    3444       29214 :                                 context_ptr->blk_geom->bsize,
    3445       29214 :                                 context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    3446       29214 :                                 &context_ptr->md_context->luma_txb_skip_context,
    3447       29214 :                                 &context_ptr->md_context->luma_dc_sign_context);
    3448             : 
    3449       29214 :                             if (context_ptr->blk_geom->has_uv && uv_pass) {
    3450             : 
    3451       27151 :                                 context_ptr->md_context->cb_txb_skip_context = 0;
    3452       27151 :                                 context_ptr->md_context->cb_dc_sign_context = 0;
    3453       27151 :                                 get_txb_ctx(
    3454       27151 :                                     picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr,
    3455             :                                     COMPONENT_CHROMA,
    3456             :                                     picture_control_set_ptr->ep_cb_dc_sign_level_coeff_neighbor_array,
    3457       27151 :                                     ROUND_UV(txb_origin_x) >> 1,
    3458       27151 :                                     ROUND_UV(txb_origin_y) >> 1,
    3459       27151 :                                     context_ptr->blk_geom->bsize_uv,
    3460       27151 :                                     context_ptr->blk_geom->txsize_uv[context_ptr->cu_ptr->tx_depth][context_ptr->txb_itr],
    3461       27151 :                                     &context_ptr->md_context->cb_txb_skip_context,
    3462       27151 :                                     &context_ptr->md_context->cb_dc_sign_context);
    3463             : 
    3464             : 
    3465       27149 :                                 context_ptr->md_context->cr_txb_skip_context = 0;
    3466       27149 :                                 context_ptr->md_context->cr_dc_sign_context = 0;
    3467       27149 :                                 get_txb_ctx(
    3468       27149 :                                     picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr,
    3469             :                                     COMPONENT_CHROMA,
    3470             :                                     picture_control_set_ptr->ep_cr_dc_sign_level_coeff_neighbor_array,
    3471       27149 :                                     ROUND_UV(txb_origin_x) >> 1,
    3472       27149 :                                     ROUND_UV(txb_origin_y) >> 1,
    3473       27149 :                                     context_ptr->blk_geom->bsize_uv,
    3474       27149 :                                     context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    3475       27149 :                                     &context_ptr->md_context->cr_txb_skip_context,
    3476       27149 :                                     &context_ptr->md_context->cr_dc_sign_context);
    3477             :                             }
    3478       29214 :                         if (cu_ptr->skip_flag == EB_TRUE) {
    3479        4167 :                             cu_ptr->transform_unit_array[context_ptr->txb_itr].y_has_coeff = EB_FALSE;
    3480        4167 :                             cu_ptr->transform_unit_array[context_ptr->txb_itr].u_has_coeff = EB_FALSE;
    3481        4167 :                             cu_ptr->transform_unit_array[context_ptr->txb_itr].v_has_coeff = EB_FALSE;
    3482             : 
    3483             : 
    3484        4167 :                             context_ptr->cu_ptr->quantized_dc[0][context_ptr->txb_itr] = 0;
    3485        4167 :                             context_ptr->cu_ptr->quantized_dc[1][context_ptr->txb_itr] = 0;
    3486        4167 :                             context_ptr->cu_ptr->quantized_dc[2][context_ptr->txb_itr] = 0;
    3487             :                         }
    3488       25047 :                         else if ((&cu_ptr->prediction_unit_array[0])->merge_flag == EB_TRUE) {
    3489             :                             //inter mode  2
    3490             : 
    3491        8218 :                             Av1EncodeLoopFunctionTable[is16bit](
    3492             :                                 picture_control_set_ptr,
    3493             :                                 context_ptr,
    3494             :                                 sb_ptr,
    3495             :                                 txb_origin_x, //pic offset
    3496             :                                 txb_origin_y,
    3497             :                                 cb_qp,
    3498             :                                 recon_buffer,
    3499             :                                 coeff_buffer_sb,
    3500             :                                 residual_buffer,
    3501             :                                 transform_buffer,
    3502             :                                 inverse_quant_buffer,
    3503             :                                 transform_inner_array_ptr,
    3504             :                                 count_non_zero_coeffs,
    3505        4109 :                                 context_ptr->blk_geom->has_uv && uv_pass ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK,
    3506        4109 :                                 cu_ptr->delta_qp > 0 ? 0 : dZoffset,
    3507        4109 :                                 eobs[context_ptr->txb_itr],
    3508             :                                 cuPlane);
    3509             : 
    3510        4111 :                             if (allow_update_cdf) {
    3511         130 :                                 ModeDecisionCandidateBuffer         **candidate_buffer_ptr_array_base = context_ptr->md_context->candidate_buffer_ptr_array;
    3512         130 :                                 ModeDecisionCandidateBuffer         **candidate_buffer_ptr_array = &(candidate_buffer_ptr_array_base[0]);
    3513             :                                 ModeDecisionCandidateBuffer          *candidate_buffer;
    3514             : 
    3515             :                                 // Set the Candidate Buffer
    3516         130 :                                 candidate_buffer = candidate_buffer_ptr_array[0];
    3517             :                                 // Rate estimation function uses the values from CandidatePtr. The right values are copied from cu_ptr to CandidatePtr
    3518         130 :                                 candidate_buffer->candidate_ptr->type = cu_ptr->prediction_mode_flag;
    3519         130 :                                 candidate_buffer->candidate_ptr->pred_mode = cu_ptr->pred_mode;
    3520             : #if FILTER_INTRA_FLAG
    3521         130 :                                 candidate_buffer->candidate_ptr->filter_intra_mode = cu_ptr->filter_intra_mode;
    3522             : #endif
    3523         130 :                                 const uint32_t coeff1dOffset = context_ptr->coded_area_sb;
    3524             : 
    3525         130 :                                 av1_tu_estimate_coeff_bits(
    3526         130 :                                     context_ptr->md_context,
    3527             :                                     1,//allow_update_cdf,
    3528         130 :                                     &picture_control_set_ptr->ec_ctx_array[tbAddr],
    3529             :                                     picture_control_set_ptr,
    3530             :                                     candidate_buffer,
    3531             :                                     coeff1dOffset,
    3532         130 :                                     context_ptr->coded_area_sb_uv,
    3533             :                                     coeff_est_entropy_coder_ptr,
    3534             :                                     coeff_buffer_sb,
    3535         130 :                                     eobs[context_ptr->txb_itr][0],
    3536         130 :                                     eobs[context_ptr->txb_itr][1],
    3537         130 :                                     eobs[context_ptr->txb_itr][2],
    3538             :                                     &y_tu_coeff_bits,
    3539             :                                     &cb_tu_coeff_bits,
    3540             :                                     &cr_tu_coeff_bits,
    3541         130 :                                     context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
    3542         130 :                                     context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    3543         130 :                                     cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_Y],
    3544         130 :                                     cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_UV],
    3545         130 :                                     context_ptr->blk_geom->has_uv && uv_pass ? COMPONENT_ALL : COMPONENT_LUMA);
    3546             :                             }
    3547             :                         }
    3548       29212 :                         if (context_ptr->blk_geom->has_uv && uv_pass) {
    3549       27149 :                             cu_ptr->block_has_coeff = cu_ptr->block_has_coeff |
    3550       27149 :                                 cu_ptr->transform_unit_array[context_ptr->txb_itr].y_has_coeff |
    3551       27149 :                                 cu_ptr->transform_unit_array[context_ptr->txb_itr].u_has_coeff |
    3552       27149 :                                 cu_ptr->transform_unit_array[context_ptr->txb_itr].v_has_coeff;
    3553             :                         }
    3554             :                         else {
    3555        2063 :                             cu_ptr->block_has_coeff = cu_ptr->block_has_coeff |
    3556        2063 :                                 cu_ptr->transform_unit_array[context_ptr->txb_itr].y_has_coeff;
    3557             :                         }
    3558             : 
    3559             :                         //inter mode
    3560       29212 :                         if (doRecon)
    3561             : 
    3562       29213 :                             Av1EncodeGenerateReconFunctionPtr[is16bit](
    3563             :                                 context_ptr,
    3564             :                                 txb_origin_x,  //pic offset
    3565             :                                 txb_origin_y,
    3566             :                                 recon_buffer,
    3567             :                                 inverse_quant_buffer,
    3568             :                                 transform_inner_array_ptr,
    3569       28888 :                                 context_ptr->blk_geom->has_uv && uv_pass ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK,
    3570       29213 :                                 eobs[context_ptr->txb_itr]);
    3571             : 
    3572       29214 :                         if (context_ptr->blk_geom->has_uv && uv_pass) {
    3573       27151 :                             y_has_coeff |= cu_ptr->transform_unit_array[context_ptr->txb_itr].y_has_coeff;
    3574       27151 :                             u_has_coeff |= cu_ptr->transform_unit_array[context_ptr->txb_itr].u_has_coeff;
    3575       27151 :                             v_has_coeff |= cu_ptr->transform_unit_array[context_ptr->txb_itr].v_has_coeff;
    3576             :                         }
    3577             :                         else
    3578        2063 :                             y_has_coeff |= cu_ptr->transform_unit_array[context_ptr->txb_itr].y_has_coeff;
    3579       29214 :                         context_ptr->coded_area_sb += blk_geom->tx_width[cu_ptr->tx_depth][tuIt] * blk_geom->tx_height[cu_ptr->tx_depth][tuIt];
    3580             : 
    3581       29214 :                         if (context_ptr->blk_geom->has_uv && uv_pass)
    3582       27152 :                             context_ptr->coded_area_sb_uv += blk_geom->tx_width_uv[cu_ptr->tx_depth][tuIt] * blk_geom->tx_height_uv[cu_ptr->tx_depth][tuIt];
    3583             : 
    3584             :                         // Update the luma Dc Sign Level Coeff Neighbor Array
    3585             :                         {
    3586       29214 :                             uint8_t dcSignLevelCoeff = (uint8_t)cu_ptr->quantized_dc[0][context_ptr->txb_itr];
    3587             : 
    3588       29214 :                             neighbor_array_unit_mode_write(
    3589             :                                 picture_control_set_ptr->ep_luma_dc_sign_level_coeff_neighbor_array,
    3590             :                                 (uint8_t*)&dcSignLevelCoeff,
    3591             :                                 txb_origin_x,
    3592             :                                 txb_origin_y,
    3593       29214 :                                 context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr],
    3594       29214 :                                 context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr],
    3595             :                                 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
    3596             :                         }
    3597             : 
    3598             :                         // Update the cb Dc Sign Level Coeff Neighbor Array
    3599       29213 :                         if (context_ptr->blk_geom->has_uv && uv_pass)
    3600             :                         {
    3601       27149 :                             uint8_t dcSignLevelCoeff = (uint8_t)cu_ptr->quantized_dc[1][context_ptr->txb_itr];
    3602       27149 :                             neighbor_array_unit_mode_write(
    3603             :                                 picture_control_set_ptr->ep_cb_dc_sign_level_coeff_neighbor_array,
    3604             :                                 (uint8_t*)&dcSignLevelCoeff,
    3605       27149 :                                 ROUND_UV(txb_origin_x) >> 1,
    3606       27149 :                                 ROUND_UV(txb_origin_y) >> 1,
    3607       27149 :                                 context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    3608       27149 :                                 context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    3609             :                                 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
    3610             :                         }
    3611             : 
    3612             :                         // Update the cr DC Sign Level Coeff Neighbor Array
    3613       29216 :                         if (context_ptr->blk_geom->has_uv && uv_pass)
    3614             :                         {
    3615       27152 :                             uint8_t dcSignLevelCoeff = (uint8_t)cu_ptr->quantized_dc[2][context_ptr->txb_itr];
    3616       27152 :                             neighbor_array_unit_mode_write(
    3617             :                                 picture_control_set_ptr->ep_cr_dc_sign_level_coeff_neighbor_array,
    3618             :                                 (uint8_t*)&dcSignLevelCoeff,
    3619       27152 :                                 ROUND_UV(txb_origin_x) >> 1,
    3620       27152 :                                 ROUND_UV(txb_origin_y) >> 1,
    3621       27152 :                                 context_ptr->blk_geom->tx_width_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    3622       27152 :                                 context_ptr->blk_geom->tx_height_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
    3623             :                                 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
    3624             :                         }
    3625             : 
    3626             :                     } // Transform Loop
    3627             : 
    3628             :                     // Calculate Root CBF
    3629       27428 :                     if (context_ptr->blk_geom->has_uv)
    3630       27154 :                         cu_ptr->block_has_coeff = (y_has_coeff | u_has_coeff | v_has_coeff) ? EB_TRUE : EB_FALSE;
    3631             :                     else
    3632         274 :                         cu_ptr->block_has_coeff = (y_has_coeff) ? EB_TRUE : EB_FALSE;
    3633             : 
    3634             :                     // Force Skip if MergeFlag == TRUE && RootCbf == 0
    3635             : 
    3636       27428 :                     if (cu_ptr->skip_flag == EB_FALSE &&
    3637       23261 :                         cu_ptr->prediction_unit_array[0].merge_flag == EB_TRUE && cu_ptr->block_has_coeff == EB_FALSE)
    3638             :                     {
    3639        3810 :                         cu_ptr->skip_flag = EB_TRUE;
    3640             :                     }
    3641             : 
    3642             :                     {
    3643             :                         // Set the PU Loop Variables
    3644       27428 :                         pu_ptr = cu_ptr->prediction_unit_array;
    3645             : 
    3646             :                         // Set MvUnit
    3647       27428 :                         context_ptr->mv_unit.pred_direction = (uint8_t)pu_ptr->inter_pred_direction_index;
    3648       27428 :                         context_ptr->mv_unit.mv[REF_LIST_0].mv_union = pu_ptr->mv[REF_LIST_0].mv_union;
    3649       27428 :                         context_ptr->mv_unit.mv[REF_LIST_1].mv_union = pu_ptr->mv[REF_LIST_1].mv_union;
    3650             : 
    3651             :                         // Update Neighbor Arrays (Mode Type, mvs, SKIP)
    3652             :                         {
    3653       27428 :                             uint8_t skip_flag = (uint8_t)cu_ptr->skip_flag;
    3654       27428 :                             EncodePassUpdateInterModeNeighborArrays(
    3655             :                                 ep_mode_type_neighbor_array,
    3656             :                                 ep_mv_neighbor_array,
    3657             :                                 ep_skip_flag_neighbor_array,
    3658             :                                 &context_ptr->mv_unit,
    3659             :                                 &skip_flag,
    3660       27428 :                                 context_ptr->cu_origin_x,
    3661       27428 :                                 context_ptr->cu_origin_y,
    3662       27428 :                                 blk_geom->bwidth,
    3663       27428 :                                 blk_geom->bheight);
    3664             :                         }
    3665             :                     } // 2nd Partition Loop
    3666             : 
    3667             :                     // Update Recon Samples Neighbor Arrays -INTER-
    3668             : 
    3669       27426 :                     if (doRecon)
    3670       27426 :                         EncodePassUpdateReconSampleNeighborArrays(
    3671             :                             ep_luma_recon_neighbor_array,
    3672             :                             ep_cb_recon_neighbor_array,
    3673             :                             ep_cr_recon_neighbor_array,
    3674             :                             recon_buffer,
    3675       27426 :                             context_ptr->cu_origin_x,
    3676       27426 :                             context_ptr->cu_origin_y,
    3677       27426 :                             context_ptr->blk_geom->bwidth,
    3678       27426 :                             context_ptr->blk_geom->bheight,
    3679       27426 :                             context_ptr->blk_geom->bwidth_uv,
    3680       27426 :                             context_ptr->blk_geom->bheight_uv,
    3681       27426 :                             context_ptr->blk_geom->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK,
    3682             :                             is16bit);
    3683             : #if TWO_PASS
    3684             :                     // Collect the referenced area per 64x64
    3685       27427 :                     if (sequence_control_set_ptr->use_output_stat_file) {
    3686           0 :                         if (cu_ptr->prediction_unit_array->ref_frame_index_l0 >= 0) {
    3687           0 :                             eb_block_on_mutex(refObj0->referenced_area_mutex);
    3688             :                             {
    3689           0 :                                 if (context_ptr->mv_unit.pred_direction == UNI_PRED_LIST_0 || context_ptr->mv_unit.pred_direction == BI_PRED) {
    3690             :                                     //List0-Y
    3691           0 :                                     uint16_t origin_x = MAX(0, (int16_t)context_ptr->cu_origin_x + (context_ptr->mv_unit.mv[REF_LIST_0].x >> 3));
    3692           0 :                                     uint16_t origin_y = MAX(0, (int16_t)context_ptr->cu_origin_y + (context_ptr->mv_unit.mv[REF_LIST_0].y >> 3));
    3693           0 :                                     origin_x = MIN(origin_x, sequence_control_set_ptr->seq_header.max_frame_width - blk_geom->bwidth);
    3694           0 :                                     origin_y = MIN(origin_y, sequence_control_set_ptr->seq_header.max_frame_height - blk_geom->bheight);
    3695           0 :                                     uint16_t sb_origin_x = origin_x / context_ptr->sb_sz * context_ptr->sb_sz;
    3696           0 :                                     uint16_t sb_origin_y = origin_y / context_ptr->sb_sz * context_ptr->sb_sz;
    3697           0 :                                     uint32_t pic_width_in_sb = (sequence_control_set_ptr->seq_header.max_frame_width + sequence_control_set_ptr->sb_sz - 1) / sequence_control_set_ptr->sb_sz;
    3698           0 :                                     uint16_t sb_index = sb_origin_x / context_ptr->sb_sz + pic_width_in_sb * (sb_origin_y / context_ptr->sb_sz);
    3699             :                                     uint16_t width, height, weight;
    3700           0 :                                     weight = 1 << (4 - picture_control_set_ptr->parent_pcs_ptr->temporal_layer_index);
    3701             : 
    3702           0 :                                     width = MIN(sb_origin_x + context_ptr->sb_sz, origin_x + blk_geom->bwidth) - origin_x;
    3703           0 :                                     height = MIN(sb_origin_y + context_ptr->sb_sz, origin_y + blk_geom->bheight) - origin_y;
    3704           0 :                                     refObj0->stat_struct.referenced_area[sb_index] += width * height*weight;
    3705             : 
    3706           0 :                                     if (origin_x + blk_geom->bwidth > sb_origin_x + context_ptr->sb_sz) {
    3707           0 :                                         sb_origin_x = (origin_x / context_ptr->sb_sz + 1)* context_ptr->sb_sz;
    3708           0 :                                         sb_origin_y = origin_y / context_ptr->sb_sz * context_ptr->sb_sz;
    3709           0 :                                         sb_index = sb_origin_x / context_ptr->sb_sz + pic_width_in_sb * (sb_origin_y / context_ptr->sb_sz);
    3710           0 :                                         width = origin_x + blk_geom->bwidth - MAX(sb_origin_x, origin_x);
    3711           0 :                                         height = MIN(sb_origin_y + context_ptr->sb_sz, origin_y + blk_geom->bheight) - origin_y;
    3712           0 :                                         refObj0->stat_struct.referenced_area[sb_index] += width * height*weight;
    3713             :                                     }
    3714           0 :                                     if (origin_y + blk_geom->bheight > sb_origin_y + context_ptr->sb_sz) {
    3715           0 :                                         sb_origin_x = (origin_x / context_ptr->sb_sz)* context_ptr->sb_sz;
    3716           0 :                                         sb_origin_y = (origin_y / context_ptr->sb_sz + 1) * context_ptr->sb_sz;
    3717           0 :                                         sb_index = sb_origin_x / context_ptr->sb_sz + pic_width_in_sb * (sb_origin_y / context_ptr->sb_sz);
    3718           0 :                                         width = MIN(sb_origin_x + context_ptr->sb_sz, origin_x + blk_geom->bwidth) - origin_x;
    3719           0 :                                         height = origin_y + blk_geom->bheight - MAX(sb_origin_y, origin_y);
    3720           0 :                                         refObj0->stat_struct.referenced_area[sb_index] += width * height*weight;
    3721             :                                     }
    3722           0 :                                     if (origin_x + blk_geom->bwidth > sb_origin_x + context_ptr->sb_sz &&
    3723           0 :                                         origin_y + blk_geom->bheight > sb_origin_y + context_ptr->sb_sz) {
    3724           0 :                                         sb_origin_x = (origin_x / context_ptr->sb_sz + 1)* context_ptr->sb_sz;
    3725           0 :                                         sb_origin_y = (origin_y / context_ptr->sb_sz + 1) * context_ptr->sb_sz;
    3726           0 :                                         sb_index = sb_origin_x / context_ptr->sb_sz + pic_width_in_sb * (sb_origin_y / context_ptr->sb_sz);
    3727           0 :                                         width = origin_x + blk_geom->bwidth - MAX(sb_origin_x, origin_x);
    3728           0 :                                         height = origin_y + blk_geom->bheight - MAX(sb_origin_y, origin_y);
    3729           0 :                                         refObj0->stat_struct.referenced_area[sb_index] += width * height*weight;
    3730             :                                     }
    3731             :                                 }
    3732             :                             }
    3733           0 :                             eb_release_mutex(refObj0->referenced_area_mutex);
    3734             :                         }
    3735             : 
    3736           0 :                         if (cu_ptr->prediction_unit_array->ref_frame_index_l1 >= 0) {
    3737           0 :                             eb_block_on_mutex(refObj1->referenced_area_mutex);
    3738           0 :                             if (context_ptr->mv_unit.pred_direction == UNI_PRED_LIST_1 || context_ptr->mv_unit.pred_direction == BI_PRED) {
    3739             :                                 //List1-Y
    3740           0 :                                 uint16_t origin_x = MAX(0, (int16_t)context_ptr->cu_origin_x + (context_ptr->mv_unit.mv[REF_LIST_1].x >> 3));
    3741           0 :                                 uint16_t origin_y = MAX(0, (int16_t)context_ptr->cu_origin_y + (context_ptr->mv_unit.mv[REF_LIST_1].y >> 3));
    3742           0 :                                 origin_x = MIN(origin_x, sequence_control_set_ptr->seq_header.max_frame_width - blk_geom->bwidth);
    3743           0 :                                 origin_y = MIN(origin_y, sequence_control_set_ptr->seq_header.max_frame_height - blk_geom->bheight);
    3744           0 :                                 uint16_t sb_origin_x = origin_x / context_ptr->sb_sz * context_ptr->sb_sz;
    3745           0 :                                 uint16_t sb_origin_y = origin_y / context_ptr->sb_sz * context_ptr->sb_sz;
    3746           0 :                                 uint32_t pic_width_in_sb = (sequence_control_set_ptr->seq_header.max_frame_width + sequence_control_set_ptr->sb_sz - 1) / sequence_control_set_ptr->sb_sz;
    3747           0 :                                 uint16_t sb_index = sb_origin_x / context_ptr->sb_sz + pic_width_in_sb * (sb_origin_y / context_ptr->sb_sz);
    3748             :                                 uint16_t width, height, weight;
    3749           0 :                                 weight = 1 << (4 - picture_control_set_ptr->parent_pcs_ptr->temporal_layer_index);
    3750             : 
    3751           0 :                                 width = MIN(sb_origin_x + context_ptr->sb_sz, origin_x + blk_geom->bwidth) - origin_x;
    3752           0 :                                 height = MIN(sb_origin_y + context_ptr->sb_sz, origin_y + blk_geom->bheight) - origin_y;
    3753           0 :                                 refObj1->stat_struct.referenced_area[sb_index] += width * height*weight;
    3754           0 :                                 if (origin_x + blk_geom->bwidth > sb_origin_x + context_ptr->sb_sz) {
    3755           0 :                                     sb_origin_x = (origin_x / context_ptr->sb_sz + 1)* context_ptr->sb_sz;
    3756           0 :                                     sb_origin_y = origin_y / context_ptr->sb_sz * context_ptr->sb_sz;
    3757           0 :                                     sb_index = sb_origin_x / context_ptr->sb_sz + pic_width_in_sb * (sb_origin_y / context_ptr->sb_sz);
    3758           0 :                                     width = origin_x + blk_geom->bwidth - MAX(sb_origin_x, origin_x);
    3759           0 :                                     height = MIN(sb_origin_y + context_ptr->sb_sz, origin_y + blk_geom->bheight) - origin_y;
    3760           0 :                                     refObj1->stat_struct.referenced_area[sb_index] += width * height*weight;
    3761             :                                 }
    3762           0 :                                 if (origin_y + blk_geom->bheight > sb_origin_y + context_ptr->sb_sz) {
    3763           0 :                                     sb_origin_x = (origin_x / context_ptr->sb_sz)* context_ptr->sb_sz;
    3764           0 :                                     sb_origin_y = (origin_y / context_ptr->sb_sz + 1) * context_ptr->sb_sz;
    3765           0 :                                     sb_index = sb_origin_x / context_ptr->sb_sz + pic_width_in_sb * (sb_origin_y / context_ptr->sb_sz);
    3766           0 :                                     width = MIN(sb_origin_x + context_ptr->sb_sz, origin_x + blk_geom->bwidth) - origin_x;
    3767           0 :                                     height = origin_y + blk_geom->bheight - MAX(sb_origin_y, origin_y);
    3768           0 :                                     refObj1->stat_struct.referenced_area[sb_index] += width * height*weight;
    3769             :                                 }
    3770           0 :                                 if (origin_x + blk_geom->bwidth > sb_origin_x + context_ptr->sb_sz &&
    3771           0 :                                     origin_y + blk_geom->bheight > sb_origin_y + context_ptr->sb_sz) {
    3772           0 :                                     sb_origin_x = (origin_x / context_ptr->sb_sz + 1)* context_ptr->sb_sz;
    3773           0 :                                     sb_origin_y = (origin_y / context_ptr->sb_sz + 1) * context_ptr->sb_sz;
    3774           0 :                                     sb_index = sb_origin_x / context_ptr->sb_sz + pic_width_in_sb * (sb_origin_y / context_ptr->sb_sz);
    3775           0 :                                     width = origin_x + blk_geom->bwidth - MAX(sb_origin_x, origin_x);
    3776           0 :                                     height = origin_y + blk_geom->bheight - MAX(sb_origin_y, origin_y);
    3777           0 :                                     refObj1->stat_struct.referenced_area[sb_index] += width * height*weight;
    3778             :                                 }
    3779             :                             }
    3780           0 :                             eb_release_mutex(refObj1->referenced_area_mutex);
    3781             :                         }
    3782             :                     }
    3783             : #endif
    3784             :                 }
    3785             :                 else {
    3786           1 :                     CHECK_REPORT_ERROR_NC(
    3787             :                         encode_context_ptr->app_callback_ptr,
    3788             :                         EB_ENC_CL_ERROR2);
    3789             :                 }
    3790             : 
    3791       33845 :                 update_av1_mi_map(
    3792             :                     cu_ptr,
    3793       33845 :                     context_ptr->cu_origin_x,
    3794       33845 :                     context_ptr->cu_origin_y,
    3795             :                     blk_geom,
    3796             :                     picture_control_set_ptr);
    3797             : 
    3798             :                 if (dlfEnableFlag)
    3799             :                 {
    3800             :                 }
    3801             : 
    3802             :                 {
    3803             :                     {
    3804             :                         // Set the PU Loop Variables
    3805       33844 :                         pu_ptr = cu_ptr->prediction_unit_array;
    3806             :                         // Set MvUnit
    3807       33844 :                         context_ptr->mv_unit.pred_direction = (uint8_t)pu_ptr->inter_pred_direction_index;
    3808       33844 :                         context_ptr->mv_unit.mv[REF_LIST_0].mv_union = pu_ptr->mv[REF_LIST_0].mv_union;
    3809       33844 :                         context_ptr->mv_unit.mv[REF_LIST_1].mv_union = pu_ptr->mv[REF_LIST_1].mv_union;
    3810             :                     }
    3811             :                 }
    3812             : 
    3813             :                 {
    3814       33844 :                     CodingUnit *src_cu = &context_ptr->md_context->md_cu_arr_nsq[d1_itr];
    3815             : 
    3816       33844 :                     CodingUnit *dst_cu = &sb_ptr->final_cu_arr[final_cu_itr++];
    3817             : #if PAL_SUP
    3818       33844 :                     move_cu_data(picture_control_set_ptr, context_ptr,src_cu, dst_cu);
    3819             : #else
    3820             :                     move_cu_data(src_cu, dst_cu);
    3821             : #endif
    3822             :                 }
    3823       33837 :                 if (sequence_control_set_ptr->mfmv_enabled && picture_control_set_ptr->slice_type != I_SLICE && picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) {
    3824        9165 :                     uint32_t mi_stride = picture_control_set_ptr->mi_stride;
    3825        9165 :                     int32_t mi_row = context_ptr->cu_origin_y >> MI_SIZE_LOG2;
    3826        9165 :                     int32_t mi_col = context_ptr->cu_origin_x >> MI_SIZE_LOG2;
    3827        9165 :                     const int32_t offset = mi_row * mi_stride + mi_col;
    3828        9165 :                     ModeInfo *miPtr = *(picture_control_set_ptr->mi_grid_base + offset);
    3829        9165 :                     const int x_mis = AOMMIN(context_ptr->blk_geom->bwidth, picture_control_set_ptr->parent_pcs_ptr->av1_cm->mi_cols - mi_col);
    3830        9165 :                     const int y_mis = AOMMIN(context_ptr->blk_geom->bheight, picture_control_set_ptr->parent_pcs_ptr->av1_cm->mi_rows - mi_row);
    3831        9165 :                     EbReferenceObject *obj_l0 = (EbReferenceObject*)picture_control_set_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr;
    3832             : 
    3833        9165 :                     av1_copy_frame_mvs(picture_control_set_ptr, picture_control_set_ptr->parent_pcs_ptr->av1_cm, miPtr->mbmi,
    3834             :                         mi_row, mi_col, x_mis, y_mis, obj_l0);
    3835             :                 }
    3836             :             }
    3837       29198 :             blk_it += ns_depth_offset[sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128][context_ptr->blk_geom->depth];
    3838             :         }
    3839             :         else
    3840       97900 :             blk_it += d1_depth_offset[sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128][context_ptr->blk_geom->depth];
    3841             :     } // CU Loop
    3842             : #if AV1_LF
    3843             :     // First Pass Deblocking
    3844        7187 :     if (dlfEnableFlag && picture_control_set_ptr->parent_pcs_ptr->loop_filter_mode == 1) {
    3845        1799 :         if (picture_control_set_ptr->parent_pcs_ptr->frm_hdr.loop_filter_params.filter_level[0] || picture_control_set_ptr->parent_pcs_ptr->frm_hdr.loop_filter_params.filter_level[1]) {
    3846        1799 :             uint8_t LastCol = ((sb_origin_x)+sb_width == sequence_control_set_ptr->seq_header.max_frame_width) ? 1 : 0;
    3847        1799 :             loop_filter_sb(
    3848             :                 recon_buffer,
    3849             :                 picture_control_set_ptr,
    3850             :                 NULL,
    3851        1799 :                 sb_origin_y >> 2,
    3852        1799 :                 sb_origin_x >> 2,
    3853             :                 0,
    3854             :                 3,
    3855             :                 LastCol);
    3856             :         }
    3857             :     }
    3858             : #endif
    3859             : 
    3860        7200 :     return;
    3861             : }
    3862             : 
    3863             : #if NO_ENCDEC
                       /*
                        * no_enc_dec_pass
                        *
                        * Builds the final per-SB data for sb_ptr directly from the mode-decision
                        * context (context_ptr->md_context->md_cu_arr_nsq). Only compiled when
                        * NO_ENCDEC is set.
                        *
                        * The partition type of every visited block index is recorded in
                        * sb_ptr->cu_partition_array. For every visited block index whose partition
                        * is not PARTITION_SPLIT, each block of that partition shape is processed:
                        *   - delta_qp is forced to 0 and qp to picture_control_set_ptr->picture_qp;
                        *   - the CU is handed to move_cu_data() with the next free slot of
                        *     sb_ptr->final_cu_arr as destination;
                        *   - the CU's coeff_tmp buffers are appended to sb_ptr->quantized_coeff;
                        *   - the CU's recon_tmp buffers are copied into the reference picture when
                        *     is_used_as_reference_flag is set, otherwise into recon_picture_ptr.
                        *
                        * Parameters:
                        *   sequence_control_set_ptr  supplies max_block_cnt and sb_size
                        *   picture_control_set_ptr   supplies picture_qp and the destination picture
                        *   sb_ptr                    SB being filled (cu_partition_array, final_cu_arr,
                        *                             quantized_coeff and qp are written)
                        *   tbAddr                    not referenced in the body
                        *   sb_origin_x, sb_origin_y  added to each block's blk_geom origin to form
                        *                             context_ptr->cu_origin_x / cu_origin_y
                        *   sb_qp                     not referenced in the body
                        *   context_ptr               source of the MD results; its cu_ptr, blk_geom,
                        *                             cu_origin_x/y, coded_area_sb and coded_area_sb_uv
                        *                             fields are overwritten while iterating
                        *
                        * NOTE(review): the recon copy goes through uint8_t pointers with fixed
                        * source strides of 128 (luma) and 64 (chroma) - presumably 8-bit only and
                        * tied to the layout of recon_tmp; confirm against its allocation.
                        */
    3864             : EB_EXTERN void no_enc_dec_pass(
    3865             :     SequenceControlSet    *sequence_control_set_ptr,
    3866             :     PictureControlSet     *picture_control_set_ptr,
    3867             :     LargestCodingUnit     *sb_ptr,
    3868             :     uint32_t                   tbAddr,
    3869             :     uint32_t                   sb_origin_x,
    3870             :     uint32_t                   sb_origin_y,
    3871             :     uint32_t                   sb_qp,
    3872             :     EncDecContext         *context_ptr)
    3873             : {
                           // Restart the running write offsets into sb_ptr->quantized_coeff
                           // (coded_area_sb indexes buffer_y, coded_area_sb_uv indexes buffer_cb/cr).
    3874             :     context_ptr->coded_area_sb = 0;
    3875             :     context_ptr->coded_area_sb_uv = 0;
    3876             : 
                           // Index of the next free slot in sb_ptr->final_cu_arr.
    3877             :     uint32_t      final_cu_itr = 0;
    3878             : 
                           // Block index passed to get_blk_geom_mds() and used to index md_cu_arr_nsq.
    3879             :     uint32_t    blk_it = 0;
    3880             : 
    3881             :     while (blk_it < sequence_control_set_ptr->max_block_cnt) {
    3882             :         CodingUnit  *cu_ptr = context_ptr->cu_ptr = &context_ptr->md_context->md_cu_arr_nsq[blk_it];
    3883             :         PartitionType part = cu_ptr->part;
    3884             :         const BlockGeom * blk_geom = context_ptr->blk_geom = get_blk_geom_mds(blk_it);
    3885             : 
                               // Record the partition decision for this block index.
    3886             :         sb_ptr->cu_partition_array[blk_it] = context_ptr->md_context->md_cu_arr_nsq[blk_it].part;
    3887             : 
    3888             :         if (part != PARTITION_SPLIT) {
                                   // The blocks of the selected shape are the num_d1_block entries
                                   // starting at blk_it + offset_d1.
    3889             :             int32_t offset_d1 = ns_blk_offset[(int32_t)part]; //cu_ptr->best_d1_blk; // TOCKECK
    3890             :             int32_t num_d1_block = ns_blk_num[(int32_t)part]; // context_ptr->blk_geom->totns; // TOCKECK
    3891             : 
    3892             :             for (int32_t d1_itr = blk_it + offset_d1; d1_itr < blk_it + offset_d1 + num_d1_block; d1_itr++) {
                                       // NOTE(review): these two declarations shadow the cu_ptr / blk_geom
                                       // declared at the top of the while body; the code below reads the
                                       // geometry through context_ptr->blk_geom, not through this local.
    3893             :                 const BlockGeom * blk_geom = context_ptr->blk_geom = get_blk_geom_mds(d1_itr);
    3894             :                 CodingUnit            *cu_ptr = context_ptr->cu_ptr = &context_ptr->md_context->md_cu_arr_nsq[d1_itr];
    3895             : 
                                       // Use the picture QP everywhere and a zero delta QP; sb_ptr->qp is
                                       // rewritten with the same value for every block.
    3896             :                 cu_ptr->delta_qp = 0;
    3897             :                 cu_ptr->qp = picture_control_set_ptr->picture_qp;
    3898             :                 sb_ptr->qp = picture_control_set_ptr->picture_qp;
    3899             :                 cu_ptr->org_delta_qp = cu_ptr->delta_qp;
    3900             : 
    3901             :                 {
                                           // Hand the MD result over to the next slot of the SB's final CU array.
    3902             :                     CodingUnit *src_cu = &context_ptr->md_context->md_cu_arr_nsq[d1_itr];
    3903             :                     CodingUnit *dst_cu = &sb_ptr->final_cu_arr[final_cu_itr++];
    3904             : 
                                           // NOTE(review): the enc/dec loop earlier in this file calls
                                           // move_cu_data(picture_control_set_ptr, context_ptr, src, dst) when
                                           // PAL_SUP is set; this two-argument call has no such guard - confirm
                                           // it still matches the declaration when NO_ENCDEC and PAL_SUP are both on.
    3905             :                     move_cu_data(src_cu, dst_cu);
    3906             :                 }
    3907             : 
                                       // Copy the coefficients, one transform block at a time.
                                       // txb_1d_offset / txb_1d_offset_uv are the read offsets into this CU's
                                       // coeff_tmp buffers; they restart at 0 for every CU.
    3908             :                 //copy coeff
    3909             :                 int32_t txb_1d_offset = 0, txb_1d_offset_uv = 0;
    3910             : 
    3911             :                 int32_t txb_itr = 0;
                                       // do/while: the body runs once before txb_count is tested.
    3912             :                 do
    3913             :                 {
                                           // Luma copy size: the transform size when it is below 64, otherwise 32.
    3914             :                     uint32_t  bwidth = context_ptr->blk_geom->tx_width[txb_itr] < 64 ? context_ptr->blk_geom->tx_width[txb_itr] : 32;
    3915             :                     uint32_t  bheight = context_ptr->blk_geom->tx_height[txb_itr] < 64 ? context_ptr->blk_geom->tx_height[txb_itr] : 32;
    3916             : 
    3917             :                     int32_t* src_ptr = &(((int32_t*)context_ptr->cu_ptr->coeff_tmp->buffer_y)[txb_1d_offset]);
    3918             :                     int32_t* dst_ptr = &(((int32_t*)sb_ptr->quantized_coeff->buffer_y)[context_ptr->coded_area_sb]);
    3919             : 
                                           // Row-by-row copy; source and destination both use a row pitch of bwidth.
    3920             :                     uint32_t j;
    3921             :                     for (j = 0; j < bheight; j++)
    3922             :                         memcpy(dst_ptr + j * bwidth, src_ptr + j * bwidth, bwidth * sizeof(int32_t));
    3923             :                     if (context_ptr->blk_geom->has_uv)
    3924             :                     {
                                               // Chroma uses the tx_width_uv / tx_height_uv values as they are.
    3925             :                         // Cb
    3926             :                         bwidth = context_ptr->blk_geom->tx_width_uv[txb_itr];
    3927             :                         bheight = context_ptr->blk_geom->tx_height_uv[txb_itr];
    3928             : 
    3929             :                         src_ptr = &(((int32_t*)context_ptr->cu_ptr->coeff_tmp->buffer_cb)[txb_1d_offset_uv]);
    3930             :                         dst_ptr = &(((int32_t*)sb_ptr->quantized_coeff->buffer_cb)[context_ptr->coded_area_sb_uv]);
    3931             : 
    3932             :                         for (j = 0; j < bheight; j++)
    3933             :                             memcpy(dst_ptr + j * bwidth, src_ptr + j * bwidth, bwidth * sizeof(int32_t));
    3934             :                         //Cr
    3935             :                         src_ptr = &(((int32_t*)context_ptr->cu_ptr->coeff_tmp->buffer_cr)[txb_1d_offset_uv]);
    3936             :                         dst_ptr = &(((int32_t*)sb_ptr->quantized_coeff->buffer_cr)[context_ptr->coded_area_sb_uv]);
    3937             : 
    3938             :                         for (j = 0; j < bheight; j++)
    3939             :                             memcpy(dst_ptr + j * bwidth, src_ptr + j * bwidth, bwidth * sizeof(int32_t));
    3940             :                     }
    3941             : 
                                           // Advance the SB write offsets and the CU read offsets by the full
                                           // tx_width * tx_height area (not by the 32-limited copy size above).
    3942             :                     context_ptr->coded_area_sb += context_ptr->blk_geom->tx_width[txb_itr] * context_ptr->blk_geom->tx_height[txb_itr];
    3943             :                     if (context_ptr->blk_geom->has_uv)
    3944             :                         context_ptr->coded_area_sb_uv += context_ptr->blk_geom->tx_width_uv[txb_itr] * context_ptr->blk_geom->tx_height_uv[txb_itr];
    3945             : 
    3946             :                     txb_1d_offset += context_ptr->blk_geom->tx_width[txb_itr] * context_ptr->blk_geom->tx_height[txb_itr];
    3947             :                     if (context_ptr->blk_geom->has_uv)
    3948             :                         txb_1d_offset_uv += context_ptr->blk_geom->tx_width_uv[txb_itr] * context_ptr->blk_geom->tx_height_uv[txb_itr];
    3949             : 
    3950             :                     txb_itr++;
    3951             :                 } while (txb_itr < context_ptr->blk_geom->txb_count);
    3952             : 
                                       // Copy the reconstructed samples of this CU into the output picture.
    3953             :                 //copy recon
    3954             :                 {
                                           // Destination: the reference object's picture when this picture is
                                           // used as a reference, otherwise the PCS recon picture.
    3955             :                     EbPictureBufferDesc          *ref_pic;
    3956             :                     if (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag)
    3957             :                     {
    3958             :                         EbReferenceObject* refObj = (EbReferenceObject*)picture_control_set_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr;
    3959             :                         ref_pic = refObj->reference_picture;
    3960             :                     }
    3961             :                     else
    3962             :                         ref_pic = picture_control_set_ptr->recon_picture_ptr;
                                           // CU position = SB origin + block origin taken from blk_geom.
    3963             :                     context_ptr->cu_origin_x = sb_origin_x + context_ptr->blk_geom->origin_x;
    3964             :                     context_ptr->cu_origin_y = sb_origin_y + context_ptr->blk_geom->origin_y;
    3965             : 
    3966             :                     uint32_t  bwidth = context_ptr->blk_geom->bwidth;
    3967             :                     uint32_t  bheight = context_ptr->blk_geom->bheight;
    3968             : 
                                           // Luma: read from the start of recon_tmp->buffer_y; the destination is
                                           // offset by the picture's origin_x / origin_y plus the CU position.
    3969             :                     uint8_t* src_ptr = &(((uint8_t*)context_ptr->cu_ptr->recon_tmp->buffer_y)[0]);
    3970             :                     uint8_t* dst_ptr = ref_pic->buffer_y + ref_pic->origin_x + context_ptr->cu_origin_x + (ref_pic->origin_y + context_ptr->cu_origin_y)*ref_pic->stride_y;
    3971             : 
                                           // Source rows are 128 bytes apart; destination rows use stride_y.
    3972             :                     uint32_t j;
    3973             :                     for (j = 0; j < bheight; j++)
    3974             :                         memcpy(dst_ptr + j * ref_pic->stride_y, src_ptr + j * 128, bwidth * sizeof(uint8_t));
    3975             :                     if (context_ptr->blk_geom->has_uv)
    3976             :                     {
    3977             :                         bwidth = context_ptr->blk_geom->bwidth_uv;
    3978             :                         bheight = context_ptr->blk_geom->bheight_uv;
    3979             : 
                                               // Cb: the CU origin has its low 3 bits cleared before being halved;
                                               // source rows are 64 bytes apart.
    3980             :                         src_ptr = &(((uint8_t*)context_ptr->cu_ptr->recon_tmp->buffer_cb)[0]);
    3981             : 
    3982             :                         dst_ptr = ref_pic->buffer_cb + ref_pic->origin_x / 2 + ((context_ptr->cu_origin_x >> 3) << 3) / 2 + (ref_pic->origin_y / 2 + ((context_ptr->cu_origin_y >> 3) << 3) / 2)*ref_pic->stride_cb;
    3983             : 
    3984             :                         for (j = 0; j < bheight; j++)
    3985             :                             memcpy(dst_ptr + j * ref_pic->stride_cb, src_ptr + j * 64, bwidth * sizeof(uint8_t));
                                               // Cr: same addressing as Cb, using buffer_cr / stride_cr.
    3986             :                         src_ptr = &(((uint8_t*)context_ptr->cu_ptr->recon_tmp->buffer_cr)[0]);
    3987             : 
    3988             :                         dst_ptr = ref_pic->buffer_cr + ref_pic->origin_x / 2 + ((context_ptr->cu_origin_x >> 3) << 3) / 2 + (ref_pic->origin_y / 2 + ((context_ptr->cu_origin_y >> 3) << 3) / 2)*ref_pic->stride_cr;
    3989             : 
    3990             :                         for (j = 0; j < bheight; j++)
    3991             :                             memcpy(dst_ptr + j * ref_pic->stride_cr, src_ptr + j * 64, bwidth * sizeof(uint8_t));
    3992             :                     }
    3993             :                 }
    3994             :             }
                                   // Advance blk_it by the ns_depth_offset entry for the current depth;
                                   // context_ptr->blk_geom is the geometry of the last block processed above.
                                   // NOTE(review): the enc/dec loop earlier in this file reads
                                   // sequence_control_set_ptr->seq_header.sb_size for the same table lookup;
                                   // confirm that sequence_control_set_ptr->sb_size is still a valid field.
    3995             :             blk_it += ns_depth_offset[sequence_control_set_ptr->sb_size == BLOCK_128X128][context_ptr->blk_geom->depth];
    3996             :         }
    3997             :         else
                                   // PARTITION_SPLIT: advance blk_it by the d1_depth_offset entry instead.
    3998             :             blk_it += d1_depth_offset[sequence_control_set_ptr->sb_size == BLOCK_128X128][context_ptr->blk_geom->depth];
    3999             :     } // CU Loop
    4000             : 
    4001             :     return;
    4002             : }
    4003             : #endif

Generated by: LCOV version 1.14