LCOV - code coverage report
Current view: top level - Codec - EbPictureAnalysisProcess.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 856 2114 40.5 %
Date: 2019-11-25 17:38:06 Functions: 26 50 52.0 %

          Line data    Source code
       1             : /*
       2             : * Copyright(c) 2019 Intel Corporation
       3             : * SPDX - License - Identifier: BSD - 2 - Clause - Patent
       4             : */
       5             : 
       6             : #include <stdlib.h>
       7             : #include <string.h>
       8             : 
       9             : #include "aom_dsp_rtcd.h"
      10             : #include "EbDefinitions.h"
      11             : #include "EbSystemResourceManager.h"
      12             : #include "EbPictureControlSet.h"
      13             : #include "EbSequenceControlSet.h"
      14             : #include "EbPictureBufferDesc.h"
      15             : 
      16             : #include "EbResourceCoordinationResults.h"
      17             : #include "EbPictureAnalysisProcess.h"
      18             : #include "EbPictureAnalysisResults.h"
      19             : #include "EbMcp.h"
      20             : #include "EbMotionEstimation.h"
      21             : #include "EbReferenceObject.h"
      22             : 
      23             : #include "EbComputeMean.h"
      24             : #include "EbMeSadCalculation.h"
      25             : #include "EbComputeMean_SSE2.h"
      26             : #include "EbCombinedAveragingSAD_Intrinsic_AVX2.h"
      27             : 
      28             : #define VARIANCE_PRECISION        16
      29             : #define  LCU_LOW_VAR_TH                5
      30             : #define  PIC_LOW_VAR_PERCENTAGE_TH    60
      31             : #define    FLAT_MAX_VAR            50
      32             : #define FLAT_MAX_VAR_DECIM        (50-00)
      33             : #define    NOISE_MIN_LEVEL            70000//120000
      34             : #define NOISE_MIN_LEVEL_DECIM   (70000+000000)//(120000+000000)
      35             : #define    NOISE_MIN_LEVEL_M6_M7       120000
      36             : #define NOISE_MIN_LEVEL_DECIM_M6_M7    (120000+000000)
      37             : #define DENOISER_QP_TH            29
      38             : #define DENOISER_BITRATE_TH        14000000
      39             : #define SAMPLE_THRESHOLD_PRECENT_BORDER_LINE      15
      40             : #define SAMPLE_THRESHOLD_PRECENT_TWO_BORDER_LINES 10
      41             : 
      42           8 : static void picture_analysis_context_dctor(EbPtr p)
      43             : {
      44           8 :     PictureAnalysisContext *obj = (PictureAnalysisContext*)p;
      45           8 :     EB_DELETE(obj->noise_picture_ptr);
      46           8 :     EB_DELETE(obj->denoised_picture_ptr);
      47           8 : }
      48             : /************************************************
      49             : * Picture Analysis Context Constructor
      50             : ************************************************/
      51           8 : EbErrorType picture_analysis_context_ctor(
      52             :     PictureAnalysisContext *context_ptr,
      53             :     EbPictureBufferDescInitData * input_picture_buffer_desc_init_data,
      54             :     EbBool                         denoise_flag,
      55             :     EbFifo *resource_coordination_results_input_fifo_ptr,
      56             :     EbFifo *picture_analysis_results_output_fifo_ptr)
      57             : {
      58           8 :     context_ptr->resource_coordination_results_input_fifo_ptr = resource_coordination_results_input_fifo_ptr;
      59           8 :     context_ptr->picture_analysis_results_output_fifo_ptr = picture_analysis_results_output_fifo_ptr;
      60             : 
      61           8 :     context_ptr->dctor = picture_analysis_context_dctor;
      62             : 
      63           8 :     if (denoise_flag == EB_TRUE) {
      64             :         //denoised
      65             :         // If 420/422, re-use luma for chroma
      66             :         // If 444, re-use luma for Cr
      67           8 :         if (input_picture_buffer_desc_init_data->color_format != EB_YUV444) {
      68           8 :             input_picture_buffer_desc_init_data->buffer_enable_mask = PICTURE_BUFFER_DESC_Y_FLAG;
      69             :         } else
      70           0 :             input_picture_buffer_desc_init_data->buffer_enable_mask = PICTURE_BUFFER_DESC_Y_FLAG | PICTURE_BUFFER_DESC_Cb_FLAG;
      71           8 :         EB_NEW(
      72             :             context_ptr->denoised_picture_ptr,
      73             :             eb_picture_buffer_desc_ctor,
      74             :             (EbPtr)input_picture_buffer_desc_init_data);
      75             : 
      76           8 :         if (input_picture_buffer_desc_init_data->color_format != EB_YUV444) {
      77           8 :             context_ptr->denoised_picture_ptr->buffer_cb = context_ptr->denoised_picture_ptr->buffer_y;
      78           8 :             context_ptr->denoised_picture_ptr->buffer_cr = context_ptr->denoised_picture_ptr->buffer_y + context_ptr->denoised_picture_ptr->chroma_size;
      79             :         } else
      80           0 :             context_ptr->denoised_picture_ptr->buffer_cr = context_ptr->denoised_picture_ptr->buffer_y;
      81             :         // noise
      82           8 :         input_picture_buffer_desc_init_data->max_height = BLOCK_SIZE_64;
      83           8 :         input_picture_buffer_desc_init_data->buffer_enable_mask = PICTURE_BUFFER_DESC_Y_FLAG;
      84             : 
      85           8 :         EB_NEW(
      86             :             context_ptr->noise_picture_ptr,
      87             :             eb_picture_buffer_desc_ctor,
      88             :             (EbPtr)input_picture_buffer_desc_init_data);
      89             :     }
      90           8 :     return EB_ErrorNone;
      91             : }
      92           0 : void DownSampleChroma(EbPictureBufferDesc* input_picture_ptr, EbPictureBufferDesc* outputPicturePtr)
      93             : {
      94           0 :     uint32_t input_color_format = input_picture_ptr->color_format;
      95           0 :     const uint16_t input_subsampling_x = (input_color_format == EB_YUV444 ? 1 : 2) - 1;
      96           0 :     const uint16_t input_subsampling_y = (input_color_format >= EB_YUV422 ? 1 : 2) - 1;
      97             : 
      98           0 :     uint32_t output_color_format = outputPicturePtr->color_format;
      99           0 :     const uint16_t output_subsampling_x = (output_color_format == EB_YUV444 ? 1 : 2) - 1;
     100           0 :     const uint16_t output_subsampling_y = (output_color_format >= EB_YUV422 ? 1 : 2) - 1;
     101             : 
     102             :     uint32_t stride_in, strideOut;
     103             :     uint32_t inputOriginIndex, outputOriginIndex;
     104             : 
     105             :     uint8_t *ptrIn;
     106             :     uint8_t *ptrOut;
     107             : 
     108             :     uint32_t ii, jj;
     109             : 
     110             :     //Cb
     111             :     {
     112           0 :         stride_in = input_picture_ptr->stride_cb;
     113           0 :         inputOriginIndex = (input_picture_ptr->origin_x >> input_subsampling_x) +
     114           0 :             (input_picture_ptr->origin_y >> input_subsampling_y)  * input_picture_ptr->stride_cb;
     115           0 :         ptrIn = &(input_picture_ptr->buffer_cb[inputOriginIndex]);
     116             : 
     117           0 :         strideOut = outputPicturePtr->stride_cb;
     118           0 :         outputOriginIndex = (outputPicturePtr->origin_x >> output_subsampling_x) +
     119           0 :             (outputPicturePtr->origin_y >> output_subsampling_y)  * outputPicturePtr->stride_cb;
     120           0 :         ptrOut = &(outputPicturePtr->buffer_cb[outputOriginIndex]);
     121             : 
     122           0 :         for (jj = 0; jj < (uint32_t)(outputPicturePtr->height >> output_subsampling_y); jj++) {
     123           0 :             for (ii = 0; ii < (uint32_t)(outputPicturePtr->width >> output_subsampling_x); ii++) {
     124           0 :                 ptrOut[ii + jj * strideOut] =
     125           0 :                     ptrIn[(ii << (1 - input_subsampling_x)) +
     126           0 :                     (jj << (1 - input_subsampling_y)) * stride_in];
     127             :             }
     128             :         }
     129             :     }
     130             : 
     131             :     //Cr
     132             :     {
     133           0 :         stride_in = input_picture_ptr->stride_cr;
     134           0 :         inputOriginIndex = (input_picture_ptr->origin_x >> input_subsampling_x) + (input_picture_ptr->origin_y >> input_subsampling_y)  * input_picture_ptr->stride_cr;
     135           0 :         ptrIn = &(input_picture_ptr->buffer_cr[inputOriginIndex]);
     136             : 
     137           0 :         strideOut = outputPicturePtr->stride_cr;
     138           0 :         outputOriginIndex = (outputPicturePtr->origin_x >> output_subsampling_x) + (outputPicturePtr->origin_y >> output_subsampling_y)  * outputPicturePtr->stride_cr;
     139           0 :         ptrOut = &(outputPicturePtr->buffer_cr[outputOriginIndex]);
     140             : 
     141           0 :         for (jj = 0; jj < (uint32_t)(outputPicturePtr->height >> output_subsampling_y); jj++) {
     142           0 :             for (ii = 0; ii < (uint32_t)(outputPicturePtr->width >> output_subsampling_x); ii++) {
     143           0 :                 ptrOut[ii + jj * strideOut] =
     144           0 :                     ptrIn[(ii << (1 - input_subsampling_x)) +
     145           0 :                     (jj << (1 - input_subsampling_y)) * stride_in];
     146             :             }
     147             :         }
     148             :     }
     149           0 : }
     150             : 
     151             : /************************************************
     152             :  * Picture Analysis Context Destructor
     153             :  ************************************************/
     154             :   /********************************************
     155             :     * decimation_2d
     156             :     *      decimates the input
     157             :     ********************************************/
     158        6155 : void decimation_2d(
     159             :     uint8_t *  input_samples,      // input parameter, input samples Ptr
     160             :     uint32_t   input_stride,       // input parameter, input stride
     161             :     uint32_t   input_area_width,   // input parameter, input area width
     162             :     uint32_t   input_area_height,  // input parameter, input area height
     163             :     uint8_t *  decim_samples,      // output parameter, decimated samples Ptr
     164             :     uint32_t   decim_stride,       // input parameter, output stride
     165             :     uint32_t   decim_step)         // input parameter, decimation amount in pixels
     166             : {
     167             :     uint32_t horizontal_index;
     168             :     uint32_t vertical_index;
     169        6155 :     uint32_t input_stripe_stride = input_stride * decim_step;
     170             : 
     171      134597 :     for (vertical_index = 0; vertical_index < input_area_height; vertical_index += decim_step) {
     172     9383800 :         for (horizontal_index = 0; horizontal_index < input_area_width; horizontal_index += decim_step)
     173     9255360 :             decim_samples[(horizontal_index >> (decim_step >> 1))] = input_samples[horizontal_index];
     174             : 
     175      128442 :         input_samples += input_stripe_stride;
     176      128442 :         decim_samples += decim_stride;
     177             :     }
     178             : 
     179        6155 :     return;
     180             : }
     181             : 
     182             : /********************************************
     183             :  * downsample_2d
     184             :  *      downsamples the input
     185             :  * Alternative implementation to decimation_2d that performs filtering (2x2, 0-phase)
     186             :  ********************************************/
     187         128 : void downsample_2d(
     188             :     uint8_t *  input_samples,      // input parameter, input samples Ptr
     189             :     uint32_t   input_stride,       // input parameter, input stride
     190             :     uint32_t   input_area_width,    // input parameter, input area width
     191             :     uint32_t   input_area_height,   // input parameter, input area height
     192             :     uint8_t *  decim_samples,      // output parameter, decimated samples Ptr
     193             :     uint32_t   decim_stride,       // input parameter, output stride
     194             :     uint32_t   decim_step)        // input parameter, decimation amount in pixels
     195             : {
     196             : 
     197             :     uint32_t horizontal_index;
     198             :     uint32_t vertical_index;
     199         128 :     uint32_t input_stripe_stride = input_stride * decim_step;
     200             :     uint32_t decim_horizontal_index;
     201         128 :     const uint32_t half_decim_step = decim_step >> 1;
     202             : 
     203       17264 :     for (input_samples += half_decim_step * input_stride, vertical_index = half_decim_step; vertical_index < input_area_height; vertical_index += decim_step) {
     204       17136 :         uint8_t *prev_input_line = input_samples - input_stride;
     205     3804880 :         for (horizontal_index = half_decim_step, decim_horizontal_index = 0; horizontal_index < input_area_width; horizontal_index += decim_step, decim_horizontal_index++) {
     206     3787740 :             uint32_t sum = (uint32_t)prev_input_line[horizontal_index - 1] + (uint32_t)prev_input_line[horizontal_index] + (uint32_t)input_samples[horizontal_index - 1] + (uint32_t)input_samples[horizontal_index];
     207     3787740 :             decim_samples[decim_horizontal_index] = (sum + 2) >> 2;
     208             : 
     209             :         }
     210       17136 :         input_samples += input_stripe_stride;
     211       17136 :         decim_samples += decim_stride;
     212             :     }
     213             : 
     214         128 :     return;
     215             : }
     216             : 
     217             : /********************************************
     218             : * CalculateHistogram
     219             : *      creates n-bins histogram for the input
     220             : ********************************************/
     221        5756 : void CalculateHistogram(
     222             :     uint8_t *  input_samples,      // input parameter, input samples Ptr
     223             :     uint32_t   input_area_width,    // input parameter, input area width
     224             :     uint32_t   input_area_height,   // input parameter, input area height
     225             :     uint32_t   stride,            // input parameter, input stride
     226             :     uint8_t    decim_step,         // input parameter, area height
     227             :     uint32_t  *histogram,            // output parameter, output histogram
     228             :     uint64_t  *sum)
     229             : {
     230             :     uint32_t horizontal_index;
     231             :     uint32_t vertical_index;
     232        5756 :     *sum = 0;
     233             : 
     234       91814 :     for (vertical_index = 0; vertical_index < input_area_height; vertical_index += decim_step) {
     235     2509550 :         for (horizontal_index = 0; horizontal_index < input_area_width; horizontal_index += decim_step) {
     236     2423490 :             ++(histogram[input_samples[horizontal_index]]);
     237     2423490 :             *sum += input_samples[horizontal_index];
     238             :         }
     239       86058 :         input_samples += (stride << (decim_step >> 1));
     240             :     }
     241             : 
     242        5756 :     return;
     243             : }
     244             : 
     245           0 : uint64_t ComputeVariance32x32(
     246             :     EbPictureBufferDesc       *input_padded_picture_ptr,         // input parameter, Input Padded Picture
     247             :     uint32_t                       inputLumaOriginIndex,          // input parameter, SB index, used to point to source/reference samples
     248             :     uint64_t                        *variance8x8) {
     249             :     uint32_t blockIndex;
     250             : 
     251             :     uint64_t mean_of8x8_blocks[16];
     252             :     uint64_t meanOf8x8SquaredValuesBlocks[16];
     253             : 
     254             :     uint64_t meanOf16x16Blocks[4];
     255             :     uint64_t meanOf16x16SquaredValuesBlocks[4];
     256             : 
     257             :     uint64_t meanOf32x32Blocks;
     258             :     uint64_t meanOf32x32SquaredValuesBlocks;
     259             :     /////////////////////////////////////////////
     260             :     // (0,0)
     261           0 :     blockIndex = inputLumaOriginIndex;
     262             : 
     263           0 :     mean_of8x8_blocks[0] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     264           0 :     meanOf8x8SquaredValuesBlocks[0] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     265             : 
     266             :     // (0,1)
     267           0 :     blockIndex = blockIndex + 8;
     268           0 :     mean_of8x8_blocks[1] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     269           0 :     meanOf8x8SquaredValuesBlocks[1] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     270             : 
     271             :     // (0,2)
     272           0 :     blockIndex = blockIndex + 8;
     273           0 :     mean_of8x8_blocks[2] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     274           0 :     meanOf8x8SquaredValuesBlocks[2] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     275             : 
     276             :     // (0,3)
     277           0 :     blockIndex = blockIndex + 8;
     278           0 :     mean_of8x8_blocks[3] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     279           0 :     meanOf8x8SquaredValuesBlocks[3] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     280             : 
     281             :     // (1,0)
     282           0 :     blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3);
     283           0 :     mean_of8x8_blocks[4] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     284           0 :     meanOf8x8SquaredValuesBlocks[4] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     285             : 
     286             :     // (1,1)
     287           0 :     blockIndex = blockIndex + 8;
     288           0 :     mean_of8x8_blocks[5] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     289           0 :     meanOf8x8SquaredValuesBlocks[5] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     290             : 
     291             :     // (1,2)
     292           0 :     blockIndex = blockIndex + 8;
     293           0 :     mean_of8x8_blocks[6] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     294           0 :     meanOf8x8SquaredValuesBlocks[6] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     295             : 
     296             :     // (1,3)
     297           0 :     blockIndex = blockIndex + 8;
     298           0 :     mean_of8x8_blocks[7] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     299           0 :     meanOf8x8SquaredValuesBlocks[7] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     300             : 
     301             :     // (2,0)
     302           0 :     blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 4);
     303           0 :     mean_of8x8_blocks[8] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     304           0 :     meanOf8x8SquaredValuesBlocks[8] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     305             : 
     306             :     // (2,1)
     307           0 :     blockIndex = blockIndex + 8;
     308           0 :     mean_of8x8_blocks[9] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     309           0 :     meanOf8x8SquaredValuesBlocks[9] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     310             : 
     311             :     // (2,2)
     312           0 :     blockIndex = blockIndex + 8;
     313           0 :     mean_of8x8_blocks[10] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     314           0 :     meanOf8x8SquaredValuesBlocks[10] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     315             : 
     316             :     // (2,3)
     317           0 :     blockIndex = blockIndex + 8;
     318           0 :     mean_of8x8_blocks[11] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     319           0 :     meanOf8x8SquaredValuesBlocks[11] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     320             : 
     321             :     // (3,0)
     322           0 :     blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3) + (input_padded_picture_ptr->stride_y << 4);
     323           0 :     mean_of8x8_blocks[12] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     324           0 :     meanOf8x8SquaredValuesBlocks[12] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     325             : 
     326             :     // (3,1)
     327           0 :     blockIndex = blockIndex + 8;
     328           0 :     mean_of8x8_blocks[13] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     329           0 :     meanOf8x8SquaredValuesBlocks[13] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     330             : 
     331             :     // (3,2)
     332           0 :     blockIndex = blockIndex + 8;
     333           0 :     mean_of8x8_blocks[14] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     334           0 :     meanOf8x8SquaredValuesBlocks[14] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     335             : 
     336             :     // (3,3)
     337           0 :     blockIndex = blockIndex + 8;
     338           0 :     mean_of8x8_blocks[15] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     339           0 :     meanOf8x8SquaredValuesBlocks[15] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     340             : 
     341             :     /////////////////////////////////////////////
     342             : 
     343           0 :     variance8x8[0] = meanOf8x8SquaredValuesBlocks[0] - (mean_of8x8_blocks[0] * mean_of8x8_blocks[0]);
     344           0 :     variance8x8[1] = meanOf8x8SquaredValuesBlocks[1] - (mean_of8x8_blocks[1] * mean_of8x8_blocks[1]);
     345           0 :     variance8x8[2] = meanOf8x8SquaredValuesBlocks[2] - (mean_of8x8_blocks[2] * mean_of8x8_blocks[2]);
     346           0 :     variance8x8[3] = meanOf8x8SquaredValuesBlocks[3] - (mean_of8x8_blocks[3] * mean_of8x8_blocks[3]);
     347           0 :     variance8x8[4] = meanOf8x8SquaredValuesBlocks[4] - (mean_of8x8_blocks[4] * mean_of8x8_blocks[4]);
     348           0 :     variance8x8[5] = meanOf8x8SquaredValuesBlocks[5] - (mean_of8x8_blocks[5] * mean_of8x8_blocks[5]);
     349           0 :     variance8x8[6] = meanOf8x8SquaredValuesBlocks[6] - (mean_of8x8_blocks[6] * mean_of8x8_blocks[6]);
     350           0 :     variance8x8[7] = meanOf8x8SquaredValuesBlocks[7] - (mean_of8x8_blocks[7] * mean_of8x8_blocks[7]);
     351           0 :     variance8x8[8] = meanOf8x8SquaredValuesBlocks[8] - (mean_of8x8_blocks[8] * mean_of8x8_blocks[8]);
     352           0 :     variance8x8[9] = meanOf8x8SquaredValuesBlocks[9] - (mean_of8x8_blocks[9] * mean_of8x8_blocks[9]);
     353           0 :     variance8x8[10] = meanOf8x8SquaredValuesBlocks[10] - (mean_of8x8_blocks[10] * mean_of8x8_blocks[10]);
     354           0 :     variance8x8[11] = meanOf8x8SquaredValuesBlocks[11] - (mean_of8x8_blocks[11] * mean_of8x8_blocks[11]);
     355           0 :     variance8x8[12] = meanOf8x8SquaredValuesBlocks[12] - (mean_of8x8_blocks[12] * mean_of8x8_blocks[12]);
     356           0 :     variance8x8[13] = meanOf8x8SquaredValuesBlocks[13] - (mean_of8x8_blocks[13] * mean_of8x8_blocks[13]);
     357           0 :     variance8x8[14] = meanOf8x8SquaredValuesBlocks[14] - (mean_of8x8_blocks[14] * mean_of8x8_blocks[14]);
     358           0 :     variance8x8[15] = meanOf8x8SquaredValuesBlocks[15] - (mean_of8x8_blocks[15] * mean_of8x8_blocks[15]);
     359             : 
     360             :     // 16x16
     361           0 :     meanOf16x16Blocks[0] = (mean_of8x8_blocks[0] + mean_of8x8_blocks[1] + mean_of8x8_blocks[8] + mean_of8x8_blocks[9]) >> 2;
     362           0 :     meanOf16x16Blocks[1] = (mean_of8x8_blocks[2] + mean_of8x8_blocks[3] + mean_of8x8_blocks[10] + mean_of8x8_blocks[11]) >> 2;
     363           0 :     meanOf16x16Blocks[2] = (mean_of8x8_blocks[4] + mean_of8x8_blocks[5] + mean_of8x8_blocks[12] + mean_of8x8_blocks[13]) >> 2;
     364           0 :     meanOf16x16Blocks[3] = (mean_of8x8_blocks[6] + mean_of8x8_blocks[7] + mean_of8x8_blocks[14] + mean_of8x8_blocks[15]) >> 2;
     365             : 
     366           0 :     meanOf16x16SquaredValuesBlocks[0] = (meanOf8x8SquaredValuesBlocks[0] + meanOf8x8SquaredValuesBlocks[1] + meanOf8x8SquaredValuesBlocks[8] + meanOf8x8SquaredValuesBlocks[9]) >> 2;
     367           0 :     meanOf16x16SquaredValuesBlocks[1] = (meanOf8x8SquaredValuesBlocks[2] + meanOf8x8SquaredValuesBlocks[3] + meanOf8x8SquaredValuesBlocks[10] + meanOf8x8SquaredValuesBlocks[11]) >> 2;
     368           0 :     meanOf16x16SquaredValuesBlocks[2] = (meanOf8x8SquaredValuesBlocks[4] + meanOf8x8SquaredValuesBlocks[5] + meanOf8x8SquaredValuesBlocks[12] + meanOf8x8SquaredValuesBlocks[13]) >> 2;
     369           0 :     meanOf16x16SquaredValuesBlocks[3] = (meanOf8x8SquaredValuesBlocks[6] + meanOf8x8SquaredValuesBlocks[7] + meanOf8x8SquaredValuesBlocks[14] + meanOf8x8SquaredValuesBlocks[15]) >> 2;
     370             : 
     371             :     // 32x32
     372           0 :     meanOf32x32Blocks = (meanOf16x16Blocks[0] + meanOf16x16Blocks[1] + meanOf16x16Blocks[2] + meanOf16x16Blocks[3]) >> 2;
     373             : 
     374           0 :     meanOf32x32SquaredValuesBlocks = (meanOf16x16SquaredValuesBlocks[0] + meanOf16x16SquaredValuesBlocks[1] + meanOf16x16SquaredValuesBlocks[2] + meanOf16x16SquaredValuesBlocks[3]) >> 2;
     375             : 
     376           0 :     return (meanOf32x32SquaredValuesBlocks - (meanOf32x32Blocks * meanOf32x32Blocks));
     377             : }
     378             : 
     379           0 : uint64_t ComputeVariance16x16(
     380             :     EbPictureBufferDesc       *input_padded_picture_ptr,         // input parameter, Input Padded Picture
     381             :     uint32_t                       inputLumaOriginIndex,          // input parameter, SB index, used to point to source/reference samples
     382             :     uint64_t                        *variance8x8)
     383             : {
     384             :     uint32_t blockIndex;
     385             : 
     386             :     uint64_t mean_of8x8_blocks[4];
     387             :     uint64_t meanOf8x8SquaredValuesBlocks[4];
     388             : 
     389             :     uint64_t meanOf16x16Blocks;
     390             :     uint64_t meanOf16x16SquaredValuesBlocks;
     391             : 
     392             :     // (0,0)
     393           0 :     blockIndex = inputLumaOriginIndex;
     394             : 
     395           0 :     mean_of8x8_blocks[0] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     396           0 :     meanOf8x8SquaredValuesBlocks[0] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     397             : 
     398             :     // (0,1)
     399           0 :     blockIndex = blockIndex + 8;
     400           0 :     mean_of8x8_blocks[1] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     401           0 :     meanOf8x8SquaredValuesBlocks[1] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     402             : 
     403             :     // (1,0)
     404           0 :     blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3);
     405           0 :     mean_of8x8_blocks[2] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     406           0 :     meanOf8x8SquaredValuesBlocks[2] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     407             : 
     408             :     // (1,1)
     409           0 :     blockIndex = blockIndex + 8;
     410           0 :     mean_of8x8_blocks[3] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     411           0 :     meanOf8x8SquaredValuesBlocks[3] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     412             : 
     413           0 :     variance8x8[0] = meanOf8x8SquaredValuesBlocks[0] - (mean_of8x8_blocks[0] * mean_of8x8_blocks[0]);
     414           0 :     variance8x8[1] = meanOf8x8SquaredValuesBlocks[1] - (mean_of8x8_blocks[1] * mean_of8x8_blocks[1]);
     415           0 :     variance8x8[2] = meanOf8x8SquaredValuesBlocks[2] - (mean_of8x8_blocks[2] * mean_of8x8_blocks[2]);
     416           0 :     variance8x8[3] = meanOf8x8SquaredValuesBlocks[3] - (mean_of8x8_blocks[3] * mean_of8x8_blocks[3]);
     417             : 
     418             :     // 16x16
     419           0 :     meanOf16x16Blocks = (mean_of8x8_blocks[0] + mean_of8x8_blocks[1] + mean_of8x8_blocks[2] + mean_of8x8_blocks[3]) >> 2;
     420           0 :     meanOf16x16SquaredValuesBlocks = (meanOf8x8SquaredValuesBlocks[0] + meanOf8x8SquaredValuesBlocks[1] + meanOf8x8SquaredValuesBlocks[2] + meanOf8x8SquaredValuesBlocks[3]) >> 2;
     421             : 
     422           0 :     return (meanOf16x16SquaredValuesBlocks - (meanOf16x16Blocks * meanOf16x16Blocks));
     423             : }
     424             : 
     425           0 : uint64_t compute_sub_mean_c(
     426             :     uint8_t* input_samples,     /**< input parameter, input samples Ptr */
     427             :     uint32_t input_stride,      /**< input parameter, input stride */
     428             :     uint32_t input_area_width,  /**< input parameter, input area width */
     429             :     uint32_t input_area_height) /**< input parameter, input area height */
     430             : {
     431             :     uint32_t hi, vi;
     432           0 :     uint64_t block_mean = 0;
     433           0 :     uint16_t skip = 0;
     434             : 
     435           0 :     for (vi = 0; skip < input_area_height; skip = vi + vi) {
     436           0 :         for (hi = 0; hi < input_area_width; hi++) {
     437           0 :             block_mean += input_samples[hi];
     438             :         }
     439           0 :         input_samples += 2 * input_stride;
     440           0 :         vi++;
     441             :     }
     442             : 
     443           0 :     block_mean = block_mean << 3;  // (VARIANCE_PRECISION >> 1)) /
     444             :                                    // (input_area_width * input_area_height/2)
     445             : 
     446           0 :     return block_mean;
     447             : }
     448             : 
     449           0 : uint64_t compute_sub_mean_squared_values_c(
     450             :     uint8_t* input_samples,     /**< input parameter, input samples Ptr */
     451             :     uint32_t input_stride,      /**< input parameter, input stride */
     452             :     uint32_t input_area_width,  /**< input parameter, input area width */
     453             :     uint32_t input_area_height) /**< input parameter, input area height */
     454             : {
     455             :     uint32_t hi, vi;
     456           0 :     uint64_t block_mean = 0;
     457           0 :     uint16_t skip = 0;
     458             : 
     459           0 :     for (vi = 0; skip < input_area_height; skip = vi + vi) {
     460           0 :         for (hi = 0; hi < input_area_width; hi++) {
     461           0 :             block_mean += input_samples[hi] * input_samples[hi];
     462             :         }
     463           0 :         input_samples += 2 * input_stride;
     464           0 :         vi++;
     465             :     }
     466             : 
     467           0 :     block_mean =
     468             :         block_mean
     469             :         << 11;  // VARIANCE_PRECISION) / (input_area_width * input_area_height);
     470             : 
     471           0 :     return block_mean;
     472             : }
     473             : 
     474           0 : void compute_interm_var_four8x8_c(
     475             :     uint8_t *  input_samples,
     476             :     uint16_t   input_stride,
     477             :     uint64_t * mean_of8x8_blocks,      // mean of four  8x8
     478             :     uint64_t * mean_of_squared8x8_blocks)  // meanSquared
     479             : {
     480           0 :     uint32_t blockIndex = 0;
     481             :     // (0,1)
     482           0 :     mean_of8x8_blocks[0] = compute_sub_mean_c(
     483             :         input_samples + blockIndex, input_stride, 8, 8);
     484           0 :     mean_of_squared8x8_blocks[0] = compute_sub_mean_squared_values_c(input_samples + blockIndex, input_stride, 8, 8);
     485             : 
     486             :     // (0,2)
     487           0 :     blockIndex = blockIndex + 8;
     488           0 :     mean_of8x8_blocks[1] = compute_sub_mean_c(input_samples + blockIndex, input_stride, 8, 8);
     489           0 :     mean_of_squared8x8_blocks[1] = compute_sub_mean_squared_values_c(input_samples + blockIndex, input_stride, 8, 8);
     490             : 
     491             :     // (0,3)
     492           0 :     blockIndex = blockIndex + 8;
     493           0 :     mean_of8x8_blocks[2] = compute_sub_mean_c(input_samples + blockIndex, input_stride, 8, 8);
     494           0 :     mean_of_squared8x8_blocks[2] = compute_sub_mean_squared_values_c(input_samples + blockIndex, input_stride, 8, 8);
     495             : 
     496             :     // (0,4)
     497           0 :     blockIndex = blockIndex + 8;
     498           0 :     mean_of8x8_blocks[3] = compute_sub_mean_c(input_samples + blockIndex, input_stride, 8, 8);
     499           0 :     mean_of_squared8x8_blocks[3] = compute_sub_mean_squared_values_c(input_samples + blockIndex, input_stride, 8, 8);
     500           0 : }
     501             : 
     502             : /*******************************************
     503             : ComputeVariance64x64
     504             : this function is exactly the same as
     505             : PictureAnalysisComputeVarianceLcu except it
     506             : does not store data for every block,
     507             : just returns the 64x64 data point
     508             : *******************************************/
     509           0 : uint64_t ComputeVariance64x64(
     510             :     SequenceControlSet        *sequence_control_set_ptr,
     511             :     EbPictureBufferDesc       *input_padded_picture_ptr,         // input parameter, Input Padded Picture
     512             :     uint32_t                       inputLumaOriginIndex,          // input parameter, SB index, used to point to source/reference samples
     513             :     uint64_t                        *variance32x32)
     514             : {
     515             :     uint32_t blockIndex;
     516             : 
     517             :     uint64_t mean_of8x8_blocks[64];
     518             :     uint64_t meanOf8x8SquaredValuesBlocks[64];
     519             : 
     520             :     uint64_t meanOf16x16Blocks[16];
     521             :     uint64_t meanOf16x16SquaredValuesBlocks[16];
     522             : 
     523             :     uint64_t meanOf32x32Blocks[4];
     524             :     uint64_t meanOf32x32SquaredValuesBlocks[4];
     525             : 
     526             :     uint64_t meanOf64x64Blocks;
     527             :     uint64_t meanOf64x64SquaredValuesBlocks;
     528             : 
     529             :     // (0,0)
     530           0 :     blockIndex = inputLumaOriginIndex;
     531           0 :     const uint16_t stride_y = input_padded_picture_ptr->stride_y;
     532           0 :     if (sequence_control_set_ptr->block_mean_calc_prec == BLOCK_MEAN_PREC_FULL) {
     533           0 :         mean_of8x8_blocks[0] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     534           0 :         meanOf8x8SquaredValuesBlocks[0] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     535             : 
     536             :         // (0,1)
     537           0 :         blockIndex = blockIndex + 8;
     538           0 :         mean_of8x8_blocks[1] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     539           0 :         meanOf8x8SquaredValuesBlocks[1] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     540             : 
     541             :         // (0,2)
     542           0 :         blockIndex = blockIndex + 8;
     543           0 :         mean_of8x8_blocks[2] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     544           0 :         meanOf8x8SquaredValuesBlocks[2] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     545             : 
     546             :         // (0,3)
     547           0 :         blockIndex = blockIndex + 8;
     548           0 :         mean_of8x8_blocks[3] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     549           0 :         meanOf8x8SquaredValuesBlocks[3] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     550             : 
     551             :         // (0,4)
     552           0 :         blockIndex = blockIndex + 8;
     553           0 :         mean_of8x8_blocks[4] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     554           0 :         meanOf8x8SquaredValuesBlocks[4] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     555             : 
     556             :         // (0,5)
     557           0 :         blockIndex = blockIndex + 8;
     558           0 :         mean_of8x8_blocks[5] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     559           0 :         meanOf8x8SquaredValuesBlocks[5] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     560             : 
     561             :         // (0,6)
     562           0 :         blockIndex = blockIndex + 8;
     563           0 :         mean_of8x8_blocks[6] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     564           0 :         meanOf8x8SquaredValuesBlocks[6] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     565             : 
     566             :         // (0,7)
     567           0 :         blockIndex = blockIndex + 8;
     568           0 :         mean_of8x8_blocks[7] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     569           0 :         meanOf8x8SquaredValuesBlocks[7] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     570             : 
     571             :         // (1,0)
     572           0 :         blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3);
     573           0 :         mean_of8x8_blocks[8] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     574           0 :         meanOf8x8SquaredValuesBlocks[8] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     575             : 
     576             :         // (1,1)
     577           0 :         blockIndex = blockIndex + 8;
     578           0 :         mean_of8x8_blocks[9] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     579           0 :         meanOf8x8SquaredValuesBlocks[9] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     580             : 
     581             :         // (1,2)
     582           0 :         blockIndex = blockIndex + 8;
     583           0 :         mean_of8x8_blocks[10] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     584           0 :         meanOf8x8SquaredValuesBlocks[10] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     585             : 
     586             :         // (1,3)
     587           0 :         blockIndex = blockIndex + 8;
     588           0 :         mean_of8x8_blocks[11] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     589           0 :         meanOf8x8SquaredValuesBlocks[11] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     590             : 
     591             :         // (1,4)
     592           0 :         blockIndex = blockIndex + 8;
     593           0 :         mean_of8x8_blocks[12] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     594           0 :         meanOf8x8SquaredValuesBlocks[12] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     595             : 
     596             :         // (1,5)
     597           0 :         blockIndex = blockIndex + 8;
     598           0 :         mean_of8x8_blocks[13] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     599           0 :         meanOf8x8SquaredValuesBlocks[13] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     600             : 
     601             :         // (1,6)
     602           0 :         blockIndex = blockIndex + 8;
     603           0 :         mean_of8x8_blocks[14] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     604           0 :         meanOf8x8SquaredValuesBlocks[14] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     605             : 
     606             :         // (1,7)
     607           0 :         blockIndex = blockIndex + 8;
     608           0 :         mean_of8x8_blocks[15] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     609           0 :         meanOf8x8SquaredValuesBlocks[15] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     610             : 
     611             :         // (2,0)
     612           0 :         blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 4);
     613           0 :         mean_of8x8_blocks[16] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     614           0 :         meanOf8x8SquaredValuesBlocks[16] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     615             : 
     616             :         // (2,1)
     617           0 :         blockIndex = blockIndex + 8;
     618           0 :         mean_of8x8_blocks[17] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     619           0 :         meanOf8x8SquaredValuesBlocks[17] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     620             : 
     621             :         // (2,2)
     622           0 :         blockIndex = blockIndex + 8;
     623           0 :         mean_of8x8_blocks[18] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     624           0 :         meanOf8x8SquaredValuesBlocks[18] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     625             : 
     626             :         // (2,3)
     627           0 :         blockIndex = blockIndex + 8;
     628           0 :         mean_of8x8_blocks[19] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     629           0 :         meanOf8x8SquaredValuesBlocks[19] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     630             : 
     631             :         /// (2,4)
     632           0 :         blockIndex = blockIndex + 8;
     633           0 :         mean_of8x8_blocks[20] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     634           0 :         meanOf8x8SquaredValuesBlocks[20] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     635             : 
     636             :         // (2,5)
     637           0 :         blockIndex = blockIndex + 8;
     638           0 :         mean_of8x8_blocks[21] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     639           0 :         meanOf8x8SquaredValuesBlocks[21] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     640             : 
     641             :         // (2,6)
     642           0 :         blockIndex = blockIndex + 8;
     643           0 :         mean_of8x8_blocks[22] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     644           0 :         meanOf8x8SquaredValuesBlocks[22] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     645             : 
     646             :         // (2,7)
     647           0 :         blockIndex = blockIndex + 8;
     648           0 :         mean_of8x8_blocks[23] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     649           0 :         meanOf8x8SquaredValuesBlocks[23] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     650             : 
     651             :         // (3,0)
     652           0 :         blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3) + (input_padded_picture_ptr->stride_y << 4);
     653           0 :         mean_of8x8_blocks[24] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     654           0 :         meanOf8x8SquaredValuesBlocks[24] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     655             : 
     656             :         // (3,1)
     657           0 :         blockIndex = blockIndex + 8;
     658           0 :         mean_of8x8_blocks[25] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     659           0 :         meanOf8x8SquaredValuesBlocks[25] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     660             : 
     661             :         // (3,2)
     662           0 :         blockIndex = blockIndex + 8;
     663           0 :         mean_of8x8_blocks[26] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     664           0 :         meanOf8x8SquaredValuesBlocks[26] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     665             : 
     666             :         // (3,3)
     667           0 :         blockIndex = blockIndex + 8;
     668           0 :         mean_of8x8_blocks[27] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     669           0 :         meanOf8x8SquaredValuesBlocks[27] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     670             : 
     671             :         // (3,4)
     672           0 :         blockIndex = blockIndex + 8;
     673           0 :         mean_of8x8_blocks[28] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     674           0 :         meanOf8x8SquaredValuesBlocks[28] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     675             : 
     676             :         // (3,5)
     677           0 :         blockIndex = blockIndex + 8;
     678           0 :         mean_of8x8_blocks[29] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     679           0 :         meanOf8x8SquaredValuesBlocks[29] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     680             : 
     681             :         // (3,6)
     682           0 :         blockIndex = blockIndex + 8;
     683           0 :         mean_of8x8_blocks[30] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     684           0 :         meanOf8x8SquaredValuesBlocks[30] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     685             : 
     686             :         // (3,7)
     687           0 :         blockIndex = blockIndex + 8;
     688           0 :         mean_of8x8_blocks[31] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     689           0 :         meanOf8x8SquaredValuesBlocks[31] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     690             : 
     691             :         // (4,0)
     692           0 :         blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 5);
     693           0 :         mean_of8x8_blocks[32] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     694           0 :         meanOf8x8SquaredValuesBlocks[32] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     695             : 
     696             :         // (4,1)
     697           0 :         blockIndex = blockIndex + 8;
     698           0 :         mean_of8x8_blocks[33] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     699           0 :         meanOf8x8SquaredValuesBlocks[33] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     700             : 
     701             :         // (4,2)
     702           0 :         blockIndex = blockIndex + 8;
     703           0 :         mean_of8x8_blocks[34] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     704           0 :         meanOf8x8SquaredValuesBlocks[34] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     705             : 
     706             :         // (4,3)
     707           0 :         blockIndex = blockIndex + 8;
     708           0 :         mean_of8x8_blocks[35] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     709           0 :         meanOf8x8SquaredValuesBlocks[35] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     710             : 
     711             :         // (4,4)
     712           0 :         blockIndex = blockIndex + 8;
     713           0 :         mean_of8x8_blocks[36] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     714           0 :         meanOf8x8SquaredValuesBlocks[36] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     715             : 
     716             :         // (4,5)
     717           0 :         blockIndex = blockIndex + 8;
     718           0 :         mean_of8x8_blocks[37] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     719           0 :         meanOf8x8SquaredValuesBlocks[37] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     720             : 
     721             :         // (4,6)
     722           0 :         blockIndex = blockIndex + 8;
     723           0 :         mean_of8x8_blocks[38] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     724           0 :         meanOf8x8SquaredValuesBlocks[38] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     725             : 
     726             :         // (4,7)
     727           0 :         blockIndex = blockIndex + 8;
     728           0 :         mean_of8x8_blocks[39] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     729           0 :         meanOf8x8SquaredValuesBlocks[39] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     730             : 
     731             :         // (5,0)
     732           0 :         blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3) + (input_padded_picture_ptr->stride_y << 5);
     733           0 :         mean_of8x8_blocks[40] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     734           0 :         meanOf8x8SquaredValuesBlocks[40] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     735             : 
     736             :         // (5,1)
     737           0 :         blockIndex = blockIndex + 8;
     738           0 :         mean_of8x8_blocks[41] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     739           0 :         meanOf8x8SquaredValuesBlocks[41] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     740             : 
     741             :         // (5,2)
     742           0 :         blockIndex = blockIndex + 8;
     743           0 :         mean_of8x8_blocks[42] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     744           0 :         meanOf8x8SquaredValuesBlocks[42] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     745             : 
     746             :         // (5,3)
     747           0 :         blockIndex = blockIndex + 8;
     748           0 :         mean_of8x8_blocks[43] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     749           0 :         meanOf8x8SquaredValuesBlocks[43] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     750             : 
     751             :         // (5,4)
     752           0 :         blockIndex = blockIndex + 8;
     753           0 :         mean_of8x8_blocks[44] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     754           0 :         meanOf8x8SquaredValuesBlocks[44] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     755             : 
     756             :         // (5,5)
     757           0 :         blockIndex = blockIndex + 8;
     758           0 :         mean_of8x8_blocks[45] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     759           0 :         meanOf8x8SquaredValuesBlocks[45] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     760             : 
     761             :         // (5,6)
     762           0 :         blockIndex = blockIndex + 8;
     763           0 :         mean_of8x8_blocks[46] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     764           0 :         meanOf8x8SquaredValuesBlocks[46] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     765             : 
     766             :         // (5,7)
     767           0 :         blockIndex = blockIndex + 8;
     768           0 :         mean_of8x8_blocks[47] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     769           0 :         meanOf8x8SquaredValuesBlocks[47] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     770             : 
     771             :         // (6,0)
     772           0 :         blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 4) + (input_padded_picture_ptr->stride_y << 5);
     773           0 :         mean_of8x8_blocks[48] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     774           0 :         meanOf8x8SquaredValuesBlocks[48] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     775             : 
     776             :         // (6,1)
     777           0 :         blockIndex = blockIndex + 8;
     778           0 :         mean_of8x8_blocks[49] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     779           0 :         meanOf8x8SquaredValuesBlocks[49] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     780             : 
     781             :         // (6,2)
     782           0 :         blockIndex = blockIndex + 8;
     783           0 :         mean_of8x8_blocks[50] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     784           0 :         meanOf8x8SquaredValuesBlocks[50] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     785             : 
     786             :         // (6,3)
     787           0 :         blockIndex = blockIndex + 8;
     788           0 :         mean_of8x8_blocks[51] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     789           0 :         meanOf8x8SquaredValuesBlocks[51] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     790             : 
     791             :         // (6,4)
     792           0 :         blockIndex = blockIndex + 8;
     793           0 :         mean_of8x8_blocks[52] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     794           0 :         meanOf8x8SquaredValuesBlocks[52] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     795             : 
     796             :         // (6,5)
     797           0 :         blockIndex = blockIndex + 8;
     798           0 :         mean_of8x8_blocks[53] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     799           0 :         meanOf8x8SquaredValuesBlocks[53] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     800             : 
     801             :         // (6,6)
     802           0 :         blockIndex = blockIndex + 8;
     803           0 :         mean_of8x8_blocks[54] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     804           0 :         meanOf8x8SquaredValuesBlocks[54] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     805             : 
     806             :         // (6,7)
     807           0 :         blockIndex = blockIndex + 8;
     808           0 :         mean_of8x8_blocks[55] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     809           0 :         meanOf8x8SquaredValuesBlocks[55] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     810             : 
     811             :         // (7,0)
     812           0 :         blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3) + (input_padded_picture_ptr->stride_y << 4) + (input_padded_picture_ptr->stride_y << 5);
     813           0 :         mean_of8x8_blocks[56] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     814           0 :         meanOf8x8SquaredValuesBlocks[56] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     815             : 
     816             :         // (7,1)
     817           0 :         blockIndex = blockIndex + 8;
     818           0 :         mean_of8x8_blocks[57] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     819           0 :         meanOf8x8SquaredValuesBlocks[57] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     820             : 
     821             :         // (7,2)
     822           0 :         blockIndex = blockIndex + 8;
     823           0 :         mean_of8x8_blocks[58] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     824           0 :         meanOf8x8SquaredValuesBlocks[58] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     825             : 
     826             :         // (7,3)
     827           0 :         blockIndex = blockIndex + 8;
     828           0 :         mean_of8x8_blocks[59] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     829           0 :         meanOf8x8SquaredValuesBlocks[59] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     830             : 
     831             :         // (7,4)
     832           0 :         blockIndex = blockIndex + 8;
     833           0 :         mean_of8x8_blocks[60] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     834           0 :         meanOf8x8SquaredValuesBlocks[60] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     835             : 
     836             :         // (7,5)
     837           0 :         blockIndex = blockIndex + 8;
     838           0 :         mean_of8x8_blocks[61] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     839           0 :         meanOf8x8SquaredValuesBlocks[61] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     840             : 
     841             :         // (7,6)
     842           0 :         blockIndex = blockIndex + 8;
     843           0 :         mean_of8x8_blocks[62] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     844           0 :         meanOf8x8SquaredValuesBlocks[62] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     845             : 
     846             :         // (7,7)
     847           0 :         blockIndex = blockIndex + 8;
     848           0 :         mean_of8x8_blocks[63] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     849           0 :         meanOf8x8SquaredValuesBlocks[63] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
     850             :     }
     851             : 
     852             :     else {
     853             : 
     854           0 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[0], &meanOf8x8SquaredValuesBlocks[0]);
     855             : 
     856             :         // (0,1)
     857           0 :         blockIndex = blockIndex + 32;
     858             : 
     859           0 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[4], &meanOf8x8SquaredValuesBlocks[4]);
     860             :         // (0,5)
     861           0 :         blockIndex = blockIndex + 24;
     862             : 
     863             :         // (1,0)
     864           0 :         blockIndex = inputLumaOriginIndex + (stride_y << 3);
     865             : 
     866           0 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[8], &meanOf8x8SquaredValuesBlocks[8]);
     867             : 
     868             :         // (1,1)
     869           0 :         blockIndex = blockIndex + 32;
     870             : 
     871           0 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[12], &meanOf8x8SquaredValuesBlocks[12]);
     872             : 
     873             :         // (1,5)
     874           0 :         blockIndex = blockIndex + 24;
     875             : 
     876             :         // (2,0)
     877           0 :         blockIndex = inputLumaOriginIndex + (stride_y << 4);
     878             : 
     879           0 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[16], &meanOf8x8SquaredValuesBlocks[16]);
     880             : 
     881             :         // (2,1)
     882           0 :         blockIndex = blockIndex + 32;
     883             : 
     884           0 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[20], &meanOf8x8SquaredValuesBlocks[20]);
     885             : 
     886             :         // (2,5)
     887           0 :         blockIndex = blockIndex + 24;
     888             : 
     889             :         // (3,0)
     890           0 :         blockIndex = inputLumaOriginIndex + (stride_y << 3) + (stride_y << 4);
     891             : 
     892           0 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[24], &meanOf8x8SquaredValuesBlocks[24]);
     893             : 
     894             :         // (3,1)
     895           0 :         blockIndex = blockIndex + 32;
     896             : 
     897           0 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[28], &meanOf8x8SquaredValuesBlocks[28]);
     898             : 
     899             :         // (3,5)
     900           0 :         blockIndex = blockIndex + 24;
     901             : 
     902             :         // (4,0)
     903           0 :         blockIndex = inputLumaOriginIndex + (stride_y << 5);
     904             : 
     905           0 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[32], &meanOf8x8SquaredValuesBlocks[32]);
     906             : 
     907             :         // (4,1)
     908           0 :         blockIndex = blockIndex + 32;
     909             : 
     910           0 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[36], &meanOf8x8SquaredValuesBlocks[36]);
     911             : 
     912             :         // (4,5)
     913           0 :         blockIndex = blockIndex + 24;
     914             : 
     915             :         // (5,0)
     916           0 :         blockIndex = inputLumaOriginIndex + (stride_y << 3) + (stride_y << 5);
     917             : 
     918           0 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[40], &meanOf8x8SquaredValuesBlocks[40]);
     919             : 
     920             :         // (5,1)
     921           0 :         blockIndex = blockIndex + 32;
     922             : 
     923           0 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[44], &meanOf8x8SquaredValuesBlocks[44]);
     924             : 
     925             :         // (5,5)
     926           0 :         blockIndex = blockIndex + 24;
     927             : 
     928             :         // (6,0)
     929           0 :         blockIndex = inputLumaOriginIndex + (stride_y << 4) + (stride_y << 5);
     930             : 
     931           0 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[48], &meanOf8x8SquaredValuesBlocks[48]);
     932             : 
     933             :         // (6,1)
     934           0 :         blockIndex = blockIndex + 32;
     935             : 
     936           0 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[52], &meanOf8x8SquaredValuesBlocks[52]);
     937             : 
     938             :         // (6,5)
     939           0 :         blockIndex = blockIndex + 24;
     940             : 
     941             :         // (7,0)
     942           0 :         blockIndex = inputLumaOriginIndex + (stride_y << 3) + (stride_y << 4) + (stride_y << 5);
     943             : 
     944           0 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[56], &meanOf8x8SquaredValuesBlocks[56]);
     945             : 
     946             :         // (7,1)
     947           0 :         blockIndex = blockIndex + 32;
     948             : 
     949           0 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[60], &meanOf8x8SquaredValuesBlocks[60]);
     950             : 
     951             :     }
     952             : 
     953             :     // 16x16
     954           0 :     meanOf16x16Blocks[0] = (mean_of8x8_blocks[0] + mean_of8x8_blocks[1] + mean_of8x8_blocks[8] + mean_of8x8_blocks[9]) >> 2;
     955           0 :     meanOf16x16Blocks[1] = (mean_of8x8_blocks[2] + mean_of8x8_blocks[3] + mean_of8x8_blocks[10] + mean_of8x8_blocks[11]) >> 2;
     956           0 :     meanOf16x16Blocks[2] = (mean_of8x8_blocks[4] + mean_of8x8_blocks[5] + mean_of8x8_blocks[12] + mean_of8x8_blocks[13]) >> 2;
     957           0 :     meanOf16x16Blocks[3] = (mean_of8x8_blocks[6] + mean_of8x8_blocks[7] + mean_of8x8_blocks[14] + mean_of8x8_blocks[15]) >> 2;
     958             : 
     959           0 :     meanOf16x16Blocks[4] = (mean_of8x8_blocks[16] + mean_of8x8_blocks[17] + mean_of8x8_blocks[24] + mean_of8x8_blocks[25]) >> 2;
     960           0 :     meanOf16x16Blocks[5] = (mean_of8x8_blocks[18] + mean_of8x8_blocks[19] + mean_of8x8_blocks[26] + mean_of8x8_blocks[27]) >> 2;
     961           0 :     meanOf16x16Blocks[6] = (mean_of8x8_blocks[20] + mean_of8x8_blocks[21] + mean_of8x8_blocks[28] + mean_of8x8_blocks[29]) >> 2;
     962           0 :     meanOf16x16Blocks[7] = (mean_of8x8_blocks[22] + mean_of8x8_blocks[23] + mean_of8x8_blocks[30] + mean_of8x8_blocks[31]) >> 2;
     963             : 
     964           0 :     meanOf16x16Blocks[8] = (mean_of8x8_blocks[32] + mean_of8x8_blocks[33] + mean_of8x8_blocks[40] + mean_of8x8_blocks[41]) >> 2;
     965           0 :     meanOf16x16Blocks[9] = (mean_of8x8_blocks[34] + mean_of8x8_blocks[35] + mean_of8x8_blocks[42] + mean_of8x8_blocks[43]) >> 2;
     966           0 :     meanOf16x16Blocks[10] = (mean_of8x8_blocks[36] + mean_of8x8_blocks[37] + mean_of8x8_blocks[44] + mean_of8x8_blocks[45]) >> 2;
     967           0 :     meanOf16x16Blocks[11] = (mean_of8x8_blocks[38] + mean_of8x8_blocks[39] + mean_of8x8_blocks[46] + mean_of8x8_blocks[47]) >> 2;
     968             : 
     969           0 :     meanOf16x16Blocks[12] = (mean_of8x8_blocks[48] + mean_of8x8_blocks[49] + mean_of8x8_blocks[56] + mean_of8x8_blocks[57]) >> 2;
     970           0 :     meanOf16x16Blocks[13] = (mean_of8x8_blocks[50] + mean_of8x8_blocks[51] + mean_of8x8_blocks[58] + mean_of8x8_blocks[59]) >> 2;
     971           0 :     meanOf16x16Blocks[14] = (mean_of8x8_blocks[52] + mean_of8x8_blocks[53] + mean_of8x8_blocks[60] + mean_of8x8_blocks[61]) >> 2;
     972           0 :     meanOf16x16Blocks[15] = (mean_of8x8_blocks[54] + mean_of8x8_blocks[55] + mean_of8x8_blocks[62] + mean_of8x8_blocks[63]) >> 2;
     973             : 
     974           0 :     meanOf16x16SquaredValuesBlocks[0] = (meanOf8x8SquaredValuesBlocks[0] + meanOf8x8SquaredValuesBlocks[1] + meanOf8x8SquaredValuesBlocks[8] + meanOf8x8SquaredValuesBlocks[9]) >> 2;
     975           0 :     meanOf16x16SquaredValuesBlocks[1] = (meanOf8x8SquaredValuesBlocks[2] + meanOf8x8SquaredValuesBlocks[3] + meanOf8x8SquaredValuesBlocks[10] + meanOf8x8SquaredValuesBlocks[11]) >> 2;
     976           0 :     meanOf16x16SquaredValuesBlocks[2] = (meanOf8x8SquaredValuesBlocks[4] + meanOf8x8SquaredValuesBlocks[5] + meanOf8x8SquaredValuesBlocks[12] + meanOf8x8SquaredValuesBlocks[13]) >> 2;
     977           0 :     meanOf16x16SquaredValuesBlocks[3] = (meanOf8x8SquaredValuesBlocks[6] + meanOf8x8SquaredValuesBlocks[7] + meanOf8x8SquaredValuesBlocks[14] + meanOf8x8SquaredValuesBlocks[15]) >> 2;
     978             : 
     979           0 :     meanOf16x16SquaredValuesBlocks[4] = (meanOf8x8SquaredValuesBlocks[16] + meanOf8x8SquaredValuesBlocks[17] + meanOf8x8SquaredValuesBlocks[24] + meanOf8x8SquaredValuesBlocks[25]) >> 2;
     980           0 :     meanOf16x16SquaredValuesBlocks[5] = (meanOf8x8SquaredValuesBlocks[18] + meanOf8x8SquaredValuesBlocks[19] + meanOf8x8SquaredValuesBlocks[26] + meanOf8x8SquaredValuesBlocks[27]) >> 2;
     981           0 :     meanOf16x16SquaredValuesBlocks[6] = (meanOf8x8SquaredValuesBlocks[20] + meanOf8x8SquaredValuesBlocks[21] + meanOf8x8SquaredValuesBlocks[28] + meanOf8x8SquaredValuesBlocks[29]) >> 2;
     982           0 :     meanOf16x16SquaredValuesBlocks[7] = (meanOf8x8SquaredValuesBlocks[22] + meanOf8x8SquaredValuesBlocks[23] + meanOf8x8SquaredValuesBlocks[30] + meanOf8x8SquaredValuesBlocks[31]) >> 2;
     983             : 
     984           0 :     meanOf16x16SquaredValuesBlocks[8] = (meanOf8x8SquaredValuesBlocks[32] + meanOf8x8SquaredValuesBlocks[33] + meanOf8x8SquaredValuesBlocks[40] + meanOf8x8SquaredValuesBlocks[41]) >> 2;
     985           0 :     meanOf16x16SquaredValuesBlocks[9] = (meanOf8x8SquaredValuesBlocks[34] + meanOf8x8SquaredValuesBlocks[35] + meanOf8x8SquaredValuesBlocks[42] + meanOf8x8SquaredValuesBlocks[43]) >> 2;
     986           0 :     meanOf16x16SquaredValuesBlocks[10] = (meanOf8x8SquaredValuesBlocks[36] + meanOf8x8SquaredValuesBlocks[37] + meanOf8x8SquaredValuesBlocks[44] + meanOf8x8SquaredValuesBlocks[45]) >> 2;
     987           0 :     meanOf16x16SquaredValuesBlocks[11] = (meanOf8x8SquaredValuesBlocks[38] + meanOf8x8SquaredValuesBlocks[39] + meanOf8x8SquaredValuesBlocks[46] + meanOf8x8SquaredValuesBlocks[47]) >> 2;
     988             : 
     989           0 :     meanOf16x16SquaredValuesBlocks[12] = (meanOf8x8SquaredValuesBlocks[48] + meanOf8x8SquaredValuesBlocks[49] + meanOf8x8SquaredValuesBlocks[56] + meanOf8x8SquaredValuesBlocks[57]) >> 2;
     990           0 :     meanOf16x16SquaredValuesBlocks[13] = (meanOf8x8SquaredValuesBlocks[50] + meanOf8x8SquaredValuesBlocks[51] + meanOf8x8SquaredValuesBlocks[58] + meanOf8x8SquaredValuesBlocks[59]) >> 2;
     991           0 :     meanOf16x16SquaredValuesBlocks[14] = (meanOf8x8SquaredValuesBlocks[52] + meanOf8x8SquaredValuesBlocks[53] + meanOf8x8SquaredValuesBlocks[60] + meanOf8x8SquaredValuesBlocks[61]) >> 2;
     992           0 :     meanOf16x16SquaredValuesBlocks[15] = (meanOf8x8SquaredValuesBlocks[54] + meanOf8x8SquaredValuesBlocks[55] + meanOf8x8SquaredValuesBlocks[62] + meanOf8x8SquaredValuesBlocks[63]) >> 2;
     993             : 
     994             :     // 32x32
     995           0 :     meanOf32x32Blocks[0] = (meanOf16x16Blocks[0] + meanOf16x16Blocks[1] + meanOf16x16Blocks[4] + meanOf16x16Blocks[5]) >> 2;
     996           0 :     meanOf32x32Blocks[1] = (meanOf16x16Blocks[2] + meanOf16x16Blocks[3] + meanOf16x16Blocks[6] + meanOf16x16Blocks[7]) >> 2;
     997           0 :     meanOf32x32Blocks[2] = (meanOf16x16Blocks[8] + meanOf16x16Blocks[9] + meanOf16x16Blocks[12] + meanOf16x16Blocks[13]) >> 2;
     998           0 :     meanOf32x32Blocks[3] = (meanOf16x16Blocks[10] + meanOf16x16Blocks[11] + meanOf16x16Blocks[14] + meanOf16x16Blocks[15]) >> 2;
     999             : 
    1000           0 :     meanOf32x32SquaredValuesBlocks[0] = (meanOf16x16SquaredValuesBlocks[0] + meanOf16x16SquaredValuesBlocks[1] + meanOf16x16SquaredValuesBlocks[4] + meanOf16x16SquaredValuesBlocks[5]) >> 2;
    1001           0 :     meanOf32x32SquaredValuesBlocks[1] = (meanOf16x16SquaredValuesBlocks[2] + meanOf16x16SquaredValuesBlocks[3] + meanOf16x16SquaredValuesBlocks[6] + meanOf16x16SquaredValuesBlocks[7]) >> 2;
    1002           0 :     meanOf32x32SquaredValuesBlocks[2] = (meanOf16x16SquaredValuesBlocks[8] + meanOf16x16SquaredValuesBlocks[9] + meanOf16x16SquaredValuesBlocks[12] + meanOf16x16SquaredValuesBlocks[13]) >> 2;
    1003           0 :     meanOf32x32SquaredValuesBlocks[3] = (meanOf16x16SquaredValuesBlocks[10] + meanOf16x16SquaredValuesBlocks[11] + meanOf16x16SquaredValuesBlocks[14] + meanOf16x16SquaredValuesBlocks[15]) >> 2;
    1004             : 
    1005           0 :     variance32x32[0] = meanOf32x32SquaredValuesBlocks[0] - (meanOf32x32Blocks[0] * meanOf32x32Blocks[0]);
    1006           0 :     variance32x32[1] = meanOf32x32SquaredValuesBlocks[1] - (meanOf32x32Blocks[1] * meanOf32x32Blocks[1]);
    1007           0 :     variance32x32[2] = meanOf32x32SquaredValuesBlocks[2] - (meanOf32x32Blocks[2] * meanOf32x32Blocks[2]);
    1008           0 :     variance32x32[3] = meanOf32x32SquaredValuesBlocks[3] - (meanOf32x32Blocks[3] * meanOf32x32Blocks[3]);
    1009             : 
    1010             :     // 64x64
    1011           0 :     meanOf64x64Blocks = (meanOf32x32Blocks[0] + meanOf32x32Blocks[1] + meanOf32x32Blocks[2] + meanOf32x32Blocks[3]) >> 2;
    1012           0 :     meanOf64x64SquaredValuesBlocks = (meanOf32x32SquaredValuesBlocks[0] + meanOf32x32SquaredValuesBlocks[1] + meanOf32x32SquaredValuesBlocks[2] + meanOf32x32SquaredValuesBlocks[3]) >> 2;
    1013             : 
    1014           0 :     return (meanOf64x64SquaredValuesBlocks - (meanOf64x64Blocks * meanOf64x64Blocks));
    1015             : }
    1016             : 
    /*******************************************
    * getFilteredTypes
    *  Applies one of seven fixed-weight 3x3 smoothing kernels to the
    *  sample at *ptr and returns the result after CLIP3EQ(0, 255, .).
    *
    *  ptr        : centre sample. The samples directly above, below, left
    *               and right of it are read for every kernel; the four
    *               diagonal neighbours are read only for kernels 3..6.
    *  stride     : distance, in samples, between vertically adjacent samples.
    *  filterType : kernel selector 0..6 (weights listed per case below).
    *               Any other value leaves the accumulator at 0.
    *******************************************/
    uint8_t  getFilteredTypes(uint8_t  *ptr,
        uint32_t  stride,
        uint8_t   filterType)
    {
        // The three rows of the 3x3 window; index 0/1/2 = left/centre/right.
        const uint8_t *top = ptr - 1 - stride;
        const uint8_t *mid = top + stride;
        const uint8_t *bot = mid + stride;

        uint32_t filtered = 0;

        switch (filterType) {
        case 0:
            // Luma: plus-shaped kernel, weights 1 with centre 4, normalised by 8
            filtered = (top[1] +
                mid[0] + 4 * mid[1] + mid[2] +
                bot[1]) / 8;
            break;

        case 1:
            // Plus-shaped kernel, weights 2 with centre 4 (sum of weights = 12)
            filtered = (2 * top[1] +
                2 * mid[0] + 4 * mid[1] + 2 * mid[2] +
                2 * bot[1]);

            // Rounded fixed-point divide by 12 (2730 / 2^15 ~= 1/12), written as
            // ((x * 2730 >> 14) + 1) >> 1 so that it mimics the x86 instruction
            // _mm256_mulhrs_epi16 rather than a truncating (x * 2730) >> 15.
            filtered = ((((filtered * 2730) >> 14) + 1) >> 1) & 0xFFFF;
            break;

        case 2:
            // Plus-shaped kernel, all five taps weighted 4, normalised by 20
            filtered = (4 * top[1] +
                4 * mid[0] + 4 * mid[1] + 4 * mid[2] +
                4 * bot[1]) / 20;
            break;

        case 3:
            // Full 3x3 kernel, weights 1 with centre 4, normalised by 12
            filtered = (1 * top[0] + 1 * top[1] + 1 * top[2] +
                1 * mid[0] + 4 * mid[1] + 1 * mid[2] +
                1 * bot[0] + 1 * bot[1] + 1 * bot[2]) / 12;
            break;

        case 4:
            // Gaussian matrix (Chroma): 1 2 1 / 2 4 2 / 1 2 1, normalised by 16
            filtered = (1 * top[0] + 2 * top[1] + 1 * top[2] +
                2 * mid[0] + 4 * mid[1] + 2 * mid[2] +
                1 * bot[0] + 2 * bot[1] + 1 * bot[2]) / 16;
            break;

        case 5:
            // Full 3x3 kernel, weights 2 with centre 4, normalised by 20
            filtered = (2 * top[0] + 2 * top[1] + 2 * top[2] +
                2 * mid[0] + 4 * mid[1] + 2 * mid[2] +
                2 * bot[0] + 2 * bot[1] + 2 * bot[2]) / 20;
            break;

        case 6:
            // Full 3x3 kernel, all nine taps weighted 4, normalised by 36
            filtered = (4 * top[0] + 4 * top[1] + 4 * top[2] +
                4 * mid[0] + 4 * mid[1] + 4 * mid[2] +
                4 * bot[0] + 4 * bot[1] + 4 * bot[2]) / 36;
            break;

        default:
            // Unknown selector: nothing is read, the accumulator stays 0.
            break;
        }

        return  (uint8_t)CLIP3EQ(0, 255, filtered);
    }
    1070             : 
    1071             : /*******************************************
    1072             : * noise_extract_luma_strong
    1073             : *  strong filter Luma.
    1074             : *******************************************/
    1075           0 : void noise_extract_luma_strong_c(
    1076             :     EbPictureBufferDesc       *input_picture_ptr,
    1077             :     EbPictureBufferDesc       *denoised_picture_ptr,
    1078             :     uint32_t                       sb_origin_y
    1079             :     , uint32_t                       sb_origin_x
    1080             : )
    1081             : {
    1082             :     uint32_t  ii, jj;
    1083             :     uint32_t  picHeight, sb_height;
    1084             :     uint32_t  picWidth;
    1085             :     uint32_t  inputOriginIndex;
    1086             :     uint32_t  inputOriginIndexPad;
    1087             : 
    1088             :     uint8_t *ptrIn;
    1089             :     uint32_t stride_in;
    1090             :     uint8_t *ptr_denoised;
    1091             : 
    1092             :     uint32_t strideOut;
    1093           0 :     uint32_t idx = (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width) ? sb_origin_x : 0;
    1094             : 
    1095             :     //Luma
    1096             :     {
    1097           0 :         picHeight = input_picture_ptr->height;
    1098           0 :         picWidth = input_picture_ptr->width;
    1099           0 :         sb_height = MIN(BLOCK_SIZE_64, picHeight - sb_origin_y);
    1100             : 
    1101           0 :         stride_in = input_picture_ptr->stride_y;
    1102           0 :         inputOriginIndex = input_picture_ptr->origin_x + (input_picture_ptr->origin_y + sb_origin_y)* input_picture_ptr->stride_y;
    1103           0 :         ptrIn = &(input_picture_ptr->buffer_y[inputOriginIndex]);
    1104             : 
    1105           0 :         inputOriginIndexPad = denoised_picture_ptr->origin_x + (denoised_picture_ptr->origin_y + sb_origin_y) * denoised_picture_ptr->stride_y;
    1106           0 :         strideOut = denoised_picture_ptr->stride_y;
    1107           0 :         ptr_denoised = &(denoised_picture_ptr->buffer_y[inputOriginIndexPad]);
    1108             : 
    1109           0 :         for (jj = 0; jj < sb_height; jj++) {
    1110           0 :             for (ii = idx; ii < picWidth; ii++) {
    1111           0 :                 if ((jj > 0 || sb_origin_y > 0) && (jj < sb_height - 1 || sb_origin_y + sb_height < picHeight) && ii > 0 && ii < picWidth - 1)
    1112           0 :                     ptr_denoised[ii + jj * strideOut] = getFilteredTypes(&ptrIn[ii + jj * stride_in], stride_in, 4);
    1113             :                 else
    1114           0 :                     ptr_denoised[ii + jj * strideOut] = ptrIn[ii + jj * stride_in];
    1115             :             }
    1116             :         }
    1117             :     }
    1118           0 : }
    1119             : /*******************************************
    1120             : * noise_extract_chroma_strong
    1121             : *  strong filter chroma.
    1122             : *******************************************/
    1123           0 : void noise_extract_chroma_strong_c(
    1124             :     EbPictureBufferDesc       *input_picture_ptr,
    1125             :     EbPictureBufferDesc       *denoised_picture_ptr,
    1126             :     uint32_t                       sb_origin_y
    1127             :     , uint32_t                       sb_origin_x
    1128             : )
    1129             : {
    1130             :     uint32_t  ii, jj;
    1131             :     uint32_t  picHeight, sb_height;
    1132             :     uint32_t  picWidth;
    1133             :     uint32_t  inputOriginIndex;
    1134             :     uint32_t  inputOriginIndexPad;
    1135             : 
    1136             :     uint8_t *ptrIn;
    1137             :     uint32_t stride_in;
    1138             :     uint8_t *ptr_denoised;
    1139             : 
    1140             :     uint32_t strideOut;
    1141           0 :     uint32_t idx = (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width) ? sb_origin_x : 0;
    1142             : 
    1143           0 :     uint32_t color_format = input_picture_ptr->color_format;
    1144           0 :     const uint16_t subsampling_x = (color_format == EB_YUV444 ? 1 : 2) - 1;
    1145           0 :     const uint16_t subsampling_y = (color_format >= EB_YUV422 ? 1 : 2) - 1;
    1146             : 
    1147             :     //Cb
    1148             :     {
    1149           0 :         picHeight = input_picture_ptr->height >> subsampling_y;
    1150           0 :         picWidth = input_picture_ptr->width >> subsampling_x;
    1151           0 :         sb_height = MIN(BLOCK_SIZE_64 >> subsampling_y, picHeight - sb_origin_y);
    1152             : 
    1153           0 :         stride_in = input_picture_ptr->stride_cb;
    1154           0 :         inputOriginIndex = (input_picture_ptr->origin_x >> subsampling_x) + ((input_picture_ptr->origin_y >> subsampling_y) + sb_origin_y) * input_picture_ptr->stride_cb;
    1155           0 :         ptrIn = &(input_picture_ptr->buffer_cb[inputOriginIndex]);
    1156             : 
    1157           0 :         inputOriginIndexPad = (denoised_picture_ptr->origin_x >> subsampling_x) + ((denoised_picture_ptr->origin_y >> subsampling_y) + sb_origin_y) * denoised_picture_ptr->stride_cb;
    1158           0 :         strideOut = denoised_picture_ptr->stride_cb;
    1159           0 :         ptr_denoised = &(denoised_picture_ptr->buffer_cb[inputOriginIndexPad]);
    1160             : 
    1161           0 :         for (jj = 0; jj < sb_height; jj++) {
    1162           0 :             for (ii = idx; ii < picWidth; ii++) {
    1163           0 :                 if ((jj > 0 || sb_origin_y > 0) && (jj < sb_height - 1 || (sb_origin_y + sb_height) < picHeight) && ii > 0 && ii < picWidth - 1)
    1164           0 :                     ptr_denoised[ii + jj * strideOut] = getFilteredTypes(&ptrIn[ii + jj * stride_in], stride_in, 6);
    1165             :                 else
    1166           0 :                     ptr_denoised[ii + jj * strideOut] = ptrIn[ii + jj * stride_in];
    1167             :             }
    1168             :         }
    1169             :     }
    1170             : 
    1171             :     //Cr
    1172             :     {
    1173           0 :         picHeight = input_picture_ptr->height >> subsampling_y;
    1174           0 :         picWidth = input_picture_ptr->width >> subsampling_x;
    1175           0 :         sb_height = MIN(BLOCK_SIZE_64 >> subsampling_y, picHeight - sb_origin_y);
    1176             : 
    1177           0 :         stride_in = input_picture_ptr->stride_cr;
    1178           0 :         inputOriginIndex = (input_picture_ptr->origin_x >> subsampling_x) + ((input_picture_ptr->origin_y >> subsampling_y) + sb_origin_y) * input_picture_ptr->stride_cr;
    1179             : 
    1180           0 :         ptrIn = &(input_picture_ptr->buffer_cr[inputOriginIndex]);
    1181             : 
    1182           0 :         inputOriginIndexPad = (denoised_picture_ptr->origin_x >> subsampling_x) + ((denoised_picture_ptr->origin_y >> subsampling_y) + sb_origin_y) * denoised_picture_ptr->stride_cr;
    1183           0 :         strideOut = denoised_picture_ptr->stride_cr;
    1184           0 :         ptr_denoised = &(denoised_picture_ptr->buffer_cr[inputOriginIndexPad]);
    1185             : 
    1186           0 :         for (jj = 0; jj < sb_height; jj++) {
    1187           0 :             for (ii = idx; ii < picWidth; ii++) {
    1188           0 :                 if ((jj > 0 || sb_origin_y > 0) && (jj < sb_height - 1 || (sb_origin_y + sb_height) < picHeight) && ii > 0 && ii < picWidth - 1)
    1189           0 :                     ptr_denoised[ii + jj * strideOut] = getFilteredTypes(&ptrIn[ii + jj * stride_in], stride_in, 6);
    1190             :                 else
    1191           0 :                     ptr_denoised[ii + jj * strideOut] = ptrIn[ii + jj * stride_in];
    1192             :             }
    1193             :         }
    1194             :     }
    1195           0 : }
    1196             : 
    1197             : /*******************************************
    1198             : * noise_extract_chroma_weak
    1199             : *  weak filter chroma.
    1200             : *******************************************/
    1201           0 : void noise_extract_chroma_weak_c(
    1202             :     EbPictureBufferDesc       *input_picture_ptr,
    1203             :     EbPictureBufferDesc       *denoised_picture_ptr,
    1204             :     uint32_t                       sb_origin_y
    1205             :     , uint32_t                       sb_origin_x
    1206             : )
    1207             : {
    1208             :     uint32_t  ii, jj;
    1209             :     uint32_t  picHeight, sb_height;
    1210             :     uint32_t  picWidth;
    1211             :     uint32_t  inputOriginIndex;
    1212             :     uint32_t  inputOriginIndexPad;
    1213             : 
    1214             :     uint8_t *ptrIn;
    1215             :     uint32_t stride_in;
    1216             :     uint8_t *ptr_denoised;
    1217             : 
    1218             :     uint32_t strideOut;
    1219             : 
    1220           0 :     uint32_t idx = (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width) ? sb_origin_x : 0;
    1221             : 
    1222           0 :     uint32_t color_format = input_picture_ptr->color_format;
    1223           0 :     const uint16_t subsampling_x = (color_format == EB_YUV444 ? 1 : 2) - 1;
    1224           0 :     const uint16_t subsampling_y = (color_format >= EB_YUV422 ? 1 : 2) - 1;
    1225             : 
    1226             :     //Cb
    1227             :     {
    1228           0 :         picHeight = input_picture_ptr->height >> subsampling_y;
    1229           0 :         picWidth = input_picture_ptr->width >> subsampling_x;
    1230           0 :         sb_height = MIN(BLOCK_SIZE_64 >> subsampling_y, picHeight - sb_origin_y);
    1231             : 
    1232           0 :         stride_in = input_picture_ptr->stride_cb;
    1233           0 :         inputOriginIndex = (input_picture_ptr->origin_x >> subsampling_x) + ((input_picture_ptr->origin_y >> subsampling_y) + sb_origin_y) * input_picture_ptr->stride_cb;
    1234             : 
    1235           0 :         ptrIn = &(input_picture_ptr->buffer_cb[inputOriginIndex]);
    1236             : 
    1237           0 :         inputOriginIndexPad = (denoised_picture_ptr->origin_x >> subsampling_x) + ((denoised_picture_ptr->origin_y >> subsampling_y) + sb_origin_y) * denoised_picture_ptr->stride_cb;
    1238             : 
    1239           0 :         strideOut = denoised_picture_ptr->stride_cb;
    1240           0 :         ptr_denoised = &(denoised_picture_ptr->buffer_cb[inputOriginIndexPad]);
    1241             : 
    1242           0 :         for (jj = 0; jj < sb_height; jj++) {
    1243           0 :             for (ii = idx; ii < picWidth; ii++) {
    1244           0 :                 if ((jj > 0 || sb_origin_y > 0) && (jj < sb_height - 1 || (sb_origin_y + sb_height) < picHeight) && ii > 0 && ii < picWidth - 1)
    1245           0 :                     ptr_denoised[ii + jj * strideOut] = getFilteredTypes(&ptrIn[ii + jj * stride_in], stride_in, 4);
    1246             :                 else
    1247           0 :                     ptr_denoised[ii + jj * strideOut] = ptrIn[ii + jj * stride_in];
    1248             :             }
    1249             :         }
    1250             :     }
    1251             : 
    1252             :     //Cr
    1253             :     {
    1254           0 :         picHeight = input_picture_ptr->height >> subsampling_y;
    1255           0 :         picWidth = input_picture_ptr->width >> subsampling_x;
    1256           0 :         sb_height = MIN(BLOCK_SIZE_64 >> subsampling_y, picHeight - sb_origin_y);
    1257             : 
    1258           0 :         stride_in = input_picture_ptr->stride_cr;
    1259           0 :         inputOriginIndex = (input_picture_ptr->origin_x >> subsampling_x) + ((input_picture_ptr->origin_y >> subsampling_y) + sb_origin_y) * input_picture_ptr->stride_cr;
    1260           0 :         ptrIn = &(input_picture_ptr->buffer_cr[inputOriginIndex]);
    1261             : 
    1262           0 :         inputOriginIndexPad = (denoised_picture_ptr->origin_x >> subsampling_x) + ((denoised_picture_ptr->origin_y >> subsampling_y) + sb_origin_y) * denoised_picture_ptr->stride_cr;
    1263           0 :         strideOut = denoised_picture_ptr->stride_cr;
    1264           0 :         ptr_denoised = &(denoised_picture_ptr->buffer_cr[inputOriginIndexPad]);
    1265             : 
    1266           0 :         for (jj = 0; jj < sb_height; jj++) {
    1267           0 :             for (ii = idx; ii < picWidth; ii++) {
    1268           0 :                 if ((jj > 0 || sb_origin_y > 0) && (jj < sb_height - 1 || (sb_origin_y + sb_height) < picHeight) && ii > 0 && ii < picWidth - 1)
    1269           0 :                     ptr_denoised[ii + jj * strideOut] = getFilteredTypes(&ptrIn[ii + jj * stride_in], stride_in, 4);
    1270             :                 else
    1271           0 :                     ptr_denoised[ii + jj * strideOut] = ptrIn[ii + jj * stride_in];
    1272             :             }
    1273             :         }
    1274             :     }
    1275           0 : }
    1276             : 
    1277             : /*******************************************
    1278             : * noise_extract_luma_weak
    1279             : *  weak filter Luma and store noise.
    1280             : *******************************************/
    1281           0 : void noise_extract_luma_weak_c(
    1282             :     EbPictureBufferDesc       *input_picture_ptr,
    1283             :     EbPictureBufferDesc       *denoised_picture_ptr,
    1284             :     EbPictureBufferDesc       *noise_picture_ptr,
    1285             :     uint32_t                       sb_origin_y
    1286             :     , uint32_t                         sb_origin_x
    1287             : )
    1288             : {
    1289             :     uint32_t  ii, jj;
    1290             :     uint32_t  picHeight, sb_height;
    1291             :     uint32_t  picWidth;
    1292             :     uint32_t  inputOriginIndex;
    1293             :     uint32_t  inputOriginIndexPad;
    1294             :     uint32_t  noiseOriginIndex;
    1295             : 
    1296             :     uint8_t *ptrIn;
    1297             :     uint32_t stride_in;
    1298             :     uint8_t *ptr_denoised;
    1299             : 
    1300             :     uint8_t *ptr_noise;
    1301             :     uint32_t strideOut;
    1302             : 
    1303           0 :     uint32_t idx = (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width) ? sb_origin_x : 0;
    1304             : 
    1305             :     //Luma
    1306             :     {
    1307           0 :         picHeight = input_picture_ptr->height;
    1308           0 :         picWidth = input_picture_ptr->width;
    1309           0 :         sb_height = MIN(BLOCK_SIZE_64, picHeight - sb_origin_y);
    1310             : 
    1311           0 :         stride_in = input_picture_ptr->stride_y;
    1312           0 :         inputOriginIndex = input_picture_ptr->origin_x + (input_picture_ptr->origin_y + sb_origin_y) * input_picture_ptr->stride_y;
    1313           0 :         ptrIn = &(input_picture_ptr->buffer_y[inputOriginIndex]);
    1314             : 
    1315           0 :         inputOriginIndexPad = denoised_picture_ptr->origin_x + (denoised_picture_ptr->origin_y + sb_origin_y) * denoised_picture_ptr->stride_y;
    1316           0 :         strideOut = denoised_picture_ptr->stride_y;
    1317           0 :         ptr_denoised = &(denoised_picture_ptr->buffer_y[inputOriginIndexPad]);
    1318             : 
    1319           0 :         noiseOriginIndex = noise_picture_ptr->origin_x + noise_picture_ptr->origin_y * noise_picture_ptr->stride_y;
    1320           0 :         ptr_noise = &(noise_picture_ptr->buffer_y[noiseOriginIndex]);
    1321             : 
    1322           0 :         for (jj = 0; jj < sb_height; jj++) {
    1323           0 :             for (ii = idx; ii < picWidth; ii++) {
    1324           0 :                 if ((jj > 0 || sb_origin_y > 0) && (jj < sb_height - 1 || sb_origin_y + sb_height < picHeight) && ii > 0 && ii < picWidth - 1) {
    1325           0 :                     ptr_denoised[ii + jj * strideOut] = getFilteredTypes(&ptrIn[ii + jj * stride_in], stride_in, 0);
    1326           0 :                     ptr_noise[ii + jj * strideOut] = CLIP3EQ(0, 255, ptrIn[ii + jj * stride_in] - ptr_denoised[ii + jj * strideOut]);
    1327             :                 }
    1328             :                 else {
    1329           0 :                     ptr_denoised[ii + jj * strideOut] = ptrIn[ii + jj * stride_in];
    1330           0 :                     ptr_noise[ii + jj * strideOut] = 0;
    1331             :                 }
    1332             :             }
    1333             :         }
    1334             :     }
    1335           0 : }
    1336             : 
    1337           0 : void noise_extract_luma_weak_lcu_c(
    1338             :     EbPictureBufferDesc       *input_picture_ptr,
    1339             :     EbPictureBufferDesc       *denoised_picture_ptr,
    1340             :     EbPictureBufferDesc       *noise_picture_ptr,
    1341             :     uint32_t                       sb_origin_y
    1342             :     , uint32_t                         sb_origin_x
    1343             : )
    1344             : {
    1345             :     uint32_t  ii, jj;
    1346             :     uint32_t  picHeight, sb_height;
    1347             :     uint32_t  picWidth, sb_width;
    1348             :     uint32_t  inputOriginIndex;
    1349             :     uint32_t  inputOriginIndexPad;
    1350             :     uint32_t  noiseOriginIndex;
    1351             : 
    1352             :     uint8_t *ptrIn;
    1353             :     uint32_t stride_in;
    1354             :     uint8_t *ptr_denoised;
    1355             : 
    1356             :     uint8_t *ptr_noise;
    1357             :     uint32_t strideOut;
    1358             : 
    1359           0 :     uint32_t idx = (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width) ? sb_origin_x : 0;
    1360             : 
    1361             :     //Luma
    1362             :     {
    1363           0 :         picHeight = input_picture_ptr->height;
    1364           0 :         picWidth = input_picture_ptr->width;
    1365           0 :         sb_height = MIN(BLOCK_SIZE_64, picHeight - sb_origin_y);
    1366           0 :         sb_width = MIN(BLOCK_SIZE_64, picWidth - sb_origin_x);
    1367             : 
    1368           0 :         stride_in = input_picture_ptr->stride_y;
    1369           0 :         inputOriginIndex = input_picture_ptr->origin_x + sb_origin_x + (input_picture_ptr->origin_y + sb_origin_y) * input_picture_ptr->stride_y;
    1370           0 :         ptrIn = &(input_picture_ptr->buffer_y[inputOriginIndex]);
    1371             : 
    1372           0 :         inputOriginIndexPad = denoised_picture_ptr->origin_x + sb_origin_x + (denoised_picture_ptr->origin_y + sb_origin_y) * denoised_picture_ptr->stride_y;
    1373           0 :         strideOut = denoised_picture_ptr->stride_y;
    1374           0 :         ptr_denoised = &(denoised_picture_ptr->buffer_y[inputOriginIndexPad]);
    1375             : 
    1376           0 :         noiseOriginIndex = noise_picture_ptr->origin_x + sb_origin_x + noise_picture_ptr->origin_y * noise_picture_ptr->stride_y;
    1377           0 :         ptr_noise = &(noise_picture_ptr->buffer_y[noiseOriginIndex]);
    1378             : 
    1379           0 :         for (jj = 0; jj < sb_height; jj++) {
    1380           0 :             for (ii = idx; ii < sb_width; ii++) {
    1381           0 :                 if ((jj > 0 || sb_origin_y > 0) && (jj < sb_height - 1 || sb_origin_y + sb_height < picHeight) && (ii > 0 || sb_origin_x > 0) && (ii + sb_origin_x) < picWidth - 1/* & ii < sb_width - 1*/) {
    1382           0 :                     ptr_denoised[ii + jj * strideOut] = getFilteredTypes(&ptrIn[ii + jj * stride_in], stride_in, 0);
    1383           0 :                     ptr_noise[ii + jj * strideOut] = CLIP3EQ(0, 255, ptrIn[ii + jj * stride_in] - ptr_denoised[ii + jj * strideOut]);
    1384             :                 }
    1385             :                 else {
    1386           0 :                     ptr_denoised[ii + jj * strideOut] = ptrIn[ii + jj * stride_in];
    1387           0 :                     ptr_noise[ii + jj * strideOut] = 0;
    1388             :                 }
    1389             :             }
    1390             :         }
    1391             :     }
    1392           0 : }
    1393             : 
    1394        1200 : EbErrorType ZeroOutChromaBlockMean(
    1395             :     PictureParentControlSet   *picture_control_set_ptr,          // input parameter, Picture Control Set Ptr
    1396             :     uint32_t                       lcuCodingOrder                // input parameter, SB address
    1397             : )
    1398             : {
    1399        1200 :     EbErrorType return_error = EB_ErrorNone;
    1400             :     // 16x16 mean
    1401        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_0] = 0;
    1402        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_1] = 0;
    1403        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_2] = 0;
    1404        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_3] = 0;
    1405        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_4] = 0;
    1406        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_5] = 0;
    1407        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_6] = 0;
    1408        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_7] = 0;
    1409        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_8] = 0;
    1410        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_9] = 0;
    1411        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_10] = 0;
    1412        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_11] = 0;
    1413        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_12] = 0;
    1414        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_13] = 0;
    1415        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_14] = 0;
    1416        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_15] = 0;
    1417             : 
    1418        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_0] = 0;
    1419        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_1] = 0;
    1420        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_2] = 0;
    1421        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_3] = 0;
    1422        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_4] = 0;
    1423        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_5] = 0;
    1424        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_6] = 0;
    1425        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_7] = 0;
    1426        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_8] = 0;
    1427        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_9] = 0;
    1428        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_10] = 0;
    1429        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_11] = 0;
    1430        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_12] = 0;
    1431        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_13] = 0;
    1432        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_14] = 0;
    1433        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_15] = 0;
    1434             : 
    1435             :     // 32x32 mean
    1436        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_0] = 0;
    1437        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_1] = 0;
    1438        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_2] = 0;
    1439        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_3] = 0;
    1440             : 
    1441        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_0] = 0;
    1442        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_1] = 0;
    1443        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_2] = 0;
    1444        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_3] = 0;
    1445             : 
    1446             :     // 64x64 mean
    1447        1200 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_64x64] = 0;
    1448        1200 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_64x64] = 0;
    1449             : 
    1450        1200 :     return return_error;
    1451             : }
    1452             : /*******************************************
    1453             : * ComputeChromaBlockMean
    1454             : *   computes the chroma block mean for 64x64, 32x32 and 16x16 CUs inside the tree block
    1455             : *******************************************/
    1456        5985 : EbErrorType ComputeChromaBlockMean(
    1457             :     SequenceControlSet        *sequence_control_set_ptr,
    1458             :     PictureParentControlSet   *picture_control_set_ptr,          // input parameter, Picture Control Set Ptr
    1459             :     EbPictureBufferDesc       *input_padded_picture_ptr,         // input parameter, Input Padded Picture
    1460             :     uint32_t                       lcuCodingOrder,                // input parameter, SB address
    1461             :     uint32_t                       inputCbOriginIndex,            // input parameter, SB index, used to point to source/reference samples
    1462             :     uint32_t                       inputCrOriginIndex)            // input parameter, SB index, used to point to source/reference samples
    1463             : {
    1464        5985 :     EbErrorType return_error = EB_ErrorNone;
    1465             : 
    1466             :     uint32_t cbBlockIndex, crBlockIndex;
    1467             : 
    1468             :     uint64_t cbMeanOf16x16Blocks[16];
    1469             :     uint64_t crMeanOf16x16Blocks[16];
    1470             : 
    1471             :     uint64_t cbMeanOf32x32Blocks[4];
    1472             :     uint64_t crMeanOf32x32Blocks[4];
    1473             : 
    1474             :     uint64_t cbMeanOf64x64Blocks;
    1475             :     uint64_t crMeanOf64x64Blocks;
    1476             : 
    1477             :     // (0,0) 16x16 block
    1478        5985 :     cbBlockIndex = inputCbOriginIndex;
    1479        5985 :     crBlockIndex = inputCrOriginIndex;
    1480        5985 :     if (sequence_control_set_ptr->block_mean_calc_prec == BLOCK_MEAN_PREC_FULL) {
    1481           0 :         cbMeanOf16x16Blocks[0] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
    1482           0 :         crMeanOf16x16Blocks[0] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
    1483             : 
    1484             :         // (0,1)
    1485           0 :         cbBlockIndex = cbBlockIndex + 8;
    1486           0 :         crBlockIndex = crBlockIndex + 8;
    1487           0 :         cbMeanOf16x16Blocks[1] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
    1488           0 :         crMeanOf16x16Blocks[1] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
    1489             : 
    1490             :         // (0,2)
    1491           0 :         cbBlockIndex = cbBlockIndex + 8;
    1492           0 :         crBlockIndex = crBlockIndex + 8;
    1493           0 :         cbMeanOf16x16Blocks[2] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
    1494           0 :         crMeanOf16x16Blocks[2] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
    1495             : 
    1496             :         // (0,3)
    1497           0 :         cbBlockIndex = cbBlockIndex + 8;
    1498           0 :         crBlockIndex = crBlockIndex + 8;
    1499           0 :         cbMeanOf16x16Blocks[3] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
    1500           0 :         crMeanOf16x16Blocks[3] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
    1501             : 
    1502             :         // (1,0)
    1503           0 :         cbBlockIndex = inputCbOriginIndex + (input_padded_picture_ptr->stride_cb << 3);
    1504           0 :         crBlockIndex = inputCrOriginIndex + (input_padded_picture_ptr->stride_cr << 3);
    1505           0 :         cbMeanOf16x16Blocks[4] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
    1506           0 :         crMeanOf16x16Blocks[4] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
    1507             : 
    1508             :         // (1,1)
    1509           0 :         cbBlockIndex = cbBlockIndex + 8;
    1510           0 :         crBlockIndex = crBlockIndex + 8;
    1511           0 :         cbMeanOf16x16Blocks[5] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
    1512           0 :         crMeanOf16x16Blocks[5] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
    1513             : 
    1514             :         // (1,2)
    1515           0 :         cbBlockIndex = cbBlockIndex + 8;
    1516           0 :         crBlockIndex = crBlockIndex + 8;
    1517           0 :         cbMeanOf16x16Blocks[6] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
    1518           0 :         crMeanOf16x16Blocks[6] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
    1519             : 
    1520             :         // (1,3)
    1521           0 :         cbBlockIndex = cbBlockIndex + 8;
    1522           0 :         crBlockIndex = crBlockIndex + 8;
    1523           0 :         cbMeanOf16x16Blocks[7] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
    1524           0 :         crMeanOf16x16Blocks[7] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
    1525             : 
    1526             :         // (2,0)
    1527           0 :         cbBlockIndex = inputCbOriginIndex + (input_padded_picture_ptr->stride_cb << 4);
    1528           0 :         crBlockIndex = inputCrOriginIndex + (input_padded_picture_ptr->stride_cr << 4);
    1529           0 :         cbMeanOf16x16Blocks[8] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
    1530           0 :         crMeanOf16x16Blocks[8] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
    1531             : 
    1532             :         // (2,1)
    1533           0 :         cbBlockIndex = cbBlockIndex + 8;
    1534           0 :         crBlockIndex = crBlockIndex + 8;
    1535           0 :         cbMeanOf16x16Blocks[9] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
    1536           0 :         crMeanOf16x16Blocks[9] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
    1537             : 
    1538             :         // (2,2)
    1539           0 :         cbBlockIndex = cbBlockIndex + 8;
    1540           0 :         crBlockIndex = crBlockIndex + 8;
    1541           0 :         cbMeanOf16x16Blocks[10] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
    1542           0 :         crMeanOf16x16Blocks[10] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
    1543             : 
    1544             :         // (2,3)
    1545           0 :         cbBlockIndex = cbBlockIndex + 8;
    1546           0 :         crBlockIndex = crBlockIndex + 8;
    1547           0 :         cbMeanOf16x16Blocks[11] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
    1548           0 :         crMeanOf16x16Blocks[11] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
    1549             : 
    1550             :         // (3,0)
    1551           0 :         cbBlockIndex = inputCbOriginIndex + (input_padded_picture_ptr->stride_cb * 24);
    1552           0 :         crBlockIndex = inputCrOriginIndex + (input_padded_picture_ptr->stride_cr * 24);
    1553           0 :         cbMeanOf16x16Blocks[12] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
    1554           0 :         crMeanOf16x16Blocks[12] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
    1555             : 
    1556             :         // (3,1)
    1557           0 :         cbBlockIndex = cbBlockIndex + 8;
    1558           0 :         crBlockIndex = crBlockIndex + 8;
    1559           0 :         cbMeanOf16x16Blocks[13] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
    1560           0 :         crMeanOf16x16Blocks[13] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
    1561             : 
    1562             :         // (3,2)
    1563           0 :         cbBlockIndex = cbBlockIndex + 8;
    1564           0 :         crBlockIndex = crBlockIndex + 8;
    1565           0 :         cbMeanOf16x16Blocks[14] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
    1566           0 :         crMeanOf16x16Blocks[14] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
    1567             : 
    1568             :         // (3,3)
    1569           0 :         cbBlockIndex = cbBlockIndex + 8;
    1570           0 :         crBlockIndex = crBlockIndex + 8;
    1571           0 :         cbMeanOf16x16Blocks[15] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
    1572           0 :         crMeanOf16x16Blocks[15] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
    1573             :     }
    1574             :     else {
    1575        5985 :         const uint16_t stride_cb = input_padded_picture_ptr->stride_cb;
    1576        5985 :         const uint16_t stride_cr = input_padded_picture_ptr->stride_cr;
    1577             : 
    1578        5985 :         cbMeanOf16x16Blocks[0] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
    1579        5994 :         crMeanOf16x16Blocks[0] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
    1580             : 
    1581             :         // (0,1)
    1582        5997 :         cbBlockIndex = cbBlockIndex + 8;
    1583        5997 :         crBlockIndex = crBlockIndex + 8;
    1584        5997 :         cbMeanOf16x16Blocks[1] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
    1585        5996 :         crMeanOf16x16Blocks[1] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
    1586             : 
    1587             :         // (0,2)
    1588        5997 :         cbBlockIndex = cbBlockIndex + 8;
    1589        5997 :         crBlockIndex = crBlockIndex + 8;
    1590        5997 :         cbMeanOf16x16Blocks[2] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
    1591        5999 :         crMeanOf16x16Blocks[2] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
    1592             : 
    1593             :         // (0,3)
    1594        5997 :         cbBlockIndex = cbBlockIndex + 8;
    1595        5997 :         crBlockIndex = crBlockIndex + 8;
    1596        5997 :         cbMeanOf16x16Blocks[3] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
    1597        5998 :         crMeanOf16x16Blocks[3] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
    1598             : 
    1599             :         // (1,0)
    1600        5998 :         cbBlockIndex = inputCbOriginIndex + (stride_cb << 3);
    1601        5998 :         crBlockIndex = inputCrOriginIndex + (stride_cr << 3);
    1602        5998 :         cbMeanOf16x16Blocks[4] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
    1603        5999 :         crMeanOf16x16Blocks[4] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
    1604             : 
    1605             :         // (1,1)
    1606        5996 :         cbBlockIndex = cbBlockIndex + 8;
    1607        5996 :         crBlockIndex = crBlockIndex + 8;
    1608        5996 :         cbMeanOf16x16Blocks[5] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
    1609        5996 :         crMeanOf16x16Blocks[5] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
    1610             : 
    1611             :         // (1,2)
    1612        5996 :         cbBlockIndex = cbBlockIndex + 8;
    1613        5996 :         crBlockIndex = crBlockIndex + 8;
    1614        5996 :         cbMeanOf16x16Blocks[6] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
    1615        5995 :         crMeanOf16x16Blocks[6] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
    1616             : 
    1617             :         // (1,3)
    1618        5995 :         cbBlockIndex = cbBlockIndex + 8;
    1619        5995 :         crBlockIndex = crBlockIndex + 8;
    1620        5995 :         cbMeanOf16x16Blocks[7] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
    1621        5994 :         crMeanOf16x16Blocks[7] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
    1622             : 
    1623             :         // (2,0)
    1624        5995 :         cbBlockIndex = inputCbOriginIndex + (stride_cb << 4);
    1625        5995 :         crBlockIndex = inputCrOriginIndex + (stride_cr << 4);
    1626        5995 :         cbMeanOf16x16Blocks[8] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
    1627        5996 :         crMeanOf16x16Blocks[8] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
    1628             : 
    1629             :         // (2,1)
    1630        5996 :         cbBlockIndex = cbBlockIndex + 8;
    1631        5996 :         crBlockIndex = crBlockIndex + 8;
    1632        5996 :         cbMeanOf16x16Blocks[9] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
    1633        5995 :         crMeanOf16x16Blocks[9] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
    1634             : 
    1635             :         // (2,2)
    1636        5996 :         cbBlockIndex = cbBlockIndex + 8;
    1637        5996 :         crBlockIndex = crBlockIndex + 8;
    1638        5996 :         cbMeanOf16x16Blocks[10] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
    1639        5998 :         crMeanOf16x16Blocks[10] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
    1640             : 
    1641             :         // (2,3)
    1642        5997 :         cbBlockIndex = cbBlockIndex + 8;
    1643        5997 :         crBlockIndex = crBlockIndex + 8;
    1644        5997 :         cbMeanOf16x16Blocks[11] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
    1645        6000 :         crMeanOf16x16Blocks[11] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
    1646             : 
    1647             :         // (3,0)
    1648        5998 :         cbBlockIndex = inputCbOriginIndex + (stride_cb * 24);
    1649        5998 :         crBlockIndex = inputCrOriginIndex + (stride_cr * 24);
    1650        5998 :         cbMeanOf16x16Blocks[12] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
    1651        5996 :         crMeanOf16x16Blocks[12] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
    1652             : 
    1653             :         // (3,1)
    1654        5998 :         cbBlockIndex = cbBlockIndex + 8;
    1655        5998 :         crBlockIndex = crBlockIndex + 8;
    1656        5998 :         cbMeanOf16x16Blocks[13] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
    1657        5999 :         crMeanOf16x16Blocks[13] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
    1658             : 
    1659             :         // (3,2)
    1660        5997 :         cbBlockIndex = cbBlockIndex + 8;
    1661        5997 :         crBlockIndex = crBlockIndex + 8;
    1662        5997 :         cbMeanOf16x16Blocks[14] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
    1663        5997 :         crMeanOf16x16Blocks[14] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
    1664             : 
    1665             :         // (3,3)
    1666        5998 :         cbBlockIndex = cbBlockIndex + 8;
    1667        5998 :         crBlockIndex = crBlockIndex + 8;
    1668        5998 :         cbMeanOf16x16Blocks[15] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
    1669        5999 :         crMeanOf16x16Blocks[15] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
    1670             :     }
    1671             : 
    1672             :     // 32x32
    1673        5995 :     cbMeanOf32x32Blocks[0] = (cbMeanOf16x16Blocks[0] + cbMeanOf16x16Blocks[1] + cbMeanOf16x16Blocks[4] + cbMeanOf16x16Blocks[5]) >> 2;
    1674        5995 :     crMeanOf32x32Blocks[0] = (crMeanOf16x16Blocks[0] + crMeanOf16x16Blocks[1] + crMeanOf16x16Blocks[4] + crMeanOf16x16Blocks[5]) >> 2;
    1675             : 
    1676        5995 :     cbMeanOf32x32Blocks[1] = (cbMeanOf16x16Blocks[2] + cbMeanOf16x16Blocks[3] + cbMeanOf16x16Blocks[6] + cbMeanOf16x16Blocks[7]) >> 2;
    1677        5995 :     crMeanOf32x32Blocks[1] = (crMeanOf16x16Blocks[2] + crMeanOf16x16Blocks[3] + crMeanOf16x16Blocks[6] + crMeanOf16x16Blocks[7]) >> 2;
    1678             : 
    1679        5995 :     cbMeanOf32x32Blocks[2] = (cbMeanOf16x16Blocks[8] + cbMeanOf16x16Blocks[9] + cbMeanOf16x16Blocks[12] + cbMeanOf16x16Blocks[13]) >> 2;
    1680        5995 :     crMeanOf32x32Blocks[2] = (crMeanOf16x16Blocks[8] + crMeanOf16x16Blocks[9] + crMeanOf16x16Blocks[12] + crMeanOf16x16Blocks[13]) >> 2;
    1681             : 
    1682        5995 :     cbMeanOf32x32Blocks[3] = (cbMeanOf16x16Blocks[10] + cbMeanOf16x16Blocks[11] + cbMeanOf16x16Blocks[14] + cbMeanOf16x16Blocks[15]) >> 2;
    1683        5995 :     crMeanOf32x32Blocks[3] = (crMeanOf16x16Blocks[10] + crMeanOf16x16Blocks[11] + crMeanOf16x16Blocks[14] + crMeanOf16x16Blocks[15]) >> 2;
    1684             : 
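    1685             :     // 64x64 -- NOTE(review): both sums below add 32x32 block [3] twice and never read [2]; presumably intended [0] + [1] + [2] + [3] -- confirm and fix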
    1686        5995 :     cbMeanOf64x64Blocks = (cbMeanOf32x32Blocks[0] + cbMeanOf32x32Blocks[1] + cbMeanOf32x32Blocks[3] + cbMeanOf32x32Blocks[3]) >> 2;
    1687        5995 :     crMeanOf64x64Blocks = (crMeanOf32x32Blocks[0] + crMeanOf32x32Blocks[1] + crMeanOf32x32Blocks[3] + crMeanOf32x32Blocks[3]) >> 2;
    1688             :     // 16x16 mean
    1689        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_0] = (uint8_t)(cbMeanOf16x16Blocks[0] >> MEAN_PRECISION);
    1690        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_1] = (uint8_t)(cbMeanOf16x16Blocks[1] >> MEAN_PRECISION);
    1691        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_2] = (uint8_t)(cbMeanOf16x16Blocks[2] >> MEAN_PRECISION);
    1692        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_3] = (uint8_t)(cbMeanOf16x16Blocks[3] >> MEAN_PRECISION);
    1693        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_4] = (uint8_t)(cbMeanOf16x16Blocks[4] >> MEAN_PRECISION);
    1694        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_5] = (uint8_t)(cbMeanOf16x16Blocks[5] >> MEAN_PRECISION);
    1695        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_6] = (uint8_t)(cbMeanOf16x16Blocks[6] >> MEAN_PRECISION);
    1696        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_7] = (uint8_t)(cbMeanOf16x16Blocks[7] >> MEAN_PRECISION);
    1697        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_8] = (uint8_t)(cbMeanOf16x16Blocks[8] >> MEAN_PRECISION);
    1698        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_9] = (uint8_t)(cbMeanOf16x16Blocks[9] >> MEAN_PRECISION);
    1699        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_10] = (uint8_t)(cbMeanOf16x16Blocks[10] >> MEAN_PRECISION);
    1700        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_11] = (uint8_t)(cbMeanOf16x16Blocks[11] >> MEAN_PRECISION);
    1701        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_12] = (uint8_t)(cbMeanOf16x16Blocks[12] >> MEAN_PRECISION);
    1702        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_13] = (uint8_t)(cbMeanOf16x16Blocks[13] >> MEAN_PRECISION);
    1703        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_14] = (uint8_t)(cbMeanOf16x16Blocks[14] >> MEAN_PRECISION);
    1704        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_15] = (uint8_t)(cbMeanOf16x16Blocks[15] >> MEAN_PRECISION);
    1705             : 
    1706        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_0] = (uint8_t)(crMeanOf16x16Blocks[0] >> MEAN_PRECISION);
    1707        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_1] = (uint8_t)(crMeanOf16x16Blocks[1] >> MEAN_PRECISION);
    1708        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_2] = (uint8_t)(crMeanOf16x16Blocks[2] >> MEAN_PRECISION);
    1709        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_3] = (uint8_t)(crMeanOf16x16Blocks[3] >> MEAN_PRECISION);
    1710        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_4] = (uint8_t)(crMeanOf16x16Blocks[4] >> MEAN_PRECISION);
    1711        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_5] = (uint8_t)(crMeanOf16x16Blocks[5] >> MEAN_PRECISION);
    1712        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_6] = (uint8_t)(crMeanOf16x16Blocks[6] >> MEAN_PRECISION);
    1713        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_7] = (uint8_t)(crMeanOf16x16Blocks[7] >> MEAN_PRECISION);
    1714        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_8] = (uint8_t)(crMeanOf16x16Blocks[8] >> MEAN_PRECISION);
    1715        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_9] = (uint8_t)(crMeanOf16x16Blocks[9] >> MEAN_PRECISION);
    1716        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_10] = (uint8_t)(crMeanOf16x16Blocks[10] >> MEAN_PRECISION);
    1717        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_11] = (uint8_t)(crMeanOf16x16Blocks[11] >> MEAN_PRECISION);
    1718        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_12] = (uint8_t)(crMeanOf16x16Blocks[12] >> MEAN_PRECISION);
    1719        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_13] = (uint8_t)(crMeanOf16x16Blocks[13] >> MEAN_PRECISION);
    1720        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_14] = (uint8_t)(crMeanOf16x16Blocks[14] >> MEAN_PRECISION);
    1721        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_15] = (uint8_t)(crMeanOf16x16Blocks[15] >> MEAN_PRECISION);
    1722             : 
    1723             :     // 32x32 mean
    1724        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_0] = (uint8_t)(cbMeanOf32x32Blocks[0] >> MEAN_PRECISION);
    1725        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_1] = (uint8_t)(cbMeanOf32x32Blocks[1] >> MEAN_PRECISION);
    1726        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_2] = (uint8_t)(cbMeanOf32x32Blocks[2] >> MEAN_PRECISION);
    1727        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_3] = (uint8_t)(cbMeanOf32x32Blocks[3] >> MEAN_PRECISION);
    1728             : 
    1729        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_0] = (uint8_t)(crMeanOf32x32Blocks[0] >> MEAN_PRECISION);
    1730        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_1] = (uint8_t)(crMeanOf32x32Blocks[1] >> MEAN_PRECISION);
    1731        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_2] = (uint8_t)(crMeanOf32x32Blocks[2] >> MEAN_PRECISION);
    1732        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_3] = (uint8_t)(crMeanOf32x32Blocks[3] >> MEAN_PRECISION);
    1733             : 
    1734             :     // 64x64 mean
    1735        5995 :     picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_64x64] = (uint8_t)(cbMeanOf64x64Blocks >> MEAN_PRECISION);
    1736        5995 :     picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_64x64] = (uint8_t)(crMeanOf64x64Blocks >> MEAN_PRECISION);
    1737             : 
    1738        5995 :     return return_error;
    1739             : }
    1740             : 
    1741             : /*******************************************
    1742             : * ComputeBlockMeanComputeVariance
    1743             : *   computes the variance and the block mean of all CUs inside the tree block
    1744             : *******************************************/
    1745        7188 : EbErrorType ComputeBlockMeanComputeVariance(
    1746             :     SequenceControlSet        *sequence_control_set_ptr,
    1747             :     PictureParentControlSet   *picture_control_set_ptr,          // input parameter, Picture Control Set Ptr
    1748             :     EbPictureBufferDesc       *input_padded_picture_ptr,         // input parameter, Input Padded Picture
    1749             :     uint32_t                       sb_index,                // input parameter, SB address
    1750             :     uint32_t                       inputLumaOriginIndex)          // input parameter, SB index, used to point to source/reference samples
    1751             : {
    1752        7188 :     EbErrorType return_error = EB_ErrorNone;
    1753             : 
    1754             :     uint32_t blockIndex;
    1755             : 
    1756             :     uint64_t mean_of8x8_blocks[64];
    1757             :     uint64_t meanOf8x8SquaredValuesBlocks[64];
    1758             : 
    1759             :     uint64_t meanOf16x16Blocks[16];
    1760             :     uint64_t meanOf16x16SquaredValuesBlocks[16];
    1761             : 
    1762             :     uint64_t meanOf32x32Blocks[4];
    1763             :     uint64_t meanOf32x32SquaredValuesBlocks[4];
    1764             : 
    1765             :     uint64_t meanOf64x64Blocks;
    1766             :     uint64_t meanOf64x64SquaredValuesBlocks;
    1767             : 
    1768             :     // (0,0)
    1769        7188 :     blockIndex = inputLumaOriginIndex;
    1770        7188 :     if (sequence_control_set_ptr->block_mean_calc_prec == BLOCK_MEAN_PREC_FULL) {
    1771           0 :         mean_of8x8_blocks[0] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1772           0 :         meanOf8x8SquaredValuesBlocks[0] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1773             : 
    1774             :         // (0,1)
    1775           0 :         blockIndex = blockIndex + 8;
    1776           0 :         mean_of8x8_blocks[1] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1777           0 :         meanOf8x8SquaredValuesBlocks[1] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1778             : 
    1779             :         // (0,2)
    1780           0 :         blockIndex = blockIndex + 8;
    1781           0 :         mean_of8x8_blocks[2] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1782           0 :         meanOf8x8SquaredValuesBlocks[2] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1783             : 
    1784             :         // (0,3)
    1785           0 :         blockIndex = blockIndex + 8;
    1786           0 :         mean_of8x8_blocks[3] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1787           0 :         meanOf8x8SquaredValuesBlocks[3] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1788             : 
    1789             :         // (0,4)
    1790           0 :         blockIndex = blockIndex + 8;
    1791           0 :         mean_of8x8_blocks[4] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1792           0 :         meanOf8x8SquaredValuesBlocks[4] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1793             : 
    1794             :         // (0,5)
    1795           0 :         blockIndex = blockIndex + 8;
    1796           0 :         mean_of8x8_blocks[5] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1797           0 :         meanOf8x8SquaredValuesBlocks[5] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1798             : 
    1799             :         // (0,6)
    1800           0 :         blockIndex = blockIndex + 8;
    1801           0 :         mean_of8x8_blocks[6] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1802           0 :         meanOf8x8SquaredValuesBlocks[6] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1803             : 
    1804             :         // (0,7)
    1805           0 :         blockIndex = blockIndex + 8;
    1806           0 :         mean_of8x8_blocks[7] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1807           0 :         meanOf8x8SquaredValuesBlocks[7] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1808             : 
    1809             :         // (1,0)
    1810           0 :         blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3);
    1811           0 :         mean_of8x8_blocks[8] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1812           0 :         meanOf8x8SquaredValuesBlocks[8] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1813             : 
    1814             :         // (1,1)
    1815           0 :         blockIndex = blockIndex + 8;
    1816           0 :         mean_of8x8_blocks[9] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1817           0 :         meanOf8x8SquaredValuesBlocks[9] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1818             : 
    1819             :         // (1,2)
    1820           0 :         blockIndex = blockIndex + 8;
    1821           0 :         mean_of8x8_blocks[10] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1822           0 :         meanOf8x8SquaredValuesBlocks[10] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1823             : 
    1824             :         // (1,3)
    1825           0 :         blockIndex = blockIndex + 8;
    1826           0 :         mean_of8x8_blocks[11] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1827           0 :         meanOf8x8SquaredValuesBlocks[11] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1828             : 
    1829             :         // (1,4)
    1830           0 :         blockIndex = blockIndex + 8;
    1831           0 :         mean_of8x8_blocks[12] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1832           0 :         meanOf8x8SquaredValuesBlocks[12] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1833             : 
    1834             :         // (1,5)
    1835           0 :         blockIndex = blockIndex + 8;
    1836           0 :         mean_of8x8_blocks[13] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1837           0 :         meanOf8x8SquaredValuesBlocks[13] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1838             : 
    1839             :         // (1,6)
    1840           0 :         blockIndex = blockIndex + 8;
    1841           0 :         mean_of8x8_blocks[14] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1842           0 :         meanOf8x8SquaredValuesBlocks[14] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1843             : 
    1844             :         // (1,7)
    1845           0 :         blockIndex = blockIndex + 8;
    1846           0 :         mean_of8x8_blocks[15] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1847           0 :         meanOf8x8SquaredValuesBlocks[15] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1848             : 
    1849             :         // (2,0)
    1850           0 :         blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 4);
    1851           0 :         mean_of8x8_blocks[16] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1852           0 :         meanOf8x8SquaredValuesBlocks[16] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1853             : 
    1854             :         // (2,1)
    1855           0 :         blockIndex = blockIndex + 8;
    1856           0 :         mean_of8x8_blocks[17] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1857           0 :         meanOf8x8SquaredValuesBlocks[17] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1858             : 
    1859             :         // (2,2)
    1860           0 :         blockIndex = blockIndex + 8;
    1861           0 :         mean_of8x8_blocks[18] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1862           0 :         meanOf8x8SquaredValuesBlocks[18] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1863             : 
    1864             :         // (2,3)
    1865           0 :         blockIndex = blockIndex + 8;
    1866           0 :         mean_of8x8_blocks[19] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1867           0 :         meanOf8x8SquaredValuesBlocks[19] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1868             : 
    1869             :         // (2,4)
    1870           0 :         blockIndex = blockIndex + 8;
    1871           0 :         mean_of8x8_blocks[20] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1872           0 :         meanOf8x8SquaredValuesBlocks[20] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1873             : 
    1874             :         // (2,5)
    1875           0 :         blockIndex = blockIndex + 8;
    1876           0 :         mean_of8x8_blocks[21] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1877           0 :         meanOf8x8SquaredValuesBlocks[21] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1878             : 
    1879             :         // (2,6)
    1880           0 :         blockIndex = blockIndex + 8;
    1881           0 :         mean_of8x8_blocks[22] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1882           0 :         meanOf8x8SquaredValuesBlocks[22] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1883             : 
    1884             :         // (2,7)
    1885           0 :         blockIndex = blockIndex + 8;
    1886           0 :         mean_of8x8_blocks[23] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1887           0 :         meanOf8x8SquaredValuesBlocks[23] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1888             : 
    1889             :         // (3,0)
    1890           0 :         blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3) + (input_padded_picture_ptr->stride_y << 4);
    1891           0 :         mean_of8x8_blocks[24] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1892           0 :         meanOf8x8SquaredValuesBlocks[24] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1893             : 
    1894             :         // (3,1)
    1895           0 :         blockIndex = blockIndex + 8;
    1896           0 :         mean_of8x8_blocks[25] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1897           0 :         meanOf8x8SquaredValuesBlocks[25] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1898             : 
    1899             :         // (3,2)
    1900           0 :         blockIndex = blockIndex + 8;
    1901           0 :         mean_of8x8_blocks[26] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1902           0 :         meanOf8x8SquaredValuesBlocks[26] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1903             : 
    1904             :         // (3,3)
    1905           0 :         blockIndex = blockIndex + 8;
    1906           0 :         mean_of8x8_blocks[27] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1907           0 :         meanOf8x8SquaredValuesBlocks[27] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1908             : 
    1909             :         // (3,4)
    1910           0 :         blockIndex = blockIndex + 8;
    1911           0 :         mean_of8x8_blocks[28] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1912           0 :         meanOf8x8SquaredValuesBlocks[28] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1913             : 
    1914             :         // (3,5)
    1915           0 :         blockIndex = blockIndex + 8;
    1916           0 :         mean_of8x8_blocks[29] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1917           0 :         meanOf8x8SquaredValuesBlocks[29] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1918             : 
    1919             :         // (3,6)
    1920           0 :         blockIndex = blockIndex + 8;
    1921           0 :         mean_of8x8_blocks[30] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1922           0 :         meanOf8x8SquaredValuesBlocks[30] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1923             : 
    1924             :         // (3,7)
    1925           0 :         blockIndex = blockIndex + 8;
    1926           0 :         mean_of8x8_blocks[31] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1927           0 :         meanOf8x8SquaredValuesBlocks[31] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1928             : 
    1929             :         // (4,0)
    1930           0 :         blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 5);
    1931           0 :         mean_of8x8_blocks[32] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1932           0 :         meanOf8x8SquaredValuesBlocks[32] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1933             : 
    1934             :         // (4,1)
    1935           0 :         blockIndex = blockIndex + 8;
    1936           0 :         mean_of8x8_blocks[33] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1937           0 :         meanOf8x8SquaredValuesBlocks[33] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1938             : 
    1939             :         // (4,2)
    1940           0 :         blockIndex = blockIndex + 8;
    1941           0 :         mean_of8x8_blocks[34] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1942           0 :         meanOf8x8SquaredValuesBlocks[34] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1943             : 
    1944             :         // (4,3)
    1945           0 :         blockIndex = blockIndex + 8;
    1946           0 :         mean_of8x8_blocks[35] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1947           0 :         meanOf8x8SquaredValuesBlocks[35] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1948             : 
    1949             :         // (4,4)
    1950           0 :         blockIndex = blockIndex + 8;
    1951           0 :         mean_of8x8_blocks[36] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1952           0 :         meanOf8x8SquaredValuesBlocks[36] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1953             : 
    1954             :         // (4,5)
    1955           0 :         blockIndex = blockIndex + 8;
    1956           0 :         mean_of8x8_blocks[37] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1957           0 :         meanOf8x8SquaredValuesBlocks[37] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1958             : 
    1959             :         // (4,6)
    1960           0 :         blockIndex = blockIndex + 8;
    1961           0 :         mean_of8x8_blocks[38] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1962           0 :         meanOf8x8SquaredValuesBlocks[38] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1963             : 
    1964             :         // (4,7)
    1965           0 :         blockIndex = blockIndex + 8;
    1966           0 :         mean_of8x8_blocks[39] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1967           0 :         meanOf8x8SquaredValuesBlocks[39] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1968             : 
    1969             :         // (5,0)
    1970           0 :         blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3) + (input_padded_picture_ptr->stride_y << 5);
    1971           0 :         mean_of8x8_blocks[40] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1972           0 :         meanOf8x8SquaredValuesBlocks[40] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1973             : 
    1974             :         // (5,1)
    1975           0 :         blockIndex = blockIndex + 8;
    1976           0 :         mean_of8x8_blocks[41] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1977           0 :         meanOf8x8SquaredValuesBlocks[41] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1978             : 
    1979             :         // (5,2)
    1980           0 :         blockIndex = blockIndex + 8;
    1981           0 :         mean_of8x8_blocks[42] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1982           0 :         meanOf8x8SquaredValuesBlocks[42] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1983             : 
    1984             :         // (5,3)
    1985           0 :         blockIndex = blockIndex + 8;
    1986           0 :         mean_of8x8_blocks[43] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1987           0 :         meanOf8x8SquaredValuesBlocks[43] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1988             : 
    1989             :         // (5,4)
    1990           0 :         blockIndex = blockIndex + 8;
    1991           0 :         mean_of8x8_blocks[44] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1992           0 :         meanOf8x8SquaredValuesBlocks[44] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1993             : 
    1994             :         // (5,5)
    1995           0 :         blockIndex = blockIndex + 8;
    1996           0 :         mean_of8x8_blocks[45] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1997           0 :         meanOf8x8SquaredValuesBlocks[45] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    1998             : 
    1999             :         // (5,6)
    2000           0 :         blockIndex = blockIndex + 8;
    2001           0 :         mean_of8x8_blocks[46] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2002           0 :         meanOf8x8SquaredValuesBlocks[46] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2003             : 
    2004             :         // (5,7)
    2005           0 :         blockIndex = blockIndex + 8;
    2006           0 :         mean_of8x8_blocks[47] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2007           0 :         meanOf8x8SquaredValuesBlocks[47] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2008             : 
    2009             :         // (6,0)
    2010           0 :         blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 4) + (input_padded_picture_ptr->stride_y << 5);
    2011           0 :         mean_of8x8_blocks[48] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2012           0 :         meanOf8x8SquaredValuesBlocks[48] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2013             : 
    2014             :         // (6,1)
    2015           0 :         blockIndex = blockIndex + 8;
    2016           0 :         mean_of8x8_blocks[49] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2017           0 :         meanOf8x8SquaredValuesBlocks[49] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2018             : 
    2019             :         // (6,2)
    2020           0 :         blockIndex = blockIndex + 8;
    2021           0 :         mean_of8x8_blocks[50] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2022           0 :         meanOf8x8SquaredValuesBlocks[50] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2023             : 
    2024             :         // (6,3)
    2025           0 :         blockIndex = blockIndex + 8;
    2026           0 :         mean_of8x8_blocks[51] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2027           0 :         meanOf8x8SquaredValuesBlocks[51] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2028             : 
    2029             :         // (6,4)
    2030           0 :         blockIndex = blockIndex + 8;
    2031           0 :         mean_of8x8_blocks[52] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2032           0 :         meanOf8x8SquaredValuesBlocks[52] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2033             : 
    2034             :         // (6,5)
    2035           0 :         blockIndex = blockIndex + 8;
    2036           0 :         mean_of8x8_blocks[53] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2037           0 :         meanOf8x8SquaredValuesBlocks[53] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2038             : 
    2039             :         // (6,6)
    2040           0 :         blockIndex = blockIndex + 8;
    2041           0 :         mean_of8x8_blocks[54] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2042           0 :         meanOf8x8SquaredValuesBlocks[54] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2043             : 
    2044             :         // (6,7)
    2045           0 :         blockIndex = blockIndex + 8;
    2046           0 :         mean_of8x8_blocks[55] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2047           0 :         meanOf8x8SquaredValuesBlocks[55] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2048             : 
    2049             :         // (7,0)
    2050           0 :         blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3) + (input_padded_picture_ptr->stride_y << 4) + (input_padded_picture_ptr->stride_y << 5);
    2051           0 :         mean_of8x8_blocks[56] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2052           0 :         meanOf8x8SquaredValuesBlocks[56] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2053             : 
    2054             :         // (7,1)
    2055           0 :         blockIndex = blockIndex + 8;
    2056           0 :         mean_of8x8_blocks[57] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2057           0 :         meanOf8x8SquaredValuesBlocks[57] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2058             : 
    2059             :         // (7,2)
    2060           0 :         blockIndex = blockIndex + 8;
    2061           0 :         mean_of8x8_blocks[58] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2062           0 :         meanOf8x8SquaredValuesBlocks[58] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2063             : 
    2064             :         // (7,3)
    2065           0 :         blockIndex = blockIndex + 8;
    2066           0 :         mean_of8x8_blocks[59] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2067           0 :         meanOf8x8SquaredValuesBlocks[59] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2068             : 
    2069             :         // (7,4)
    2070           0 :         blockIndex = blockIndex + 8;
    2071           0 :         mean_of8x8_blocks[60] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2072           0 :         meanOf8x8SquaredValuesBlocks[60] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2073             : 
    2074             :         // (7,5)
    2075           0 :         blockIndex = blockIndex + 8;
    2076           0 :         mean_of8x8_blocks[61] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2077           0 :         meanOf8x8SquaredValuesBlocks[61] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2078             : 
    2079             :         // (7,6)
    2080           0 :         blockIndex = blockIndex + 8;
    2081           0 :         mean_of8x8_blocks[62] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2082           0 :         meanOf8x8SquaredValuesBlocks[62] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2083             : 
    2084             :         // (7,7)
    2085           0 :         blockIndex = blockIndex + 8;
    2086           0 :         mean_of8x8_blocks[63] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2087           0 :         meanOf8x8SquaredValuesBlocks[63] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
    2088             :     }
    2089             :     else {
    2090        7188 :         const uint16_t stride_y = input_padded_picture_ptr->stride_y;
    2091             : 
    2092        7188 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[0], &meanOf8x8SquaredValuesBlocks[0]);
    2093             : 
    2094             :         // (0,1)
    2095        7189 :         blockIndex = blockIndex + 32;
    2096             : 
    2097        7189 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[4], &meanOf8x8SquaredValuesBlocks[4]);
    2098             : 
    2099             :         // (0,5)
    2100        7197 :         blockIndex = blockIndex + 24;
    2101             : 
    2102             :         // (1,0)
    2103        7197 :         blockIndex = inputLumaOriginIndex + (stride_y << 3);
    2104             : 
    2105        7197 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[8], &meanOf8x8SquaredValuesBlocks[8]);
    2106             : 
    2107             :         // (1,1)
    2108        7190 :         blockIndex = blockIndex + 32;
    2109             : 
    2110        7190 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[12], &meanOf8x8SquaredValuesBlocks[12]);
    2111             : 
    2112             :         // (1,5)
    2113        7193 :         blockIndex = blockIndex + 24;
    2114             : 
    2115             :         // (2,0)
    2116        7193 :         blockIndex = inputLumaOriginIndex + (stride_y << 4);
    2117             : 
    2118        7193 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[16], &meanOf8x8SquaredValuesBlocks[16]);
    2119             : 
    2120             :         // (2,1)
    2121        7192 :         blockIndex = blockIndex + 32;
    2122             : 
    2123        7192 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[20], &meanOf8x8SquaredValuesBlocks[20]);
    2124             : 
    2125             :         // (2,5)
    2126        7193 :         blockIndex = blockIndex + 24;
    2127             : 
    2128             :         // (3,0)
    2129        7193 :         blockIndex = inputLumaOriginIndex + (stride_y << 3) + (stride_y << 4);
    2130             : 
    2131        7193 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[24], &meanOf8x8SquaredValuesBlocks[24]);
    2132             : 
    2133             :         // (3,1)
    2134        7195 :         blockIndex = blockIndex + 32;
    2135             : 
    2136        7195 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[28], &meanOf8x8SquaredValuesBlocks[28]);
    2137             : 
    2138             :         // (3,5)
    2139        7197 :         blockIndex = blockIndex + 24;
    2140             : 
    2141             :         // (4,0)
    2142        7197 :         blockIndex = inputLumaOriginIndex + (stride_y << 5);
    2143             : 
    2144        7197 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[32], &meanOf8x8SquaredValuesBlocks[32]);
    2145             : 
    2146             :         // (4,1)
    2147        7195 :         blockIndex = blockIndex + 32;
    2148             : 
    2149        7195 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[36], &meanOf8x8SquaredValuesBlocks[36]);
    2150             : 
    2151             :         // (4,5)
    2152        7195 :         blockIndex = blockIndex + 24;
    2153             : 
    2154             :         // (5,0)
    2155        7195 :         blockIndex = inputLumaOriginIndex + (stride_y << 3) + (stride_y << 5);
    2156             : 
    2157        7195 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[40], &meanOf8x8SquaredValuesBlocks[40]);
    2158             : 
    2159             :         // (5,1)
    2160        7193 :         blockIndex = blockIndex + 32;
    2161             : 
    2162        7193 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[44], &meanOf8x8SquaredValuesBlocks[44]);
    2163             : 
    2164             :         // (5,5)
    2165        7196 :         blockIndex = blockIndex + 24;
    2166             : 
    2167             :         // (6,0)
    2168        7196 :         blockIndex = inputLumaOriginIndex + (stride_y << 4) + (stride_y << 5);
    2169             : 
    2170        7196 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[48], &meanOf8x8SquaredValuesBlocks[48]);
    2171             : 
    2172             :         // (6,1)
    2173        7192 :         blockIndex = blockIndex + 32;
    2174             : 
    2175        7192 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[52], &meanOf8x8SquaredValuesBlocks[52]);
    2176             : 
    2177             :         // (6,5)
    2178        7193 :         blockIndex = blockIndex + 24;
    2179             : 
    2180             :         // (7,0)
    2181        7193 :         blockIndex = inputLumaOriginIndex + (stride_y << 3) + (stride_y << 4) + (stride_y << 5);
    2182             : 
    2183        7193 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[56], &meanOf8x8SquaredValuesBlocks[56]);
    2184             : 
    2185             :         // (7,1)
    2186        7191 :         blockIndex = blockIndex + 32;
    2187             : 
    2188        7191 :         compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[60], &meanOf8x8SquaredValuesBlocks[60]);
    2189             : 
    2190             :     }
    2191             : 
    2192             :     // 16x16
    2193        7190 :     meanOf16x16Blocks[0] = (mean_of8x8_blocks[0] + mean_of8x8_blocks[1] + mean_of8x8_blocks[8] + mean_of8x8_blocks[9]) >> 2;
    2194        7190 :     meanOf16x16Blocks[1] = (mean_of8x8_blocks[2] + mean_of8x8_blocks[3] + mean_of8x8_blocks[10] + mean_of8x8_blocks[11]) >> 2;
    2195        7190 :     meanOf16x16Blocks[2] = (mean_of8x8_blocks[4] + mean_of8x8_blocks[5] + mean_of8x8_blocks[12] + mean_of8x8_blocks[13]) >> 2;
    2196        7190 :     meanOf16x16Blocks[3] = (mean_of8x8_blocks[6] + mean_of8x8_blocks[7] + mean_of8x8_blocks[14] + mean_of8x8_blocks[15]) >> 2;
    2197             : 
    2198        7190 :     meanOf16x16Blocks[4] = (mean_of8x8_blocks[16] + mean_of8x8_blocks[17] + mean_of8x8_blocks[24] + mean_of8x8_blocks[25]) >> 2;
    2199        7190 :     meanOf16x16Blocks[5] = (mean_of8x8_blocks[18] + mean_of8x8_blocks[19] + mean_of8x8_blocks[26] + mean_of8x8_blocks[27]) >> 2;
    2200        7190 :     meanOf16x16Blocks[6] = (mean_of8x8_blocks[20] + mean_of8x8_blocks[21] + mean_of8x8_blocks[28] + mean_of8x8_blocks[29]) >> 2;
    2201        7190 :     meanOf16x16Blocks[7] = (mean_of8x8_blocks[22] + mean_of8x8_blocks[23] + mean_of8x8_blocks[30] + mean_of8x8_blocks[31]) >> 2;
    2202             : 
    2203        7190 :     meanOf16x16Blocks[8] = (mean_of8x8_blocks[32] + mean_of8x8_blocks[33] + mean_of8x8_blocks[40] + mean_of8x8_blocks[41]) >> 2;
    2204        7190 :     meanOf16x16Blocks[9] = (mean_of8x8_blocks[34] + mean_of8x8_blocks[35] + mean_of8x8_blocks[42] + mean_of8x8_blocks[43]) >> 2;
    2205        7190 :     meanOf16x16Blocks[10] = (mean_of8x8_blocks[36] + mean_of8x8_blocks[37] + mean_of8x8_blocks[44] + mean_of8x8_blocks[45]) >> 2;
    2206        7190 :     meanOf16x16Blocks[11] = (mean_of8x8_blocks[38] + mean_of8x8_blocks[39] + mean_of8x8_blocks[46] + mean_of8x8_blocks[47]) >> 2;
    2207             : 
    2208        7190 :     meanOf16x16Blocks[12] = (mean_of8x8_blocks[48] + mean_of8x8_blocks[49] + mean_of8x8_blocks[56] + mean_of8x8_blocks[57]) >> 2;
    2209        7190 :     meanOf16x16Blocks[13] = (mean_of8x8_blocks[50] + mean_of8x8_blocks[51] + mean_of8x8_blocks[58] + mean_of8x8_blocks[59]) >> 2;
    2210        7190 :     meanOf16x16Blocks[14] = (mean_of8x8_blocks[52] + mean_of8x8_blocks[53] + mean_of8x8_blocks[60] + mean_of8x8_blocks[61]) >> 2;
    2211        7190 :     meanOf16x16Blocks[15] = (mean_of8x8_blocks[54] + mean_of8x8_blocks[55] + mean_of8x8_blocks[62] + mean_of8x8_blocks[63]) >> 2;
    2212             : 
    2213        7190 :     meanOf16x16SquaredValuesBlocks[0] = (meanOf8x8SquaredValuesBlocks[0] + meanOf8x8SquaredValuesBlocks[1] + meanOf8x8SquaredValuesBlocks[8] + meanOf8x8SquaredValuesBlocks[9]) >> 2;
    2214        7190 :     meanOf16x16SquaredValuesBlocks[1] = (meanOf8x8SquaredValuesBlocks[2] + meanOf8x8SquaredValuesBlocks[3] + meanOf8x8SquaredValuesBlocks[10] + meanOf8x8SquaredValuesBlocks[11]) >> 2;
    2215        7190 :     meanOf16x16SquaredValuesBlocks[2] = (meanOf8x8SquaredValuesBlocks[4] + meanOf8x8SquaredValuesBlocks[5] + meanOf8x8SquaredValuesBlocks[12] + meanOf8x8SquaredValuesBlocks[13]) >> 2;
    2216        7190 :     meanOf16x16SquaredValuesBlocks[3] = (meanOf8x8SquaredValuesBlocks[6] + meanOf8x8SquaredValuesBlocks[7] + meanOf8x8SquaredValuesBlocks[14] + meanOf8x8SquaredValuesBlocks[15]) >> 2;
    2217             : 
    2218        7190 :     meanOf16x16SquaredValuesBlocks[4] = (meanOf8x8SquaredValuesBlocks[16] + meanOf8x8SquaredValuesBlocks[17] + meanOf8x8SquaredValuesBlocks[24] + meanOf8x8SquaredValuesBlocks[25]) >> 2;
    2219        7190 :     meanOf16x16SquaredValuesBlocks[5] = (meanOf8x8SquaredValuesBlocks[18] + meanOf8x8SquaredValuesBlocks[19] + meanOf8x8SquaredValuesBlocks[26] + meanOf8x8SquaredValuesBlocks[27]) >> 2;
    2220        7190 :     meanOf16x16SquaredValuesBlocks[6] = (meanOf8x8SquaredValuesBlocks[20] + meanOf8x8SquaredValuesBlocks[21] + meanOf8x8SquaredValuesBlocks[28] + meanOf8x8SquaredValuesBlocks[29]) >> 2;
    2221        7190 :     meanOf16x16SquaredValuesBlocks[7] = (meanOf8x8SquaredValuesBlocks[22] + meanOf8x8SquaredValuesBlocks[23] + meanOf8x8SquaredValuesBlocks[30] + meanOf8x8SquaredValuesBlocks[31]) >> 2;
    2222             : 
    2223        7190 :     meanOf16x16SquaredValuesBlocks[8] = (meanOf8x8SquaredValuesBlocks[32] + meanOf8x8SquaredValuesBlocks[33] + meanOf8x8SquaredValuesBlocks[40] + meanOf8x8SquaredValuesBlocks[41]) >> 2;
    2224        7190 :     meanOf16x16SquaredValuesBlocks[9] = (meanOf8x8SquaredValuesBlocks[34] + meanOf8x8SquaredValuesBlocks[35] + meanOf8x8SquaredValuesBlocks[42] + meanOf8x8SquaredValuesBlocks[43]) >> 2;
    2225        7190 :     meanOf16x16SquaredValuesBlocks[10] = (meanOf8x8SquaredValuesBlocks[36] + meanOf8x8SquaredValuesBlocks[37] + meanOf8x8SquaredValuesBlocks[44] + meanOf8x8SquaredValuesBlocks[45]) >> 2;
    2226        7190 :     meanOf16x16SquaredValuesBlocks[11] = (meanOf8x8SquaredValuesBlocks[38] + meanOf8x8SquaredValuesBlocks[39] + meanOf8x8SquaredValuesBlocks[46] + meanOf8x8SquaredValuesBlocks[47]) >> 2;
    2227             : 
    2228        7190 :     meanOf16x16SquaredValuesBlocks[12] = (meanOf8x8SquaredValuesBlocks[48] + meanOf8x8SquaredValuesBlocks[49] + meanOf8x8SquaredValuesBlocks[56] + meanOf8x8SquaredValuesBlocks[57]) >> 2;
    2229        7190 :     meanOf16x16SquaredValuesBlocks[13] = (meanOf8x8SquaredValuesBlocks[50] + meanOf8x8SquaredValuesBlocks[51] + meanOf8x8SquaredValuesBlocks[58] + meanOf8x8SquaredValuesBlocks[59]) >> 2;
    2230        7190 :     meanOf16x16SquaredValuesBlocks[14] = (meanOf8x8SquaredValuesBlocks[52] + meanOf8x8SquaredValuesBlocks[53] + meanOf8x8SquaredValuesBlocks[60] + meanOf8x8SquaredValuesBlocks[61]) >> 2;
    2231        7190 :     meanOf16x16SquaredValuesBlocks[15] = (meanOf8x8SquaredValuesBlocks[54] + meanOf8x8SquaredValuesBlocks[55] + meanOf8x8SquaredValuesBlocks[62] + meanOf8x8SquaredValuesBlocks[63]) >> 2;
    2232             : 
    2233             :     // 32x32
    2234        7190 :     meanOf32x32Blocks[0] = (meanOf16x16Blocks[0] + meanOf16x16Blocks[1] + meanOf16x16Blocks[4] + meanOf16x16Blocks[5]) >> 2;
    2235        7190 :     meanOf32x32Blocks[1] = (meanOf16x16Blocks[2] + meanOf16x16Blocks[3] + meanOf16x16Blocks[6] + meanOf16x16Blocks[7]) >> 2;
    2236        7190 :     meanOf32x32Blocks[2] = (meanOf16x16Blocks[8] + meanOf16x16Blocks[9] + meanOf16x16Blocks[12] + meanOf16x16Blocks[13]) >> 2;
    2237        7190 :     meanOf32x32Blocks[3] = (meanOf16x16Blocks[10] + meanOf16x16Blocks[11] + meanOf16x16Blocks[14] + meanOf16x16Blocks[15]) >> 2;
    2238             : 
    2239        7190 :     meanOf32x32SquaredValuesBlocks[0] = (meanOf16x16SquaredValuesBlocks[0] + meanOf16x16SquaredValuesBlocks[1] + meanOf16x16SquaredValuesBlocks[4] + meanOf16x16SquaredValuesBlocks[5]) >> 2;
    2240        7190 :     meanOf32x32SquaredValuesBlocks[1] = (meanOf16x16SquaredValuesBlocks[2] + meanOf16x16SquaredValuesBlocks[3] + meanOf16x16SquaredValuesBlocks[6] + meanOf16x16SquaredValuesBlocks[7]) >> 2;
    2241        7190 :     meanOf32x32SquaredValuesBlocks[2] = (meanOf16x16SquaredValuesBlocks[8] + meanOf16x16SquaredValuesBlocks[9] + meanOf16x16SquaredValuesBlocks[12] + meanOf16x16SquaredValuesBlocks[13]) >> 2;
    2242        7190 :     meanOf32x32SquaredValuesBlocks[3] = (meanOf16x16SquaredValuesBlocks[10] + meanOf16x16SquaredValuesBlocks[11] + meanOf16x16SquaredValuesBlocks[14] + meanOf16x16SquaredValuesBlocks[15]) >> 2;
    2243             : 
    2244             :     // 64x64
    2245        7190 :     meanOf64x64Blocks = (meanOf32x32Blocks[0] + meanOf32x32Blocks[1] + meanOf32x32Blocks[2] + meanOf32x32Blocks[3]) >> 2;
    2246        7190 :     meanOf64x64SquaredValuesBlocks = (meanOf32x32SquaredValuesBlocks[0] + meanOf32x32SquaredValuesBlocks[1] + meanOf32x32SquaredValuesBlocks[2] + meanOf32x32SquaredValuesBlocks[3]) >> 2;
    2247             : 
    2248             :     // 8x8 means
    2249        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_0] = (uint8_t)(mean_of8x8_blocks[0] >> MEAN_PRECISION);
    2250        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_1] = (uint8_t)(mean_of8x8_blocks[1] >> MEAN_PRECISION);
    2251        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_2] = (uint8_t)(mean_of8x8_blocks[2] >> MEAN_PRECISION);
    2252        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_3] = (uint8_t)(mean_of8x8_blocks[3] >> MEAN_PRECISION);
    2253        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_4] = (uint8_t)(mean_of8x8_blocks[4] >> MEAN_PRECISION);
    2254        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_5] = (uint8_t)(mean_of8x8_blocks[5] >> MEAN_PRECISION);
    2255        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_6] = (uint8_t)(mean_of8x8_blocks[6] >> MEAN_PRECISION);
    2256        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_7] = (uint8_t)(mean_of8x8_blocks[7] >> MEAN_PRECISION);
    2257        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_8] = (uint8_t)(mean_of8x8_blocks[8] >> MEAN_PRECISION);
    2258        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_9] = (uint8_t)(mean_of8x8_blocks[9] >> MEAN_PRECISION);
    2259        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_10] = (uint8_t)(mean_of8x8_blocks[10] >> MEAN_PRECISION);
    2260        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_11] = (uint8_t)(mean_of8x8_blocks[11] >> MEAN_PRECISION);
    2261        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_12] = (uint8_t)(mean_of8x8_blocks[12] >> MEAN_PRECISION);
    2262        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_13] = (uint8_t)(mean_of8x8_blocks[13] >> MEAN_PRECISION);
    2263        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_14] = (uint8_t)(mean_of8x8_blocks[14] >> MEAN_PRECISION);
    2264        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_15] = (uint8_t)(mean_of8x8_blocks[15] >> MEAN_PRECISION);
    2265        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_16] = (uint8_t)(mean_of8x8_blocks[16] >> MEAN_PRECISION);
    2266        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_17] = (uint8_t)(mean_of8x8_blocks[17] >> MEAN_PRECISION);
    2267        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_18] = (uint8_t)(mean_of8x8_blocks[18] >> MEAN_PRECISION);
    2268        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_19] = (uint8_t)(mean_of8x8_blocks[19] >> MEAN_PRECISION);
    2269        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_20] = (uint8_t)(mean_of8x8_blocks[20] >> MEAN_PRECISION);
    2270        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_21] = (uint8_t)(mean_of8x8_blocks[21] >> MEAN_PRECISION);
    2271        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_22] = (uint8_t)(mean_of8x8_blocks[22] >> MEAN_PRECISION);
    2272        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_23] = (uint8_t)(mean_of8x8_blocks[23] >> MEAN_PRECISION);
    2273        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_24] = (uint8_t)(mean_of8x8_blocks[24] >> MEAN_PRECISION);
    2274        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_25] = (uint8_t)(mean_of8x8_blocks[25] >> MEAN_PRECISION);
    2275        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_26] = (uint8_t)(mean_of8x8_blocks[26] >> MEAN_PRECISION);
    2276        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_27] = (uint8_t)(mean_of8x8_blocks[27] >> MEAN_PRECISION);
    2277        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_28] = (uint8_t)(mean_of8x8_blocks[28] >> MEAN_PRECISION);
    2278        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_29] = (uint8_t)(mean_of8x8_blocks[29] >> MEAN_PRECISION);
    2279        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_30] = (uint8_t)(mean_of8x8_blocks[30] >> MEAN_PRECISION);
    2280        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_31] = (uint8_t)(mean_of8x8_blocks[31] >> MEAN_PRECISION);
    2281        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_32] = (uint8_t)(mean_of8x8_blocks[32] >> MEAN_PRECISION);
    2282        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_33] = (uint8_t)(mean_of8x8_blocks[33] >> MEAN_PRECISION);
    2283        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_34] = (uint8_t)(mean_of8x8_blocks[34] >> MEAN_PRECISION);
    2284        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_35] = (uint8_t)(mean_of8x8_blocks[35] >> MEAN_PRECISION);
    2285        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_36] = (uint8_t)(mean_of8x8_blocks[36] >> MEAN_PRECISION);
    2286        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_37] = (uint8_t)(mean_of8x8_blocks[37] >> MEAN_PRECISION);
    2287        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_38] = (uint8_t)(mean_of8x8_blocks[38] >> MEAN_PRECISION);
    2288        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_39] = (uint8_t)(mean_of8x8_blocks[39] >> MEAN_PRECISION);
    2289        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_40] = (uint8_t)(mean_of8x8_blocks[40] >> MEAN_PRECISION);
    2290        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_41] = (uint8_t)(mean_of8x8_blocks[41] >> MEAN_PRECISION);
    2291        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_42] = (uint8_t)(mean_of8x8_blocks[42] >> MEAN_PRECISION);
    2292        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_43] = (uint8_t)(mean_of8x8_blocks[43] >> MEAN_PRECISION);
    2293        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_44] = (uint8_t)(mean_of8x8_blocks[44] >> MEAN_PRECISION);
    2294        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_45] = (uint8_t)(mean_of8x8_blocks[45] >> MEAN_PRECISION);
    2295        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_46] = (uint8_t)(mean_of8x8_blocks[46] >> MEAN_PRECISION);
    2296        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_47] = (uint8_t)(mean_of8x8_blocks[47] >> MEAN_PRECISION);
    2297        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_48] = (uint8_t)(mean_of8x8_blocks[48] >> MEAN_PRECISION);
    2298        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_49] = (uint8_t)(mean_of8x8_blocks[49] >> MEAN_PRECISION);
    2299        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_50] = (uint8_t)(mean_of8x8_blocks[50] >> MEAN_PRECISION);
    2300        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_51] = (uint8_t)(mean_of8x8_blocks[51] >> MEAN_PRECISION);
    2301        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_52] = (uint8_t)(mean_of8x8_blocks[52] >> MEAN_PRECISION);
    2302        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_53] = (uint8_t)(mean_of8x8_blocks[53] >> MEAN_PRECISION);
    2303        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_54] = (uint8_t)(mean_of8x8_blocks[54] >> MEAN_PRECISION);
    2304        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_55] = (uint8_t)(mean_of8x8_blocks[55] >> MEAN_PRECISION);
    2305        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_56] = (uint8_t)(mean_of8x8_blocks[56] >> MEAN_PRECISION);
    2306        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_57] = (uint8_t)(mean_of8x8_blocks[57] >> MEAN_PRECISION);
    2307        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_58] = (uint8_t)(mean_of8x8_blocks[58] >> MEAN_PRECISION);
    2308        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_59] = (uint8_t)(mean_of8x8_blocks[59] >> MEAN_PRECISION);
    2309        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_60] = (uint8_t)(mean_of8x8_blocks[60] >> MEAN_PRECISION);
    2310        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_61] = (uint8_t)(mean_of8x8_blocks[61] >> MEAN_PRECISION);
    2311        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_62] = (uint8_t)(mean_of8x8_blocks[62] >> MEAN_PRECISION);
    2312        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_63] = (uint8_t)(mean_of8x8_blocks[63] >> MEAN_PRECISION);
    2313             : 
    2314             :     // 16x16 mean
    2315        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_0] = (uint8_t)(meanOf16x16Blocks[0] >> MEAN_PRECISION);
    2316        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_1] = (uint8_t)(meanOf16x16Blocks[1] >> MEAN_PRECISION);
    2317        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_2] = (uint8_t)(meanOf16x16Blocks[2] >> MEAN_PRECISION);
    2318        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_3] = (uint8_t)(meanOf16x16Blocks[3] >> MEAN_PRECISION);
    2319        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_4] = (uint8_t)(meanOf16x16Blocks[4] >> MEAN_PRECISION);
    2320        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_5] = (uint8_t)(meanOf16x16Blocks[5] >> MEAN_PRECISION);
    2321        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_6] = (uint8_t)(meanOf16x16Blocks[6] >> MEAN_PRECISION);
    2322        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_7] = (uint8_t)(meanOf16x16Blocks[7] >> MEAN_PRECISION);
    2323        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_8] = (uint8_t)(meanOf16x16Blocks[8] >> MEAN_PRECISION);
    2324        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_9] = (uint8_t)(meanOf16x16Blocks[9] >> MEAN_PRECISION);
    2325        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_10] = (uint8_t)(meanOf16x16Blocks[10] >> MEAN_PRECISION);
    2326        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_11] = (uint8_t)(meanOf16x16Blocks[11] >> MEAN_PRECISION);
    2327        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_12] = (uint8_t)(meanOf16x16Blocks[12] >> MEAN_PRECISION);
    2328        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_13] = (uint8_t)(meanOf16x16Blocks[13] >> MEAN_PRECISION);
    2329        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_14] = (uint8_t)(meanOf16x16Blocks[14] >> MEAN_PRECISION);
    2330        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_15] = (uint8_t)(meanOf16x16Blocks[15] >> MEAN_PRECISION);
    2331             : 
    2332             :     // 32x32 mean
    2333        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_32x32_0] = (uint8_t)(meanOf32x32Blocks[0] >> MEAN_PRECISION);
    2334        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_32x32_1] = (uint8_t)(meanOf32x32Blocks[1] >> MEAN_PRECISION);
    2335        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_32x32_2] = (uint8_t)(meanOf32x32Blocks[2] >> MEAN_PRECISION);
    2336        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_32x32_3] = (uint8_t)(meanOf32x32Blocks[3] >> MEAN_PRECISION);
    2337             : 
    2338             :     // 64x64 mean
    2339        7190 :     picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_64x64] = (uint8_t)(meanOf64x64Blocks >> MEAN_PRECISION);
    2340             : 
    2341             :     // 8x8 variances
    2342        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_0] = (uint16_t)((meanOf8x8SquaredValuesBlocks[0] - (mean_of8x8_blocks[0] * mean_of8x8_blocks[0])) >> VARIANCE_PRECISION);
    2343        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_1] = (uint16_t)((meanOf8x8SquaredValuesBlocks[1] - (mean_of8x8_blocks[1] * mean_of8x8_blocks[1])) >> VARIANCE_PRECISION);
    2344        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_2] = (uint16_t)((meanOf8x8SquaredValuesBlocks[2] - (mean_of8x8_blocks[2] * mean_of8x8_blocks[2])) >> VARIANCE_PRECISION);
    2345        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_3] = (uint16_t)((meanOf8x8SquaredValuesBlocks[3] - (mean_of8x8_blocks[3] * mean_of8x8_blocks[3])) >> VARIANCE_PRECISION);
    2346        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_4] = (uint16_t)((meanOf8x8SquaredValuesBlocks[4] - (mean_of8x8_blocks[4] * mean_of8x8_blocks[4])) >> VARIANCE_PRECISION);
    2347        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_5] = (uint16_t)((meanOf8x8SquaredValuesBlocks[5] - (mean_of8x8_blocks[5] * mean_of8x8_blocks[5])) >> VARIANCE_PRECISION);
    2348        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_6] = (uint16_t)((meanOf8x8SquaredValuesBlocks[6] - (mean_of8x8_blocks[6] * mean_of8x8_blocks[6])) >> VARIANCE_PRECISION);
    2349        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_7] = (uint16_t)((meanOf8x8SquaredValuesBlocks[7] - (mean_of8x8_blocks[7] * mean_of8x8_blocks[7])) >> VARIANCE_PRECISION);
    2350        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_8] = (uint16_t)((meanOf8x8SquaredValuesBlocks[8] - (mean_of8x8_blocks[8] * mean_of8x8_blocks[8])) >> VARIANCE_PRECISION);
    2351        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_9] = (uint16_t)((meanOf8x8SquaredValuesBlocks[9] - (mean_of8x8_blocks[9] * mean_of8x8_blocks[9])) >> VARIANCE_PRECISION);
    2352        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_10] = (uint16_t)((meanOf8x8SquaredValuesBlocks[10] - (mean_of8x8_blocks[10] * mean_of8x8_blocks[10])) >> VARIANCE_PRECISION);
    2353        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_11] = (uint16_t)((meanOf8x8SquaredValuesBlocks[11] - (mean_of8x8_blocks[11] * mean_of8x8_blocks[11])) >> VARIANCE_PRECISION);
    2354        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_12] = (uint16_t)((meanOf8x8SquaredValuesBlocks[12] - (mean_of8x8_blocks[12] * mean_of8x8_blocks[12])) >> VARIANCE_PRECISION);
    2355        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_13] = (uint16_t)((meanOf8x8SquaredValuesBlocks[13] - (mean_of8x8_blocks[13] * mean_of8x8_blocks[13])) >> VARIANCE_PRECISION);
    2356        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_14] = (uint16_t)((meanOf8x8SquaredValuesBlocks[14] - (mean_of8x8_blocks[14] * mean_of8x8_blocks[14])) >> VARIANCE_PRECISION);
    2357        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_15] = (uint16_t)((meanOf8x8SquaredValuesBlocks[15] - (mean_of8x8_blocks[15] * mean_of8x8_blocks[15])) >> VARIANCE_PRECISION);
    2358        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_16] = (uint16_t)((meanOf8x8SquaredValuesBlocks[16] - (mean_of8x8_blocks[16] * mean_of8x8_blocks[16])) >> VARIANCE_PRECISION);
    2359        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_17] = (uint16_t)((meanOf8x8SquaredValuesBlocks[17] - (mean_of8x8_blocks[17] * mean_of8x8_blocks[17])) >> VARIANCE_PRECISION);
    2360        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_18] = (uint16_t)((meanOf8x8SquaredValuesBlocks[18] - (mean_of8x8_blocks[18] * mean_of8x8_blocks[18])) >> VARIANCE_PRECISION);
    2361        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_19] = (uint16_t)((meanOf8x8SquaredValuesBlocks[19] - (mean_of8x8_blocks[19] * mean_of8x8_blocks[19])) >> VARIANCE_PRECISION);
    2362        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_20] = (uint16_t)((meanOf8x8SquaredValuesBlocks[20] - (mean_of8x8_blocks[20] * mean_of8x8_blocks[20])) >> VARIANCE_PRECISION);
    2363        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_21] = (uint16_t)((meanOf8x8SquaredValuesBlocks[21] - (mean_of8x8_blocks[21] * mean_of8x8_blocks[21])) >> VARIANCE_PRECISION);
    2364        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_22] = (uint16_t)((meanOf8x8SquaredValuesBlocks[22] - (mean_of8x8_blocks[22] * mean_of8x8_blocks[22])) >> VARIANCE_PRECISION);
    2365        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_23] = (uint16_t)((meanOf8x8SquaredValuesBlocks[23] - (mean_of8x8_blocks[23] * mean_of8x8_blocks[23])) >> VARIANCE_PRECISION);
    2366        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_24] = (uint16_t)((meanOf8x8SquaredValuesBlocks[24] - (mean_of8x8_blocks[24] * mean_of8x8_blocks[24])) >> VARIANCE_PRECISION);
    2367        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_25] = (uint16_t)((meanOf8x8SquaredValuesBlocks[25] - (mean_of8x8_blocks[25] * mean_of8x8_blocks[25])) >> VARIANCE_PRECISION);
    2368        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_26] = (uint16_t)((meanOf8x8SquaredValuesBlocks[26] - (mean_of8x8_blocks[26] * mean_of8x8_blocks[26])) >> VARIANCE_PRECISION);
    2369        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_27] = (uint16_t)((meanOf8x8SquaredValuesBlocks[27] - (mean_of8x8_blocks[27] * mean_of8x8_blocks[27])) >> VARIANCE_PRECISION);
    2370        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_28] = (uint16_t)((meanOf8x8SquaredValuesBlocks[28] - (mean_of8x8_blocks[28] * mean_of8x8_blocks[28])) >> VARIANCE_PRECISION);
    2371        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_29] = (uint16_t)((meanOf8x8SquaredValuesBlocks[29] - (mean_of8x8_blocks[29] * mean_of8x8_blocks[29])) >> VARIANCE_PRECISION);
    2372        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_30] = (uint16_t)((meanOf8x8SquaredValuesBlocks[30] - (mean_of8x8_blocks[30] * mean_of8x8_blocks[30])) >> VARIANCE_PRECISION);
    2373        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_31] = (uint16_t)((meanOf8x8SquaredValuesBlocks[31] - (mean_of8x8_blocks[31] * mean_of8x8_blocks[31])) >> VARIANCE_PRECISION);
    2374        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_32] = (uint16_t)((meanOf8x8SquaredValuesBlocks[32] - (mean_of8x8_blocks[32] * mean_of8x8_blocks[32])) >> VARIANCE_PRECISION);
    2375        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_33] = (uint16_t)((meanOf8x8SquaredValuesBlocks[33] - (mean_of8x8_blocks[33] * mean_of8x8_blocks[33])) >> VARIANCE_PRECISION);
    2376        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_34] = (uint16_t)((meanOf8x8SquaredValuesBlocks[34] - (mean_of8x8_blocks[34] * mean_of8x8_blocks[34])) >> VARIANCE_PRECISION);
    2377        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_35] = (uint16_t)((meanOf8x8SquaredValuesBlocks[35] - (mean_of8x8_blocks[35] * mean_of8x8_blocks[35])) >> VARIANCE_PRECISION);
    2378        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_36] = (uint16_t)((meanOf8x8SquaredValuesBlocks[36] - (mean_of8x8_blocks[36] * mean_of8x8_blocks[36])) >> VARIANCE_PRECISION);
    2379        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_37] = (uint16_t)((meanOf8x8SquaredValuesBlocks[37] - (mean_of8x8_blocks[37] * mean_of8x8_blocks[37])) >> VARIANCE_PRECISION);
    2380        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_38] = (uint16_t)((meanOf8x8SquaredValuesBlocks[38] - (mean_of8x8_blocks[38] * mean_of8x8_blocks[38])) >> VARIANCE_PRECISION);
    2381        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_39] = (uint16_t)((meanOf8x8SquaredValuesBlocks[39] - (mean_of8x8_blocks[39] * mean_of8x8_blocks[39])) >> VARIANCE_PRECISION);
    2382        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_40] = (uint16_t)((meanOf8x8SquaredValuesBlocks[40] - (mean_of8x8_blocks[40] * mean_of8x8_blocks[40])) >> VARIANCE_PRECISION);
    2383        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_41] = (uint16_t)((meanOf8x8SquaredValuesBlocks[41] - (mean_of8x8_blocks[41] * mean_of8x8_blocks[41])) >> VARIANCE_PRECISION);
    2384        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_42] = (uint16_t)((meanOf8x8SquaredValuesBlocks[42] - (mean_of8x8_blocks[42] * mean_of8x8_blocks[42])) >> VARIANCE_PRECISION);
    2385        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_43] = (uint16_t)((meanOf8x8SquaredValuesBlocks[43] - (mean_of8x8_blocks[43] * mean_of8x8_blocks[43])) >> VARIANCE_PRECISION);
    2386        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_44] = (uint16_t)((meanOf8x8SquaredValuesBlocks[44] - (mean_of8x8_blocks[44] * mean_of8x8_blocks[44])) >> VARIANCE_PRECISION);
    2387        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_45] = (uint16_t)((meanOf8x8SquaredValuesBlocks[45] - (mean_of8x8_blocks[45] * mean_of8x8_blocks[45])) >> VARIANCE_PRECISION);
    2388        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_46] = (uint16_t)((meanOf8x8SquaredValuesBlocks[46] - (mean_of8x8_blocks[46] * mean_of8x8_blocks[46])) >> VARIANCE_PRECISION);
    2389        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_47] = (uint16_t)((meanOf8x8SquaredValuesBlocks[47] - (mean_of8x8_blocks[47] * mean_of8x8_blocks[47])) >> VARIANCE_PRECISION);
    2390        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_48] = (uint16_t)((meanOf8x8SquaredValuesBlocks[48] - (mean_of8x8_blocks[48] * mean_of8x8_blocks[48])) >> VARIANCE_PRECISION);
    2391        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_49] = (uint16_t)((meanOf8x8SquaredValuesBlocks[49] - (mean_of8x8_blocks[49] * mean_of8x8_blocks[49])) >> VARIANCE_PRECISION);
    2392        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_50] = (uint16_t)((meanOf8x8SquaredValuesBlocks[50] - (mean_of8x8_blocks[50] * mean_of8x8_blocks[50])) >> VARIANCE_PRECISION);
    2393        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_51] = (uint16_t)((meanOf8x8SquaredValuesBlocks[51] - (mean_of8x8_blocks[51] * mean_of8x8_blocks[51])) >> VARIANCE_PRECISION);
    2394        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_52] = (uint16_t)((meanOf8x8SquaredValuesBlocks[52] - (mean_of8x8_blocks[52] * mean_of8x8_blocks[52])) >> VARIANCE_PRECISION);
    2395        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_53] = (uint16_t)((meanOf8x8SquaredValuesBlocks[53] - (mean_of8x8_blocks[53] * mean_of8x8_blocks[53])) >> VARIANCE_PRECISION);
    2396        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_54] = (uint16_t)((meanOf8x8SquaredValuesBlocks[54] - (mean_of8x8_blocks[54] * mean_of8x8_blocks[54])) >> VARIANCE_PRECISION);
    2397        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_55] = (uint16_t)((meanOf8x8SquaredValuesBlocks[55] - (mean_of8x8_blocks[55] * mean_of8x8_blocks[55])) >> VARIANCE_PRECISION);
    2398        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_56] = (uint16_t)((meanOf8x8SquaredValuesBlocks[56] - (mean_of8x8_blocks[56] * mean_of8x8_blocks[56])) >> VARIANCE_PRECISION);
    2399        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_57] = (uint16_t)((meanOf8x8SquaredValuesBlocks[57] - (mean_of8x8_blocks[57] * mean_of8x8_blocks[57])) >> VARIANCE_PRECISION);
    2400        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_58] = (uint16_t)((meanOf8x8SquaredValuesBlocks[58] - (mean_of8x8_blocks[58] * mean_of8x8_blocks[58])) >> VARIANCE_PRECISION);
    2401        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_59] = (uint16_t)((meanOf8x8SquaredValuesBlocks[59] - (mean_of8x8_blocks[59] * mean_of8x8_blocks[59])) >> VARIANCE_PRECISION);
    2402        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_60] = (uint16_t)((meanOf8x8SquaredValuesBlocks[60] - (mean_of8x8_blocks[60] * mean_of8x8_blocks[60])) >> VARIANCE_PRECISION);
    2403        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_61] = (uint16_t)((meanOf8x8SquaredValuesBlocks[61] - (mean_of8x8_blocks[61] * mean_of8x8_blocks[61])) >> VARIANCE_PRECISION);
    2404        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_62] = (uint16_t)((meanOf8x8SquaredValuesBlocks[62] - (mean_of8x8_blocks[62] * mean_of8x8_blocks[62])) >> VARIANCE_PRECISION);
    2405        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_63] = (uint16_t)((meanOf8x8SquaredValuesBlocks[63] - (mean_of8x8_blocks[63] * mean_of8x8_blocks[63])) >> VARIANCE_PRECISION);
    2406             : 
    2407             :     // 16x16 variances
    2408        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_0] = (uint16_t)((meanOf16x16SquaredValuesBlocks[0] - (meanOf16x16Blocks[0] * meanOf16x16Blocks[0])) >> VARIANCE_PRECISION);
    2409        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_1] = (uint16_t)((meanOf16x16SquaredValuesBlocks[1] - (meanOf16x16Blocks[1] * meanOf16x16Blocks[1])) >> VARIANCE_PRECISION);
    2410        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_2] = (uint16_t)((meanOf16x16SquaredValuesBlocks[2] - (meanOf16x16Blocks[2] * meanOf16x16Blocks[2])) >> VARIANCE_PRECISION);
    2411        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_3] = (uint16_t)((meanOf16x16SquaredValuesBlocks[3] - (meanOf16x16Blocks[3] * meanOf16x16Blocks[3])) >> VARIANCE_PRECISION);
    2412        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_4] = (uint16_t)((meanOf16x16SquaredValuesBlocks[4] - (meanOf16x16Blocks[4] * meanOf16x16Blocks[4])) >> VARIANCE_PRECISION);
    2413        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_5] = (uint16_t)((meanOf16x16SquaredValuesBlocks[5] - (meanOf16x16Blocks[5] * meanOf16x16Blocks[5])) >> VARIANCE_PRECISION);
    2414        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_6] = (uint16_t)((meanOf16x16SquaredValuesBlocks[6] - (meanOf16x16Blocks[6] * meanOf16x16Blocks[6])) >> VARIANCE_PRECISION);
    2415        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_7] = (uint16_t)((meanOf16x16SquaredValuesBlocks[7] - (meanOf16x16Blocks[7] * meanOf16x16Blocks[7])) >> VARIANCE_PRECISION);
    2416        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_8] = (uint16_t)((meanOf16x16SquaredValuesBlocks[8] - (meanOf16x16Blocks[8] * meanOf16x16Blocks[8])) >> VARIANCE_PRECISION);
    2417        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_9] = (uint16_t)((meanOf16x16SquaredValuesBlocks[9] - (meanOf16x16Blocks[9] * meanOf16x16Blocks[9])) >> VARIANCE_PRECISION);
    2418        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_10] = (uint16_t)((meanOf16x16SquaredValuesBlocks[10] - (meanOf16x16Blocks[10] * meanOf16x16Blocks[10])) >> VARIANCE_PRECISION);
    2419        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_11] = (uint16_t)((meanOf16x16SquaredValuesBlocks[11] - (meanOf16x16Blocks[11] * meanOf16x16Blocks[11])) >> VARIANCE_PRECISION);
    2420        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_12] = (uint16_t)((meanOf16x16SquaredValuesBlocks[12] - (meanOf16x16Blocks[12] * meanOf16x16Blocks[12])) >> VARIANCE_PRECISION);
    2421        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_13] = (uint16_t)((meanOf16x16SquaredValuesBlocks[13] - (meanOf16x16Blocks[13] * meanOf16x16Blocks[13])) >> VARIANCE_PRECISION);
    2422        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_14] = (uint16_t)((meanOf16x16SquaredValuesBlocks[14] - (meanOf16x16Blocks[14] * meanOf16x16Blocks[14])) >> VARIANCE_PRECISION);
    2423        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_15] = (uint16_t)((meanOf16x16SquaredValuesBlocks[15] - (meanOf16x16Blocks[15] * meanOf16x16Blocks[15])) >> VARIANCE_PRECISION);
    2424             : 
    2425             :     // 32x32 variances
    2426        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_32x32_0] = (uint16_t)((meanOf32x32SquaredValuesBlocks[0] - (meanOf32x32Blocks[0] * meanOf32x32Blocks[0])) >> VARIANCE_PRECISION);
    2427        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_32x32_1] = (uint16_t)((meanOf32x32SquaredValuesBlocks[1] - (meanOf32x32Blocks[1] * meanOf32x32Blocks[1])) >> VARIANCE_PRECISION);
    2428        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_32x32_2] = (uint16_t)((meanOf32x32SquaredValuesBlocks[2] - (meanOf32x32Blocks[2] * meanOf32x32Blocks[2])) >> VARIANCE_PRECISION);
    2429        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_32x32_3] = (uint16_t)((meanOf32x32SquaredValuesBlocks[3] - (meanOf32x32Blocks[3] * meanOf32x32Blocks[3])) >> VARIANCE_PRECISION);
    2430             : 
    2431             :     // 64x64 variance
    2432        7190 :     picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_64x64] = (uint16_t)((meanOf64x64SquaredValuesBlocks - (meanOf64x64Blocks * meanOf64x64Blocks)) >> VARIANCE_PRECISION);
    2433             : 
    2434        7190 :     return return_error;
    2435             : }
    2436             : 
    2437           0 : EbErrorType DenoiseInputPicture(
    2438             :     PictureAnalysisContext    *context_ptr,
    2439             :     SequenceControlSet        *sequence_control_set_ptr,
    2440             :     PictureParentControlSet   *picture_control_set_ptr,
    2441             :     uint32_t                       sb_total_count,
    2442             :     EbPictureBufferDesc       *input_picture_ptr,
    2443             :     EbPictureBufferDesc       *denoised_picture_ptr,
    2444             :     uint32_t                         picture_width_in_sb)
    2445             : {
    2446           0 :     EbErrorType return_error = EB_ErrorNone;
    2447             : 
    2448             :     uint32_t         lcuCodingOrder;
    2449             :     uint32_t       sb_origin_x;
    2450             :     uint32_t       sb_origin_y;
    2451             :     uint16_t       verticalIdx;
    2452             : 
    2453           0 :     uint32_t color_format = input_picture_ptr->color_format;
    2454           0 :     const uint16_t subsampling_x = (color_format == EB_YUV444 ? 1 : 2) - 1;
    2455           0 :     const uint16_t subsampling_y = (color_format >= EB_YUV422 ? 1 : 2) - 1;
    2456             : 
    2457             :     //use denoised input if the source is extremely noisy
    2458           0 :     if (picture_control_set_ptr->pic_noise_class >= PIC_NOISE_CLASS_4) {
    2459           0 :         uint32_t inLumaOffSet = input_picture_ptr->origin_x + input_picture_ptr->origin_y      * input_picture_ptr->stride_y;
    2460           0 :         uint32_t inChromaOffSet = (input_picture_ptr->origin_x >> subsampling_x) + (input_picture_ptr->origin_y >> subsampling_y) * input_picture_ptr->stride_cb;
    2461           0 :         uint32_t denLumaOffSet = denoised_picture_ptr->origin_x + denoised_picture_ptr->origin_y   * denoised_picture_ptr->stride_y;
    2462           0 :         uint32_t denChromaOffSet = (denoised_picture_ptr->origin_x >> subsampling_x) + (denoised_picture_ptr->origin_y >> subsampling_y) * denoised_picture_ptr->stride_cb;
    2463             : 
    2464             :         //filter Luma
    2465           0 :         for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
    2466           0 :             sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2467           0 :             sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2468             : 
    2469           0 :             if (sb_origin_x == 0)
    2470           0 :                 noise_extract_luma_strong(
    2471             :                     input_picture_ptr,
    2472             :                     denoised_picture_ptr,
    2473             :                     sb_origin_y,
    2474             :                     sb_origin_x);
    2475             : 
    2476           0 :             if (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width)
    2477             :             {
    2478           0 :                 noise_extract_luma_strong_c(
    2479             :                     input_picture_ptr,
    2480             :                     denoised_picture_ptr,
    2481             :                     sb_origin_y,
    2482             :                     sb_origin_x);
    2483             :             }
    2484             :         }
    2485             : 
    2486             :         //copy Luma
    2487           0 :         for (verticalIdx = 0; verticalIdx < input_picture_ptr->height; ++verticalIdx) {
    2488           0 :             EB_MEMCPY(input_picture_ptr->buffer_y + inLumaOffSet + verticalIdx * input_picture_ptr->stride_y,
    2489             :                 denoised_picture_ptr->buffer_y + denLumaOffSet + verticalIdx * denoised_picture_ptr->stride_y,
    2490             :                 sizeof(uint8_t) * input_picture_ptr->width);
    2491             :         }
    2492             : 
    2493             :         //filter chroma
    2494           0 :         for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
    2495           0 :             sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2496           0 :             sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2497             : 
    2498           0 :             if (sb_origin_x == 0)
    2499           0 :                 noise_extract_chroma_strong(
    2500             :                     input_picture_ptr,
    2501             :                     denoised_picture_ptr,
    2502             :                     sb_origin_y >> subsampling_y,
    2503             :                     sb_origin_x >> subsampling_x);
    2504             : 
    2505           0 :             if (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width)
    2506             :             {
    2507           0 :                 noise_extract_chroma_strong_c(
    2508             :                     input_picture_ptr,
    2509             :                     denoised_picture_ptr,
    2510             :                     sb_origin_y >> subsampling_y,
    2511             :                     sb_origin_x >> subsampling_x);
    2512             :             }
    2513             :         }
    2514             : 
    2515             :         //copy chroma
    2516           0 :         for (verticalIdx = 0; verticalIdx < input_picture_ptr->height >> subsampling_y; ++verticalIdx) {
    2517           0 :             EB_MEMCPY(input_picture_ptr->buffer_cb + inChromaOffSet + verticalIdx * input_picture_ptr->stride_cb,
    2518             :                 denoised_picture_ptr->buffer_cb + denChromaOffSet + verticalIdx * denoised_picture_ptr->stride_cb,
    2519             :                 sizeof(uint8_t) * input_picture_ptr->width >> subsampling_x);
    2520             : 
    2521           0 :             EB_MEMCPY(input_picture_ptr->buffer_cr + inChromaOffSet + verticalIdx * input_picture_ptr->stride_cr,
    2522             :                 denoised_picture_ptr->buffer_cr + denChromaOffSet + verticalIdx * denoised_picture_ptr->stride_cr,
    2523             :                 sizeof(uint8_t) * input_picture_ptr->width >> subsampling_x);
    2524             :         }
    2525             :     }
    2526           0 :     else if (picture_control_set_ptr->pic_noise_class >= PIC_NOISE_CLASS_3_1) {
    2527           0 :         uint32_t inLumaOffSet = input_picture_ptr->origin_x + input_picture_ptr->origin_y      * input_picture_ptr->stride_y;
    2528           0 :         uint32_t inChromaOffSet = (input_picture_ptr->origin_x >> subsampling_x) + (input_picture_ptr->origin_y >> subsampling_y) * input_picture_ptr->stride_cb;
    2529           0 :         uint32_t denLumaOffSet = denoised_picture_ptr->origin_x + denoised_picture_ptr->origin_y   * denoised_picture_ptr->stride_y;
    2530           0 :         uint32_t denChromaOffSet = (denoised_picture_ptr->origin_x >> subsampling_x) + (denoised_picture_ptr->origin_y >> subsampling_y) * denoised_picture_ptr->stride_cb;
    2531             : 
    2532           0 :         for (verticalIdx = 0; verticalIdx < input_picture_ptr->height; ++verticalIdx) {
    2533           0 :             EB_MEMCPY(input_picture_ptr->buffer_y + inLumaOffSet + verticalIdx * input_picture_ptr->stride_y,
    2534             :                 denoised_picture_ptr->buffer_y + denLumaOffSet + verticalIdx * denoised_picture_ptr->stride_y,
    2535             :                 sizeof(uint8_t) * input_picture_ptr->width);
    2536             :         }
    2537             : 
    2538             :         //filter chroma
    2539           0 :         for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
    2540           0 :             sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2541           0 :             sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2542             : 
    2543           0 :             if (sb_origin_x == 0)
    2544           0 :                 noise_extract_chroma_weak(
    2545             :                     input_picture_ptr,
    2546             :                     denoised_picture_ptr,
    2547             :                     sb_origin_y >> subsampling_y,
    2548             :                     sb_origin_x >> subsampling_x);
    2549             : 
    2550           0 :             if (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width)
    2551             :             {
    2552           0 :                 noise_extract_chroma_weak_c(
    2553             :                     input_picture_ptr,
    2554             :                     denoised_picture_ptr,
    2555             :                     sb_origin_y >> subsampling_y,
    2556             :                     sb_origin_x >> subsampling_x);
    2557             :             }
    2558             :         }
    2559             : 
    2560           0 :         for (verticalIdx = 0; verticalIdx < input_picture_ptr->height >> subsampling_y; ++verticalIdx) {
    2561           0 :             EB_MEMCPY(input_picture_ptr->buffer_cb + inChromaOffSet + verticalIdx * input_picture_ptr->stride_cb,
    2562             :                 denoised_picture_ptr->buffer_cb + denChromaOffSet + verticalIdx * denoised_picture_ptr->stride_cb,
    2563             :                 sizeof(uint8_t) * input_picture_ptr->width >> subsampling_x);
    2564             : 
    2565           0 :             EB_MEMCPY(input_picture_ptr->buffer_cr + inChromaOffSet + verticalIdx * input_picture_ptr->stride_cr,
    2566             :                 denoised_picture_ptr->buffer_cr + denChromaOffSet + verticalIdx * denoised_picture_ptr->stride_cr,
    2567             :                 sizeof(uint8_t) * input_picture_ptr->width >> subsampling_x);
    2568             :         }
    2569             :     }
    2570           0 :     else if (context_ptr->pic_noise_variance_float >= 1.0) {
    2571             :         //Luma : use filtered only for flatNoise LCUs
    2572           0 :         for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
    2573           0 :             sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2574           0 :             sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2575           0 :             uint32_t  sb_height = MIN(BLOCK_SIZE_64, input_picture_ptr->height - sb_origin_y);
    2576           0 :             uint32_t  sb_width = MIN(BLOCK_SIZE_64, input_picture_ptr->width - sb_origin_x);
    2577             : 
    2578           0 :             uint32_t inLumaOffSet = input_picture_ptr->origin_x + sb_origin_x + (input_picture_ptr->origin_y + sb_origin_y) * input_picture_ptr->stride_y;
    2579           0 :             uint32_t denLumaOffSet = denoised_picture_ptr->origin_x + sb_origin_x + (denoised_picture_ptr->origin_y + sb_origin_y) * denoised_picture_ptr->stride_y;
    2580             : 
    2581           0 :             if (picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] == 1) {
    2582           0 :                 for (verticalIdx = 0; verticalIdx < sb_height; ++verticalIdx) {
    2583           0 :                     EB_MEMCPY(input_picture_ptr->buffer_y + inLumaOffSet + verticalIdx * input_picture_ptr->stride_y,
    2584             :                         denoised_picture_ptr->buffer_y + denLumaOffSet + verticalIdx * denoised_picture_ptr->stride_y,
    2585             :                         sizeof(uint8_t) * sb_width);
    2586             :                 }
    2587             :             }
    2588             :         }
    2589             :     }
    2590             : 
    2591           0 :     return return_error;
    2592             : }
    2593             : 
    2594           0 : EbErrorType DetectInputPictureNoise(
    2595             :     PictureAnalysisContext    *context_ptr,
    2596             :     SequenceControlSet        *sequence_control_set_ptr,
    2597             :     PictureParentControlSet   *picture_control_set_ptr,
    2598             :     uint32_t                       sb_total_count,
    2599             :     EbPictureBufferDesc       *input_picture_ptr,
    2600             :     EbPictureBufferDesc       *noise_picture_ptr,
    2601             :     EbPictureBufferDesc       *denoised_picture_ptr,
    2602             :     uint32_t                     picture_width_in_sb)
    2603             : {
    2604           0 :     EbErrorType                 return_error = EB_ErrorNone;
    2605             :     uint32_t                    lcuCodingOrder;
    2606             : 
    2607             :     uint64_t                    picNoiseVariance;
    2608             : 
    2609             :     uint32_t                    totLcuCount, noiseTh;
    2610             : 
    2611             :     uint32_t                    sb_origin_x;
    2612             :     uint32_t                    sb_origin_y;
    2613             :     uint32_t                    inputLumaOriginIndex;
    2614             : 
    2615           0 :     picNoiseVariance = 0;
    2616           0 :     totLcuCount = 0;
    2617             : 
    2618             :     //Variance calc for noise picture
    2619           0 :     for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
    2620           0 :         sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2621           0 :         sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2622           0 :         inputLumaOriginIndex = (noise_picture_ptr->origin_y + sb_origin_y) * noise_picture_ptr->stride_y +
    2623           0 :             noise_picture_ptr->origin_x + sb_origin_x;
    2624             : 
    2625           0 :         uint32_t  noiseOriginIndex = noise_picture_ptr->origin_x + sb_origin_x + noise_picture_ptr->origin_y * noise_picture_ptr->stride_y;
    2626             : 
    2627           0 :         if (sb_origin_x == 0)
    2628           0 :             noise_extract_luma_weak(
    2629             :                 input_picture_ptr,
    2630             :                 denoised_picture_ptr,
    2631             :                 noise_picture_ptr,
    2632             :                 sb_origin_y,
    2633             :                 sb_origin_x);
    2634             : 
    2635           0 :         if (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width)
    2636             :         {
    2637           0 :             noise_extract_luma_weak_c(
    2638             :                 input_picture_ptr,
    2639             :                 denoised_picture_ptr,
    2640             :                 noise_picture_ptr,
    2641             :                 sb_origin_y,
    2642             :                 sb_origin_x);
    2643             :         }
    2644             : 
    2645             :         //do it only for complete 64x64 blocks
    2646           0 :         if (sb_origin_x + 64 <= input_picture_ptr->width && sb_origin_y + 64 <= input_picture_ptr->height)
    2647             :         {
    2648             :             uint64_t noiseBlkVar32x32[4], denoiseBlkVar32x32[4];
    2649             : 
    2650           0 :             uint64_t noiseBlkVar = ComputeVariance64x64(
    2651             :                 sequence_control_set_ptr,
    2652             :                 noise_picture_ptr,
    2653             :                 noiseOriginIndex,
    2654             :                 noiseBlkVar32x32);
    2655             : 
    2656             :             uint64_t noiseBlkVarTh;
    2657           0 :             uint64_t denBlkVarTh = FLAT_MAX_VAR;
    2658           0 :             noiseBlkVarTh = NOISE_MIN_LEVEL_M6_M7;
    2659             : 
    2660           0 :             picNoiseVariance += (noiseBlkVar >> 16);
    2661             : 
    2662           0 :             uint64_t denBlkVar = ComputeVariance64x64(
    2663             :                 sequence_control_set_ptr,
    2664             :                 denoised_picture_ptr,
    2665             :                 inputLumaOriginIndex,
    2666             :                 denoiseBlkVar32x32) >> 16;
    2667             : 
    2668           0 :             if (denBlkVar < denBlkVarTh && noiseBlkVar > noiseBlkVarTh)
    2669           0 :                 picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 1;
    2670           0 :             totLcuCount++;
    2671             :         }
    2672             :     }
    2673             : 
    2674           0 :     if (totLcuCount > 0) {
    2675           0 :         context_ptr->pic_noise_variance_float = (double)picNoiseVariance / (double)totLcuCount;
    2676           0 :         picNoiseVariance = picNoiseVariance / totLcuCount;
    2677             :     }
    2678             : 
    2679             :     //the variance of a 64x64 noise area tends to be bigger for small resolutions.
    2680           0 :     if (sequence_control_set_ptr->seq_header.max_frame_height <= 720)
    2681           0 :         noiseTh = 25;
    2682             :     else
    2683           0 :         noiseTh = 0;
    2684             : 
    2685           0 :     if (picNoiseVariance >= 80 + noiseTh)
    2686           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_10;
    2687           0 :     else if (picNoiseVariance >= 70 + noiseTh)
    2688           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_9;
    2689           0 :     else if (picNoiseVariance >= 60 + noiseTh)
    2690           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_8;
    2691           0 :     else if (picNoiseVariance >= 50 + noiseTh)
    2692           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_7;
    2693           0 :     else if (picNoiseVariance >= 40 + noiseTh)
    2694           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_6;
    2695           0 :     else if (picNoiseVariance >= 30 + noiseTh)
    2696           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_5;
    2697           0 :     else if (picNoiseVariance >= 20 + noiseTh)
    2698           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_4;
    2699           0 :     else if (picNoiseVariance >= 17 + noiseTh)
    2700           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_3_1;
    2701           0 :     else if (picNoiseVariance >= 10 + noiseTh)
    2702           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_3;
    2703           0 :     else if (picNoiseVariance >= 5 + noiseTh)
    2704           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_2;
    2705             :     else
    2706           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_1;
    2707             : 
    2708           0 :     if (picture_control_set_ptr->pic_noise_class >= PIC_NOISE_CLASS_4)
    2709           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_3_1;
    2710             : 
    2711           0 :     return return_error;
    2712             : }
    2713             : 
    2714           0 : static int32_t apply_denoise_2d(SequenceControlSet        *scs_ptr,
    2715             :     PictureParentControlSet   *pcs_ptr,
    2716             :     EbPictureBufferDesc *inputPicturePointer)
    2717             : {
    2718           0 :     if (eb_aom_denoise_and_model_run(pcs_ptr->denoise_and_model, inputPicturePointer,
    2719             :         &pcs_ptr->frm_hdr.film_grain_params,
    2720           0 :         scs_ptr->static_config.encoder_bit_depth > EB_8BIT)){
    2721             :     }
    2722           0 :     return 0;
    2723             : }
    2724             : 
    2725           0 : EbErrorType denoise_estimate_film_grain(
    2726             :     SequenceControlSet        *sequence_control_set_ptr,
    2727             :     PictureParentControlSet   *picture_control_set_ptr)
    2728             : {
    2729           0 :     EbErrorType return_error = EB_ErrorNone;
    2730             : 
    2731           0 :     FrameHeader *frm_hdr = &picture_control_set_ptr->frm_hdr;
    2732             : 
    2733           0 :     EbPictureBufferDesc    *input_picture_ptr = picture_control_set_ptr->enhanced_picture_ptr;
    2734           0 :     frm_hdr->film_grain_params.apply_grain = 0;
    2735             : 
    2736           0 :     if (sequence_control_set_ptr->film_grain_denoise_strength) {
    2737           0 :         if (apply_denoise_2d(sequence_control_set_ptr, picture_control_set_ptr, input_picture_ptr) < 0)
    2738           0 :             return 1;
    2739             :     }
    2740             : 
    2741           0 :     sequence_control_set_ptr->seq_header.film_grain_params_present |= frm_hdr->film_grain_params.apply_grain;
    2742             : 
    2743           0 :     return return_error;  //todo: add proper error handling
    2744             : }
    2745             : 
    2746           0 : EbErrorType FullSampleDenoise(
    2747             :     PictureAnalysisContext    *context_ptr,
    2748             :     SequenceControlSet        *sequence_control_set_ptr,
    2749             :     PictureParentControlSet   *picture_control_set_ptr,
    2750             :     uint32_t                     sb_total_count,
    2751             :     EbBool                       denoise_flag,
    2752             :     uint32_t                     picture_width_in_sb)
    2753             : {
    2754           0 :     EbErrorType return_error = EB_ErrorNone;
    2755             : 
    2756             :     uint32_t                     lcuCodingOrder;
    2757           0 :     EbPictureBufferDesc    *input_picture_ptr = picture_control_set_ptr->enhanced_picture_ptr;
    2758           0 :     EbPictureBufferDesc    *denoised_picture_ptr = context_ptr->denoised_picture_ptr;
    2759           0 :     EbPictureBufferDesc    *noise_picture_ptr = context_ptr->noise_picture_ptr;
    2760             : 
    2761             :     //Reset the flat noise flag array to False for both RealTime/HighComplexity Modes
    2762           0 :     for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder)
    2763           0 :         picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 0;
    2764           0 :     picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_INV; //this init is for both REAL-TIME and BEST-QUALITY
    2765             : 
    2766           0 :     DetectInputPictureNoise(
    2767             :         context_ptr,
    2768             :         sequence_control_set_ptr,
    2769             :         picture_control_set_ptr,
    2770             :         sb_total_count,
    2771             :         input_picture_ptr,
    2772             :         noise_picture_ptr,
    2773             :         denoised_picture_ptr,
    2774             :         picture_width_in_sb);
    2775             : 
    2776           0 :     if (denoise_flag == EB_TRUE)
    2777             :     {
    2778           0 :         DenoiseInputPicture(
    2779             :             context_ptr,
    2780             :             sequence_control_set_ptr,
    2781             :             picture_control_set_ptr,
    2782             :             sb_total_count,
    2783             :             input_picture_ptr,
    2784             :             denoised_picture_ptr,
    2785             :             picture_width_in_sb);
    2786             :     }
    2787             : 
    2788           0 :     return return_error;
    2789             : }
    2790             : 
    2791           0 : EbErrorType SubSampleFilterNoise(
    2792             :     SequenceControlSet        *sequence_control_set_ptr,
    2793             :     PictureParentControlSet   *picture_control_set_ptr,
    2794             :     uint32_t                       sb_total_count,
    2795             :     EbPictureBufferDesc       *input_picture_ptr,
    2796             :     EbPictureBufferDesc       *noise_picture_ptr,
    2797             :     EbPictureBufferDesc       *denoised_picture_ptr,
    2798             :     uint32_t                         picture_width_in_sb)
    2799             : {
    2800           0 :     EbErrorType return_error = EB_ErrorNone;
    2801             : 
    2802             :     uint32_t         lcuCodingOrder;
    2803             :     uint32_t       sb_origin_x;
    2804             :     uint32_t       sb_origin_y;
    2805             :     uint16_t        verticalIdx;
    2806             : 
    2807           0 :     uint32_t color_format = input_picture_ptr->color_format;
    2808           0 :     const uint16_t subsampling_x = (color_format == EB_YUV444 ? 1 : 2) - 1;
    2809           0 :     const uint16_t subsampling_y = (color_format >= EB_YUV422 ? 1 : 2) - 1;
    2810             : 
    2811           0 :     if (picture_control_set_ptr->pic_noise_class == PIC_NOISE_CLASS_3_1) {
    2812           0 :         uint32_t inLumaOffSet = input_picture_ptr->origin_x + input_picture_ptr->origin_y      * input_picture_ptr->stride_y;
    2813           0 :         uint32_t inChromaOffSet = (input_picture_ptr->origin_x >> subsampling_x) + (input_picture_ptr->origin_y >> subsampling_y) * input_picture_ptr->stride_cb;
    2814           0 :         uint32_t denLumaOffSet = denoised_picture_ptr->origin_x + denoised_picture_ptr->origin_y   * denoised_picture_ptr->stride_y;
    2815           0 :         uint32_t denChromaOffSet = (denoised_picture_ptr->origin_x >> subsampling_x) + (denoised_picture_ptr->origin_y >> subsampling_y) * denoised_picture_ptr->stride_cb;
    2816             : 
    2817             :         //filter Luma
    2818           0 :         for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
    2819           0 :             sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2820           0 :             sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2821             : 
    2822           0 :             if (sb_origin_x == 0)
    2823           0 :                 noise_extract_luma_weak(
    2824             :                     input_picture_ptr,
    2825             :                     denoised_picture_ptr,
    2826             :                     noise_picture_ptr,
    2827             :                     sb_origin_y,
    2828             :                     sb_origin_x);
    2829             : 
    2830           0 :             if (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width)
    2831             :             {
    2832           0 :                 noise_extract_luma_weak_c(
    2833             :                     input_picture_ptr,
    2834             :                     denoised_picture_ptr,
    2835             :                     noise_picture_ptr,
    2836             :                     sb_origin_y,
    2837             :                     sb_origin_x);
    2838             :             }
    2839             :         }
    2840             : 
    2841             :         //copy luma
    2842           0 :         for (verticalIdx = 0; verticalIdx < input_picture_ptr->height; ++verticalIdx) {
    2843           0 :             EB_MEMCPY(input_picture_ptr->buffer_y + inLumaOffSet + verticalIdx * input_picture_ptr->stride_y,
    2844             :                 denoised_picture_ptr->buffer_y + denLumaOffSet + verticalIdx * denoised_picture_ptr->stride_y,
    2845             :                 sizeof(uint8_t) * input_picture_ptr->width);
    2846             :         }
    2847             : 
    2848             :         //filter chroma
    2849           0 :         for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
    2850           0 :             sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2851           0 :             sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2852             : 
    2853           0 :             if (sb_origin_x == 0)
    2854           0 :                 noise_extract_chroma_weak(
    2855             :                     input_picture_ptr,
    2856             :                     denoised_picture_ptr,
    2857             :                     sb_origin_y >> subsampling_y,
    2858             :                     sb_origin_x >> subsampling_x);
    2859             : 
    2860           0 :             if (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width)
    2861             :             {
    2862           0 :                 noise_extract_chroma_weak_c(
    2863             :                     input_picture_ptr,
    2864             :                     denoised_picture_ptr,
    2865             :                     sb_origin_y >> subsampling_y,
    2866             :                     sb_origin_x >> subsampling_x);
    2867             :             }
    2868             :         }
    2869             : 
    2870             :         //copy chroma
    2871           0 :         for (verticalIdx = 0; verticalIdx < input_picture_ptr->height >> subsampling_y; ++verticalIdx) {
    2872           0 :             EB_MEMCPY(input_picture_ptr->buffer_cb + inChromaOffSet + verticalIdx * input_picture_ptr->stride_cb,
    2873             :                 denoised_picture_ptr->buffer_cb + denChromaOffSet + verticalIdx * denoised_picture_ptr->stride_cb,
    2874             :                 sizeof(uint8_t) * input_picture_ptr->width >> subsampling_x);
    2875             : 
    2876           0 :             EB_MEMCPY(input_picture_ptr->buffer_cr + inChromaOffSet + verticalIdx * input_picture_ptr->stride_cr,
    2877             :                 denoised_picture_ptr->buffer_cr + denChromaOffSet + verticalIdx * denoised_picture_ptr->stride_cr,
    2878             :                 sizeof(uint8_t) * input_picture_ptr->width >> subsampling_x);
    2879             :         }
    2880             :     }
    2881           0 :     else if (picture_control_set_ptr->pic_noise_class == PIC_NOISE_CLASS_2) {
    2882           0 :         uint32_t newTotFN = 0;
    2883             : 
    2884             :         //for each SB ,re check the FN information for only the FNdecim ones
    2885           0 :         for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
    2886           0 :             sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2887           0 :             sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2888           0 :             uint32_t  inputLumaOriginIndex = noise_picture_ptr->origin_x + sb_origin_x + (noise_picture_ptr->origin_y + sb_origin_y) * noise_picture_ptr->stride_y;
    2889           0 :             uint32_t  noiseOriginIndex = noise_picture_ptr->origin_x + sb_origin_x + (noise_picture_ptr->origin_y * noise_picture_ptr->stride_y);
    2890             : 
    2891           0 :             if (sb_origin_x + 64 <= input_picture_ptr->width && sb_origin_y + 64 <= input_picture_ptr->height && picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] == 1)
    2892             :             {
    2893           0 :                 noise_extract_luma_weak_lcu(
    2894             :                     input_picture_ptr,
    2895             :                     denoised_picture_ptr,
    2896             :                     noise_picture_ptr,
    2897             :                     sb_origin_y,
    2898             :                     sb_origin_x);
    2899             : 
    2900           0 :                 if (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width)
    2901             :                 {
    2902           0 :                     noise_extract_luma_weak_lcu_c(
    2903             :                         input_picture_ptr,
    2904             :                         denoised_picture_ptr,
    2905             :                         noise_picture_ptr,
    2906             :                         sb_origin_y,
    2907             :                         sb_origin_x);
    2908             :                 }
    2909             : 
    2910             :                 uint64_t noiseBlkVar32x32[4], denoiseBlkVar32x32[4];
    2911           0 :                 uint64_t noiseBlkVar = ComputeVariance64x64(
    2912             :                     sequence_control_set_ptr,
    2913             :                     noise_picture_ptr,
    2914             :                     noiseOriginIndex,
    2915             :                     noiseBlkVar32x32);
    2916           0 :                 uint64_t denBlkVar = ComputeVariance64x64(
    2917             :                     sequence_control_set_ptr,
    2918             :                     denoised_picture_ptr,
    2919             :                     inputLumaOriginIndex,
    2920             :                     denoiseBlkVar32x32) >> 16;
    2921             : 
    2922             :                 uint64_t noiseBlkVarTh;
    2923           0 :                 uint64_t denBlkVarTh = FLAT_MAX_VAR;
    2924           0 :                 noiseBlkVarTh = NOISE_MIN_LEVEL_M6_M7;
    2925             : 
    2926           0 :                 if (denBlkVar<denBlkVarTh && noiseBlkVar> noiseBlkVarTh) {
    2927           0 :                     picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 1;
    2928             :                     //printf("POC %i (%i,%i) denBlkVar: %i  noiseBlkVar :%i\n", picture_control_set_ptr->picture_number,sb_origin_x,sb_origin_y, denBlkVar, noiseBlkVar);
    2929           0 :                     newTotFN++;
    2930             :                 }
    2931             :                 else
    2932           0 :                     picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 0;
    2933             :             }
    2934             :         }
    2935             : 
    2936           0 :         for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
    2937           0 :             sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2938           0 :             sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
    2939             : 
    2940           0 :             if (sb_origin_x + 64 <= input_picture_ptr->width && sb_origin_y + 64 <= input_picture_ptr->height)
    2941             :             {
    2942             :                 //use the denoised for FN LCUs
    2943           0 :                 if (picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] == 1) {
    2944           0 :                     uint32_t  sb_height = MIN(BLOCK_SIZE_64, input_picture_ptr->height - sb_origin_y);
    2945           0 :                     uint32_t  sb_width = MIN(BLOCK_SIZE_64, input_picture_ptr->width - sb_origin_x);
    2946             : 
    2947           0 :                     uint32_t inLumaOffSet = input_picture_ptr->origin_x + sb_origin_x + (input_picture_ptr->origin_y + sb_origin_y) * input_picture_ptr->stride_y;
    2948           0 :                     uint32_t denLumaOffSet = denoised_picture_ptr->origin_x + sb_origin_x + (denoised_picture_ptr->origin_y + sb_origin_y) * denoised_picture_ptr->stride_y;
    2949             : 
    2950           0 :                     for (verticalIdx = 0; verticalIdx < sb_height; ++verticalIdx) {
    2951           0 :                         EB_MEMCPY(input_picture_ptr->buffer_y + inLumaOffSet + verticalIdx * input_picture_ptr->stride_y,
    2952             :                             denoised_picture_ptr->buffer_y + denLumaOffSet + verticalIdx * denoised_picture_ptr->stride_y,
    2953             :                             sizeof(uint8_t) * sb_width);
    2954             :                     }
    2955             :                 }
    2956             :             }
    2957             :         }
    2958             :     }
    2959           0 :     return return_error;
    2960             : }
    2961             : /* Flags flat, noisy entries of sb_flat_noise_array and sets pic_noise_class from the average noise variance measured on the quarter-decimated picture. */
    2962           0 : EbErrorType QuarterSampleDetectNoise(
    2963             :     PictureAnalysisContext    *context_ptr, // out: pic_noise_variance_float, written only when at least one block was measured
    2964             :     PictureParentControlSet   *picture_control_set_ptr, // out: sb_flat_noise_array[] entries and pic_noise_class
    2965             :     EbPictureBufferDesc       *quarter_decimated_picture_ptr, // in: picture scanned in 64x64 blocks and handed to the noise extractors
    2966             :     EbPictureBufferDesc       *noise_picture_ptr, // handed to the noise extractors, then measured with ComputeVariance32x32
    2967             :     EbPictureBufferDesc       *denoised_picture_ptr, // handed to the noise extractors, then measured with ComputeVariance32x32
    2968             :     uint32_t                         picture_width_in_sb) // row pitch used to build sb_flat_noise_array indices
    2969             : {
    2970           0 :     EbErrorType return_error = EB_ErrorNone; // never reassigned: the function always returns EB_ErrorNone
    2971             : 
    2972             :     uint64_t                   picNoiseVariance; // sum of (noise variance >> 16) over measured blocks, averaged after the loops
    2973             : 
    2974             :     uint32_t                     totLcuCount, noiseTh; // measured-block count; offset added to the class thresholds
    2975             : 
    2976             :     uint32_t                     blockIndex;
    2977             : 
    2978           0 :     picNoiseVariance = 0;
    2979           0 :     totLcuCount = 0;
    2980             : 
    2981             :     uint16_t vert64x64Index;
    2982             :     uint16_t horz64x64Index;
    2983             :     uint32_t block64x64X;
    2984             :     uint32_t block64x64Y;
    2985             :     uint32_t vert32x32Index;
    2986             :     uint32_t horz32x32Index;
    2987             :     uint32_t block32x32X;
    2988             :     uint32_t block32x32Y;
    2989             :     uint32_t noiseOriginIndex;
    2990             :     uint32_t lcuCodingOrder;
    2991             : 
    2992             :     // Loop over complete 64x64 blocks on the downsampled domain (each block is split into 2x2 = 4 sub-blocks below, one per LCU on the full sampled domain); the integer divisions skip any partial block at the right/bottom edge
    2993           0 :     for (vert64x64Index = 0; vert64x64Index < (quarter_decimated_picture_ptr->height / 64); vert64x64Index++) {
    2994           0 :         for (horz64x64Index = 0; horz64x64Index < (quarter_decimated_picture_ptr->width / 64); horz64x64Index++) {
    2995           0 :             block64x64X = horz64x64Index * 64;
    2996           0 :             block64x64Y = vert64x64Index * 64;
    2997             :             // the extractor below is invoked only for the first 64x64 block of each block row
    2998           0 :             if (block64x64X == 0)
    2999           0 :                 noise_extract_luma_weak(
    3000             :                     quarter_decimated_picture_ptr,
    3001             :                     denoised_picture_ptr,
    3002             :                     noise_picture_ptr,
    3003             :                     block64x64Y,
    3004             :                     block64x64X);
    3005             :             // NOTE(review): the test below compares a vertical offset (block64x64Y) against the picture width - confirm block64x64X was not intended
    3006           0 :             if (block64x64Y + BLOCK_SIZE_64 > quarter_decimated_picture_ptr->width)
    3007             :             {
    3008           0 :                 noise_extract_luma_weak_c(
    3009             :                     quarter_decimated_picture_ptr,
    3010             :                     denoised_picture_ptr,
    3011             :                     noise_picture_ptr,
    3012             :                     block64x64Y,
    3013             :                     block64x64X);
    3014             :             }
    3015             : 
    3016             :             // Loop over 32x32 blocks (i.e, 64x64 blocks in full resolution)
    3017           0 :             for (vert32x32Index = 0; vert32x32Index < 2; vert32x32Index++) {
    3018           0 :                 for (horz32x32Index = 0; horz32x32Index < 2; horz32x32Index++) {
    3019           0 :                     block32x32X = block64x64X + horz32x32Index * 32;
    3020           0 :                     block32x32Y = block64x64Y + vert32x32Index * 32;
    3021             : 
    3022             :                     //do it only for complete 32x32 blocks (i.e, complete 64x64 blocks in full resolution)
    3023           0 :                     if ((block32x32X + 32 <= quarter_decimated_picture_ptr->width) && (block32x32Y + 32 <= quarter_decimated_picture_ptr->height))
    3024             :                     {
    3025           0 :                         lcuCodingOrder = ((vert64x64Index * 2) + vert32x32Index) * picture_width_in_sb + ((horz64x64Index * 2) + horz32x32Index);
    3026             :                         // lcuCodingOrder is a row-major index: sub-block row * picture_width_in_sb + sub-block column
    3027             :                         uint64_t noiseBlkVar8x8[16], denoiseBlkVar8x8[16];
    3028             :                         // NOTE(review): this offset has no block32x32Y term (blockIndex below has one), so it always addresses the rows starting at origin_y - confirm intended
    3029           0 :                         noiseOriginIndex = noise_picture_ptr->origin_x + block32x32X + noise_picture_ptr->origin_y * noise_picture_ptr->stride_y;
    3030             : 
    3031           0 :                         uint64_t noiseBlkVar = ComputeVariance32x32(
    3032             :                             noise_picture_ptr,
    3033             :                             noiseOriginIndex,
    3034             :                             noiseBlkVar8x8);
    3035             :                         // the low 16 bits are dropped before accumulating (presumably the VARIANCE_PRECISION fractional bits - confirm)
    3036           0 :                         picNoiseVariance += (noiseBlkVar >> 16);
    3037             :                         // NOTE(review): blockIndex is built from noise_picture_ptr's origin/stride but is used to index denoised_picture_ptr - assumes both buffers share the same layout
    3038           0 :                         blockIndex = (noise_picture_ptr->origin_y + block32x32Y) * noise_picture_ptr->stride_y + noise_picture_ptr->origin_x + block32x32X;
    3039             : 
    3040           0 :                         uint64_t denBlkVar = ComputeVariance32x32(
    3041             :                             denoised_picture_ptr,
    3042             :                             blockIndex,
    3043             :                             denoiseBlkVar8x8) >> 16;
    3044             :                         // despite its name, denBlkVarDecTh is the threshold applied to noiseBlkVar (compared unshifted, whereas denBlkVar was shifted right by 16)
    3045             :                         uint64_t denBlkVarDecTh;
    3046           0 :                         denBlkVarDecTh = NOISE_MIN_LEVEL_DECIM_M6_M7;
    3047           0 :                         if (denBlkVar < FLAT_MAX_VAR_DECIM && noiseBlkVar> denBlkVarDecTh)
    3048           0 :                             picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 1; // flags are only ever set here, never cleared
    3049           0 :                         totLcuCount++;
    3050             :                     }
    3051             :                 }
    3052             :             }
    3053             :         }
    3054             :     }
    3055             :     // average over the measured blocks; when none were measured, pic_noise_variance_float is left untouched and picNoiseVariance stays 0
    3056           0 :     if (totLcuCount > 0) {
    3057           0 :         context_ptr->pic_noise_variance_float = (double)picNoiseVariance / (double)totLcuCount;
    3058           0 :         picNoiseVariance = picNoiseVariance / totLcuCount;
    3059             :     }
    3060             : 
    3061             :     //the variance of a 64x64 noise area tends to be bigger for small resolutions.
    3062             :     //if (sequence_control_set_ptr->seq_header.max_frame_height <= 720)
    3063             :     //    noiseTh = 25;
    3064             :     //else if (sequence_control_set_ptr->seq_header.max_frame_height <= 1080)
    3065             :     //    noiseTh = 10;
    3066             :     //else
    3067           0 :     noiseTh = 0; // the resolution-dependent offsets above are commented out, so the thresholds below are used as written
    3068             : 
    3069             :     //look for extreme noise or big enough flat noisy area to be denoised.
    3070           0 :     if (picNoiseVariance > 60)
    3071           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_3_1; //Noise+Edge information is too big, so may be this is all noise (action: frame based denoising)
    3072           0 :     else if (picNoiseVariance >= 10 + noiseTh)
    3073           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_3;   //Noise+Edge information is big enough, so there is no big enough flat noisy area (action : no denoising)
    3074           0 :     else if (picNoiseVariance >= 5 + noiseTh)
    3075           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_2;   //Noise+Edge information is relatively small, so there might be a big enough flat noisy area(action : denoising only for FN blocks)
    3076             :     else
    3077           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_1;   //Noise+Edge information is very small, so no noise nor edge area (action : no denoising)
    3078             : 
    3079           0 :     return return_error;
    3080             : }
    3081             : /* Flags flat, noisy entries of sb_flat_noise_array and sets pic_noise_class from the average noise variance measured on the sixteenth-decimated picture. */
    3082           0 : EbErrorType SubSampleDetectNoise(
    3083             :     PictureAnalysisContext    *context_ptr, // out: pic_noise_variance_float, written only when at least one block was measured
    3084             :     SequenceControlSet        *sequence_control_set_ptr, // in: seq_header.max_frame_height selects noiseTh
    3085             :     PictureParentControlSet   *picture_control_set_ptr, // out: sb_flat_noise_array[] entries and pic_noise_class
    3086             :     EbPictureBufferDesc       *sixteenth_decimated_picture_ptr, // in: picture scanned in 64x64 blocks and handed to the noise extractors
    3087             :     EbPictureBufferDesc       *noise_picture_ptr, // handed to the noise extractors, then measured with ComputeVariance16x16
    3088             :     EbPictureBufferDesc       *denoised_picture_ptr, // handed to the noise extractors, then measured with ComputeVariance16x16
    3089             :     uint32_t                         picture_width_in_sb) // row pitch used to build sb_flat_noise_array indices
    3090             : {
    3091           0 :     EbErrorType return_error = EB_ErrorNone; // never reassigned: the function always returns EB_ErrorNone
    3092             : 
    3093             :     uint64_t                   picNoiseVariance; // sum of (noise variance >> 16) over measured blocks, averaged after the loops
    3094             : 
    3095             :     uint32_t                     totLcuCount, noiseTh; // measured-block count; offset added to the class thresholds
    3096             : 
    3097             :     uint32_t                     blockIndex;
    3098             : 
    3099           0 :     picNoiseVariance = 0;
    3100           0 :     totLcuCount = 0;
    3101             : 
    3102             :     uint16_t vert64x64Index;
    3103             :     uint16_t horz64x64Index;
    3104             :     uint32_t block64x64X;
    3105             :     uint32_t block64x64Y;
    3106             :     uint32_t vert16x16Index;
    3107             :     uint32_t horz16x16Index;
    3108             :     uint32_t block16x16X;
    3109             :     uint32_t block16x16Y;
    3110             :     uint32_t noiseOriginIndex;
    3111             :     uint32_t lcuCodingOrder;
    3112             : 
    3113             :     // Loop over complete 64x64 blocks on the downsampled domain (each block would contain 16 LCUs on the full sampled domain); the integer divisions skip any partial block at the right/bottom edge
    3114           0 :     for (vert64x64Index = 0; vert64x64Index < (sixteenth_decimated_picture_ptr->height / 64); vert64x64Index++) {
    3115           0 :         for (horz64x64Index = 0; horz64x64Index < (sixteenth_decimated_picture_ptr->width / 64); horz64x64Index++) {
    3116           0 :             block64x64X = horz64x64Index * 64;
    3117           0 :             block64x64Y = vert64x64Index * 64;
    3118             :             // the extractor below is invoked only for the first 64x64 block of each block row
    3119           0 :             if (block64x64X == 0)
    3120           0 :                 noise_extract_luma_weak(
    3121             :                     sixteenth_decimated_picture_ptr,
    3122             :                     denoised_picture_ptr,
    3123             :                     noise_picture_ptr,
    3124             :                     block64x64Y,
    3125             :                     block64x64X);
    3126             :             // NOTE(review): the test below compares a vertical offset (block64x64Y) against the picture width - confirm block64x64X was not intended
    3127           0 :             if (block64x64Y + BLOCK_SIZE_64 > sixteenth_decimated_picture_ptr->width)
    3128             :             {
    3129           0 :                 noise_extract_luma_weak_c(
    3130             :                     sixteenth_decimated_picture_ptr,
    3131             :                     denoised_picture_ptr,
    3132             :                     noise_picture_ptr,
    3133             :                     block64x64Y,
    3134             :                     block64x64X);
    3135             :             }
    3136             : 
    3137             :             // Loop over 16x16 blocks (i.e, 64x64 blocks in full resolution)
    3138           0 :             for (vert16x16Index = 0; vert16x16Index < 4; vert16x16Index++) {
    3139           0 :                 for (horz16x16Index = 0; horz16x16Index < 4; horz16x16Index++) {
    3140           0 :                     block16x16X = block64x64X + horz16x16Index * 16;
    3141           0 :                     block16x16Y = block64x64Y + vert16x16Index * 16;
    3142             : 
    3143             :                     //do it only for complete 16x16 blocks (i.e, complete 64x64 blocks in full resolution)
    3144           0 :                     if (block16x16X + 16 <= sixteenth_decimated_picture_ptr->width && block16x16Y + 16 <= sixteenth_decimated_picture_ptr->height)
    3145             :                     {
    3146           0 :                         lcuCodingOrder = ((vert64x64Index * 4) + vert16x16Index) * picture_width_in_sb + ((horz64x64Index * 4) + horz16x16Index);
    3147             :                         // lcuCodingOrder is a row-major index: sub-block row * picture_width_in_sb + sub-block column
    3148             :                         uint64_t noiseBlkVar8x8[4], denoiseBlkVar8x8[4];
    3149             :                         // NOTE(review): this offset has no block16x16Y term (blockIndex below has one), so it always addresses the rows starting at origin_y - confirm intended
    3150           0 :                         noiseOriginIndex = noise_picture_ptr->origin_x + block16x16X + noise_picture_ptr->origin_y * noise_picture_ptr->stride_y;
    3151             : 
    3152           0 :                         uint64_t noiseBlkVar = ComputeVariance16x16(
    3153             :                             noise_picture_ptr,
    3154             :                             noiseOriginIndex,
    3155             :                             noiseBlkVar8x8);
    3156             :                         // the low 16 bits are dropped before accumulating (presumably the VARIANCE_PRECISION fractional bits - confirm)
    3157           0 :                         picNoiseVariance += (noiseBlkVar >> 16);
    3158             :                         // NOTE(review): blockIndex is built from noise_picture_ptr's origin/stride but is used to index denoised_picture_ptr - assumes both buffers share the same layout
    3159           0 :                         blockIndex = (noise_picture_ptr->origin_y + block16x16Y) * noise_picture_ptr->stride_y + noise_picture_ptr->origin_x + block16x16X;
    3160             : 
    3161           0 :                         uint64_t denBlkVar = ComputeVariance16x16(
    3162             :                             denoised_picture_ptr,
    3163             :                             blockIndex,
    3164             :                             denoiseBlkVar8x8) >> 16;
    3165             :                         // denBlkVar (shifted right by 16) is tested against denBlkVarDecTh; noiseBlkVar is tested unshifted against noiseBlkVarDecTh
    3166             :                         uint64_t  noiseBlkVarDecTh;
    3167           0 :                         uint64_t denBlkVarDecTh = FLAT_MAX_VAR_DECIM;
    3168             : 
    3169           0 :                         noiseBlkVarDecTh = NOISE_MIN_LEVEL_DECIM_M6_M7;
    3170           0 :                         if (denBlkVar < denBlkVarDecTh && noiseBlkVar> noiseBlkVarDecTh)
    3171           0 :                             picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 1; // flags are only ever set here, never cleared
    3172           0 :                         totLcuCount++;
    3173             :                     }
    3174             :                 }
    3175             :             }
    3176             :         }
    3177             :     }
    3178             :     // average over the measured blocks; when none were measured, pic_noise_variance_float is left untouched and picNoiseVariance stays 0
    3179           0 :     if (totLcuCount > 0) {
    3180           0 :         context_ptr->pic_noise_variance_float = (double)picNoiseVariance / (double)totLcuCount;
    3181           0 :         picNoiseVariance = picNoiseVariance / totLcuCount;
    3182             :     }
    3183             : 
    3184             :     //the variance of a 64x64 noise area tends to be bigger for small resolutions.
    3185           0 :     if (sequence_control_set_ptr->seq_header.max_frame_height <= 720)
    3186           0 :         noiseTh = 25;
    3187           0 :     else if (sequence_control_set_ptr->seq_header.max_frame_height <= 1080)
    3188           0 :         noiseTh = 10;
    3189             :     else
    3190           0 :         noiseTh = 0;
    3191             :     // noiseTh raises every class threshold below by the same amount
    3192             :     //look for extreme noise or big enough flat noisy area to be denoised.
    3193           0 :     if (picNoiseVariance >= 55 + noiseTh)
    3194           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_3_1; //Noise+Edge information is too big, so may be this is all noise (action: frame based denoising)
    3195           0 :     else if (picNoiseVariance >= 10 + noiseTh)
    3196           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_3;   //Noise+Edge information is big enough, so there is no big enough flat noisy area (action : no denoising)
    3197           0 :     else if (picNoiseVariance >= 5 + noiseTh)
    3198           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_2;   //Noise+Edge information is relatively small, so there might be a big enough flat noisy area(action : denoising only for FN blocks)
    3199             :     else
    3200           0 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_1;   //Noise+Edge information is very small, so no noise nor edge area (action : no denoising)
    3201             : 
    3202           0 :     return return_error;
    3203             : }
    3204             : /* Resets the noise flags, decimates the enhanced picture's luma into quarter_decimated_picture_ptr, runs QuarterSampleDetectNoise and, when denoise_flag is EB_TRUE and the resulting noise class qualifies, calls SubSampleFilterNoise. */
    3205           0 : EbErrorType QuarterSampleDenoise(
    3206             :     PictureAnalysisContext    *context_ptr, // supplies the denoised_picture_ptr and noise_picture_ptr buffers
    3207             :     SequenceControlSet        *sequence_control_set_ptr, // read for rate_control_mode, qp and target_bit_rate
    3208             :     PictureParentControlSet   *picture_control_set_ptr, // supplies enhanced_picture_ptr; its sb_flat_noise_array and pic_noise_class are reset here
    3209             :     EbPictureBufferDesc        *quarter_decimated_picture_ptr, // destination of the decimation, then input to detection
    3210             :     uint32_t                       sb_total_count, // number of sb_flat_noise_array entries to reset
    3211             :     EbBool                      denoise_flag, // EB_TRUE allows the filtering step after detection
    3212             :     uint32_t                         picture_width_in_sb) // forwarded to the detection and filtering calls
    3213             : {
    3214           0 :     EbErrorType return_error = EB_ErrorNone; // never reassigned: callee return values are ignored and EB_ErrorNone is always returned
    3215             : 
    3216             :     uint32_t                     lcuCodingOrder;
    3217           0 :     EbPictureBufferDesc    *input_picture_ptr = picture_control_set_ptr->enhanced_picture_ptr;
    3218           0 :     EbPictureBufferDesc    *denoised_picture_ptr = context_ptr->denoised_picture_ptr;
    3219           0 :     EbPictureBufferDesc    *noise_picture_ptr = context_ptr->noise_picture_ptr;
    3220             : 
    3221             :     //Reset the flat noise flag array to False for both RealTime/HighComplexity Modes
    3222           0 :     for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder)
    3223           0 :         picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 0;
    3224           0 :     picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_INV; //this init is for both REAL-TIME and BEST-QUALITY
    3225             :     // decimate the luma plane (starting at its origin) into quarter_decimated_picture_ptr; the last argument, 2, is presumably the decimation step - confirm
    3226           0 :     decimation_2d(
    3227           0 :         &input_picture_ptr->buffer_y[input_picture_ptr->origin_x + input_picture_ptr->origin_y * input_picture_ptr->stride_y],
    3228           0 :         input_picture_ptr->stride_y,
    3229           0 :         input_picture_ptr->width,
    3230           0 :         input_picture_ptr->height,
    3231           0 :         &quarter_decimated_picture_ptr->buffer_y[quarter_decimated_picture_ptr->origin_x + (quarter_decimated_picture_ptr->origin_y * quarter_decimated_picture_ptr->stride_y)],
    3232           0 :         quarter_decimated_picture_ptr->stride_y,
    3233             :         2);
    3234             :     // detection sets sb_flat_noise_array entries and pic_noise_class; its return value is not checked
    3235           0 :     QuarterSampleDetectNoise(
    3236             :         context_ptr,
    3237             :         picture_control_set_ptr,
    3238             :         quarter_decimated_picture_ptr,
    3239             :         noise_picture_ptr,
    3240             :         denoised_picture_ptr,
    3241             :         picture_width_in_sb);
    3242             :     // filter for class 3_1 always; for class 2 only when (fixed QP and qp > DENOISER_QP_TH) or (rate control on and target_bit_rate < DENOISER_BITRATE_TH)
    3243           0 :     if (denoise_flag == EB_TRUE) {
    3244             :         // Turn OFF the de-noiser for Class 2 at QP=29 and lower (for Fixed_QP) and at the target rate of 14Mbps and higher (for RC=ON)
    3245           0 :         if ((picture_control_set_ptr->pic_noise_class == PIC_NOISE_CLASS_3_1) ||
    3246           0 :             ((picture_control_set_ptr->pic_noise_class == PIC_NOISE_CLASS_2) && ((sequence_control_set_ptr->static_config.rate_control_mode == 0 && sequence_control_set_ptr->qp > DENOISER_QP_TH) || (sequence_control_set_ptr->static_config.rate_control_mode != 0 && sequence_control_set_ptr->static_config.target_bit_rate < DENOISER_BITRATE_TH)))) {
    3247           0 :             SubSampleFilterNoise(
    3248             :                 sequence_control_set_ptr,
    3249             :                 picture_control_set_ptr,
    3250             :                 sb_total_count,
    3251             :                 input_picture_ptr,
    3252             :                 noise_picture_ptr,
    3253             :                 denoised_picture_ptr,
    3254             :                 picture_width_in_sb);
    3255             :         }
    3256             :     }
    3257             : 
    3258           0 :     return return_error;
    3259             : }
    3260             : /* Resets the noise flags, decimates the enhanced picture's luma into sixteenth_decimated_picture_ptr, runs SubSampleDetectNoise and, when denoise_flag is EB_TRUE and the resulting noise class qualifies, calls SubSampleFilterNoise. */
    3261           0 : EbErrorType SubSampleDenoise(
    3262             :     PictureAnalysisContext    *context_ptr, // supplies the denoised_picture_ptr and noise_picture_ptr buffers
    3263             :     SequenceControlSet        *sequence_control_set_ptr, // read for rate_control_mode, qp and target_bit_rate; also forwarded to detection
    3264             :     PictureParentControlSet   *picture_control_set_ptr, // supplies enhanced_picture_ptr; its sb_flat_noise_array and pic_noise_class are reset here
    3265             :     EbPictureBufferDesc        *sixteenth_decimated_picture_ptr, // destination of the decimation, then input to detection
    3266             :     uint32_t                       sb_total_count, // number of sb_flat_noise_array entries to reset
    3267             :     EbBool                      denoise_flag, // EB_TRUE allows the filtering step after detection
    3268             :     uint32_t                         picture_width_in_sb) // forwarded to the detection and filtering calls
    3269             : {
    3270           0 :     EbErrorType return_error = EB_ErrorNone; // never reassigned: callee return values are ignored and EB_ErrorNone is always returned
    3271             : 
    3272             :     uint32_t                     lcuCodingOrder;
    3273           0 :     EbPictureBufferDesc    *input_picture_ptr = picture_control_set_ptr->enhanced_picture_ptr;
    3274           0 :     EbPictureBufferDesc    *denoised_picture_ptr = context_ptr->denoised_picture_ptr;
    3275           0 :     EbPictureBufferDesc    *noise_picture_ptr = context_ptr->noise_picture_ptr;
    3276             : 
    3277             :     //Reset the flat noise flag array to False for both RealTime/HighComplexity Modes
    3278           0 :     for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder)
    3279           0 :         picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 0;
    3280           0 :     picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_INV; //this init is for both REAL-TIME and BEST-QUALITY
    3281             :     // decimate the luma plane (starting at its origin) into sixteenth_decimated_picture_ptr; the last argument, 4, is presumably the decimation step - confirm
    3282           0 :     decimation_2d(
    3283           0 :         &input_picture_ptr->buffer_y[input_picture_ptr->origin_x + input_picture_ptr->origin_y * input_picture_ptr->stride_y],
    3284           0 :         input_picture_ptr->stride_y,
    3285           0 :         input_picture_ptr->width,
    3286           0 :         input_picture_ptr->height,
    3287           0 :         &sixteenth_decimated_picture_ptr->buffer_y[sixteenth_decimated_picture_ptr->origin_x + (sixteenth_decimated_picture_ptr->origin_y * sixteenth_decimated_picture_ptr->stride_y)],
    3288           0 :         sixteenth_decimated_picture_ptr->stride_y,
    3289             :         4);
    3290             :     // detection sets sb_flat_noise_array entries and pic_noise_class; its return value is not checked
    3291           0 :     SubSampleDetectNoise(
    3292             :         context_ptr,
    3293             :         sequence_control_set_ptr,
    3294             :         picture_control_set_ptr,
    3295             :         sixteenth_decimated_picture_ptr,
    3296             :         noise_picture_ptr,
    3297             :         denoised_picture_ptr,
    3298             :         picture_width_in_sb);
    3299             :     // filter for class 3_1 always; for class 2 only when (fixed QP and qp > DENOISER_QP_TH) or (rate control on and target_bit_rate < DENOISER_BITRATE_TH)
    3300           0 :     if (denoise_flag == EB_TRUE) {
    3301             :         // Turn OFF the de-noiser for Class 2 at QP=29 and lower (for Fixed_QP) and at the target rate of 14Mbps and higher (for RC=ON)
    3302           0 :         if ((picture_control_set_ptr->pic_noise_class == PIC_NOISE_CLASS_3_1) ||
    3303           0 :             ((picture_control_set_ptr->pic_noise_class == PIC_NOISE_CLASS_2) && ((sequence_control_set_ptr->static_config.rate_control_mode == 0 && sequence_control_set_ptr->qp > DENOISER_QP_TH) || (sequence_control_set_ptr->static_config.rate_control_mode != 0 && sequence_control_set_ptr->static_config.target_bit_rate < DENOISER_BITRATE_TH)))) {
    3304           0 :             SubSampleFilterNoise(
    3305             :                 sequence_control_set_ptr,
    3306             :                 picture_control_set_ptr,
    3307             :                 sb_total_count,
    3308             :                 input_picture_ptr,
    3309             :                 noise_picture_ptr,
    3310             :                 denoised_picture_ptr,
    3311             :                 picture_width_in_sb);
    3312             :         }
    3313             :     }
    3314             : 
    3315           0 :     return return_error;
    3316             : }
    3317             : 
    3318             : /************************************************
    3319             :  * Set picture parameters used for statistics gathering
    3320             :  ** Sets the number of picture-analysis regions per width and per height
    3321             :  ** Both are assigned fixed HIGHER_THAN_CLASS_1_REGION_SPLIT_PER_* constants; no other field is touched
    3322             :  ************************************************/
    3323         117 : void SetPictureParametersForStatisticsGathering(
    3324             :     SequenceControlSet            *sequence_control_set_ptr // out: receives the two region counts
    3325             : )
    3326             : {
    3327         117 :     sequence_control_set_ptr->picture_analysis_number_of_regions_per_width = HIGHER_THAN_CLASS_1_REGION_SPLIT_PER_WIDTH;
    3328         117 :     sequence_control_set_ptr->picture_analysis_number_of_regions_per_height = HIGHER_THAN_CLASS_1_REGION_SPLIT_PER_HEIGHT;
    3329             : 
    3330         117 :     return;
    3331             : }
    3332             : /************************************************
    3333             :  * Picture Pre Processing Operations *
    3334             :  *** A function that groups the pre-processing
    3335             :  * operations performed on the input picture
    3336             :  *** Operations included at this point:
    3337             :  ***** A call to denoise_estimate_film_grain when film_grain_denoise_strength is non-zero
    3338             :  ***** Otherwise, a reset of sb_flat_noise_array and pic_noise_class
    3339             :  ************************************************/
    3340         118 : void PicturePreProcessingOperations(
    3341             :     PictureParentControlSet       *picture_control_set_ptr, // its sb_flat_noise_array / pic_noise_class are reset in the non-film-grain path
    3342             :     SequenceControlSet            *sequence_control_set_ptr, // film_grain_denoise_strength selects the path
    3343             :     uint32_t                       sb_total_count) // number of sb_flat_noise_array entries to reset
    3344             : {
    3345         118 :     if (sequence_control_set_ptr->film_grain_denoise_strength) {
    3346           0 :         denoise_estimate_film_grain(
    3347             :             sequence_control_set_ptr,
    3348             :             picture_control_set_ptr);
    3349             :     }
    3350             :     else { // this function itself does not reset the flags or the noise class in the film-grain branch above
    3351             :         //Reset the flat noise flag array to False for both RealTime/HighComplexity Modes
    3352        7160 :         for (uint32_t lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder)
    3353        7042 :             picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 0;
    3354         118 :         picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_INV; //this init is for both REAL-TIME and BEST-QUALITY
    3355             :     }
    3356         119 :     return;
    3357             : }
    3358             : 
    3359             : /**************************************************************
    3360             : * Generate picture histogram bins for YUV pixel intensity *
    3361             : * Calculation is done on a per-region basis (regions set previously, resolution dependent)
    3362             : **************************************************************/
    3363         120 : void SubSampleLumaGeneratePixelIntensityHistogramBins(
    3364             :     SequenceControlSet            *sequence_control_set_ptr,
    3365             :     PictureParentControlSet       *picture_control_set_ptr,
    3366             :     EbPictureBufferDesc           *input_picture_ptr,
    3367             :     uint64_t                          *sumAverageIntensityTotalRegionsLuma) {
    3368             :     uint32_t                          regionWidth;
    3369             :     uint32_t                          regionHeight;
    3370             :     uint32_t                          regionWidthOffset;
    3371             :     uint32_t                          regionHeightOffset;
    3372             :     uint32_t                          regionInPictureWidthIndex;
    3373             :     uint32_t                          regionInPictureHeightIndex;
    3374             :     uint32_t                            histogramBin;
    3375             :     uint64_t                          sum;
    3376             : 
    3377         120 :     regionWidth = input_picture_ptr->width / sequence_control_set_ptr->picture_analysis_number_of_regions_per_width;
    3378         120 :     regionHeight = input_picture_ptr->height / sequence_control_set_ptr->picture_analysis_number_of_regions_per_height;
    3379             : 
    3380             :     // Loop over regions inside the picture
    3381         599 :     for (regionInPictureWidthIndex = 0; regionInPictureWidthIndex < sequence_control_set_ptr->picture_analysis_number_of_regions_per_width; regionInPictureWidthIndex++) {  // loop over horizontal regions
    3382        2398 :         for (regionInPictureHeightIndex = 0; regionInPictureHeightIndex < sequence_control_set_ptr->picture_analysis_number_of_regions_per_height; regionInPictureHeightIndex++) { // loop over vertical regions
    3383             : 
    3384             :             // Initialize bins to 1
    3385        1919 :             initialize_buffer_32bits(picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][0], 64, 0, 1);
    3386             : 
    3387        3840 :             regionWidthOffset = (regionInPictureWidthIndex == sequence_control_set_ptr->picture_analysis_number_of_regions_per_width - 1) ?
    3388        1920 :                 input_picture_ptr->width - (sequence_control_set_ptr->picture_analysis_number_of_regions_per_width * regionWidth) :
    3389             :                 0;
    3390             : 
    3391        3840 :             regionHeightOffset = (regionInPictureHeightIndex == sequence_control_set_ptr->picture_analysis_number_of_regions_per_height - 1) ?
    3392        1920 :                 input_picture_ptr->height - (sequence_control_set_ptr->picture_analysis_number_of_regions_per_height * regionHeight) :
    3393             :                 0;
    3394             : 
    3395             :             // Y Histogram
    3396        1920 :             CalculateHistogram(
    3397        1920 :                 &input_picture_ptr->buffer_y[(input_picture_ptr->origin_x + regionInPictureWidthIndex * regionWidth) + ((input_picture_ptr->origin_y + regionInPictureHeightIndex * regionHeight) * input_picture_ptr->stride_y)],
    3398             :                 regionWidth + regionWidthOffset,
    3399             :                 regionHeight + regionHeightOffset,
    3400        1920 :                 input_picture_ptr->stride_y,
    3401             :                 1,
    3402        1920 :                 picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][0],
    3403             :                 &sum);
    3404             : 
    3405        1918 :             picture_control_set_ptr->average_intensity_per_region[regionInPictureWidthIndex][regionInPictureHeightIndex][0] = (uint8_t)((sum + (((regionWidth + regionWidthOffset)*(regionHeight + regionHeightOffset)) >> 1)) / ((regionWidth + regionWidthOffset)*(regionHeight + regionHeightOffset)));
    3406        1918 :             (*sumAverageIntensityTotalRegionsLuma) += (sum << 4);
    3407      487256 :             for (histogramBin = 0; histogramBin < HISTOGRAM_NUMBER_OF_BINS; histogramBin++) { // Loop over the histogram bins
    3408      485338 :                 picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][0][histogramBin] =
    3409      485338 :                     picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][0][histogramBin] << 4;
    3410             :             }
    3411             :         }
    3412             :     }
    3413             : 
    3414         119 :     return;
    3415             : }
    3416             : 
    3417         120 : void SubSampleChromaGeneratePixelIntensityHistogramBins(
    3418             :     SequenceControlSet            *sequence_control_set_ptr,
    3419             :     PictureParentControlSet       *picture_control_set_ptr,
    3420             :     EbPictureBufferDesc           *input_picture_ptr,
    3421             :     uint64_t                          *sumAverageIntensityTotalRegionsCb,
    3422             :     uint64_t                          *sumAverageIntensityTotalRegionsCr) {
    3423             :     uint64_t                          sum;
    3424             :     uint32_t                          regionWidth;
    3425             :     uint32_t                          regionHeight;
    3426             :     uint32_t                          regionWidthOffset;
    3427             :     uint32_t                          regionHeightOffset;
    3428             :     uint32_t                          regionInPictureWidthIndex;
    3429             :     uint32_t                          regionInPictureHeightIndex;
    3430             : 
    3431             :     uint16_t                          histogramBin;
    3432         120 :     uint8_t                           decim_step = 4;
    3433             : 
    3434         120 :     regionWidth = input_picture_ptr->width / sequence_control_set_ptr->picture_analysis_number_of_regions_per_width;
    3435         120 :     regionHeight = input_picture_ptr->height / sequence_control_set_ptr->picture_analysis_number_of_regions_per_height;
    3436             : 
    3437             :     // Loop over regions inside the picture
    3438         599 :     for (regionInPictureWidthIndex = 0; regionInPictureWidthIndex < sequence_control_set_ptr->picture_analysis_number_of_regions_per_width; regionInPictureWidthIndex++) {  // loop over horizontal regions
    3439        2399 :         for (regionInPictureHeightIndex = 0; regionInPictureHeightIndex < sequence_control_set_ptr->picture_analysis_number_of_regions_per_height; regionInPictureHeightIndex++) { // loop over vertical regions
    3440             : 
    3441             :             // Initialize bins to 1
    3442        1920 :             initialize_buffer_32bits(picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][1], 64, 0, 1);
    3443        1920 :             initialize_buffer_32bits(picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][2], 64, 0, 1);
    3444             : 
    3445        3838 :             regionWidthOffset = (regionInPictureWidthIndex == sequence_control_set_ptr->picture_analysis_number_of_regions_per_width - 1) ?
    3446        1919 :                 input_picture_ptr->width - (sequence_control_set_ptr->picture_analysis_number_of_regions_per_width * regionWidth) :
    3447             :                 0;
    3448             : 
    3449        3838 :             regionHeightOffset = (regionInPictureHeightIndex == sequence_control_set_ptr->picture_analysis_number_of_regions_per_height - 1) ?
    3450        1919 :                 input_picture_ptr->height - (sequence_control_set_ptr->picture_analysis_number_of_regions_per_height * regionHeight) :
    3451             :                 0;
    3452             : 
    3453             :             // U Histogram
    3454        1919 :             CalculateHistogram(
    3455        1919 :                 &input_picture_ptr->buffer_cb[((input_picture_ptr->origin_x + regionInPictureWidthIndex * regionWidth) >> 1) + (((input_picture_ptr->origin_y + regionInPictureHeightIndex * regionHeight) >> 1) * input_picture_ptr->stride_cb)],
    3456        1919 :                 (regionWidth + regionWidthOffset) >> 1,
    3457        1919 :                 (regionHeight + regionHeightOffset) >> 1,
    3458        1919 :                 input_picture_ptr->stride_cb,
    3459             :                 decim_step,
    3460        1919 :                 picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][1],
    3461             :                 &sum);
    3462             : 
    3463        1919 :             sum = (sum << decim_step);
    3464        1919 :             *sumAverageIntensityTotalRegionsCb += sum;
    3465        1919 :             picture_control_set_ptr->average_intensity_per_region[regionInPictureWidthIndex][regionInPictureHeightIndex][1] = (uint8_t)((sum + (((regionWidth + regionWidthOffset)*(regionHeight + regionHeightOffset)) >> 3)) / (((regionWidth + regionWidthOffset)*(regionHeight + regionHeightOffset)) >> 2));
    3466             : 
    3467      485917 :             for (histogramBin = 0; histogramBin < HISTOGRAM_NUMBER_OF_BINS; histogramBin++) { // Loop over the histogram bins
    3468      483998 :                 picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][1][histogramBin] =
    3469      483998 :                     picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][1][histogramBin] << decim_step;
    3470             :             }
    3471             : 
    3472             :             // V Histogram
    3473        1919 :             CalculateHistogram(
    3474        1919 :                 &input_picture_ptr->buffer_cr[((input_picture_ptr->origin_x + regionInPictureWidthIndex * regionWidth) >> 1) + (((input_picture_ptr->origin_y + regionInPictureHeightIndex * regionHeight) >> 1) * input_picture_ptr->stride_cr)],
    3475        1919 :                 (regionWidth + regionWidthOffset) >> 1,
    3476        1919 :                 (regionHeight + regionHeightOffset) >> 1,
    3477        1919 :                 input_picture_ptr->stride_cr,
    3478             :                 decim_step,
    3479        1919 :                 picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][2],
    3480             :                 &sum);
    3481             : 
    3482        1919 :             sum = (sum << decim_step);
    3483        1919 :             *sumAverageIntensityTotalRegionsCr += sum;
    3484        1919 :             picture_control_set_ptr->average_intensity_per_region[regionInPictureWidthIndex][regionInPictureHeightIndex][2] = (uint8_t)((sum + (((regionWidth + regionWidthOffset)*(regionHeight + regionHeightOffset)) >> 3)) / (((regionWidth + regionWidthOffset)*(regionHeight + regionHeightOffset)) >> 2));
    3485             : 
    3486      486504 :             for (histogramBin = 0; histogramBin < HISTOGRAM_NUMBER_OF_BINS; histogramBin++) { // Loop over the histogram bins
    3487      484585 :                 picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][2][histogramBin] =
    3488      484585 :                     picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][2][histogramBin] << decim_step;
    3489             :             }
    3490             :         }
    3491             :     }
    3492         119 :     return;
    3493             : }
    3494             : 
    3495         120 : void EdgeDetectionMeanLumaChroma16x16(
    3496             :     SequenceControlSet        *sequence_control_set_ptr,
    3497             :     PictureParentControlSet   *picture_control_set_ptr,
    3498             :     uint32_t                       totalLcuCount)
    3499             : {
    3500             :     uint32_t               sb_index;
    3501             : 
    3502         120 :     uint32_t maxGrad = 1;
    3503             : 
    3504             :     // The values are calculated for every 4th frame
    3505         120 :     if ((picture_control_set_ptr->picture_number & 3) == 0) {
    3506        1830 :         for (sb_index = 0; sb_index < totalLcuCount; sb_index++) {
    3507        1800 :             SbStat *sb_stat_ptr = &picture_control_set_ptr->sb_stat_array[sb_index];
    3508             : 
    3509        1800 :             EB_MEMSET(sb_stat_ptr, 0, sizeof(SbStat));
    3510        1800 :             SbParams     *sb_params = &sequence_control_set_ptr->sb_params_array[sb_index];
    3511        1800 :             if (sb_params->potential_logo_sb &&sb_params->is_complete_sb)
    3512             : 
    3513             :             {
    3514         660 :                 uint8_t *y_mean_ptr = picture_control_set_ptr->y_mean[sb_index];
    3515         660 :                 uint8_t *cr_mean_ptr = picture_control_set_ptr->crMean[sb_index];
    3516         660 :                 uint8_t *cb_mean_ptr = picture_control_set_ptr->cbMean[sb_index];
    3517             : 
    3518             :                 uint8_t rasterScanCuIndex;
    3519             : 
    3520       11220 :                 for (rasterScanCuIndex = RASTER_SCAN_CU_INDEX_16x16_0; rasterScanCuIndex <= RASTER_SCAN_CU_INDEX_16x16_15; rasterScanCuIndex++) {
    3521       10560 :                     uint8_t cu_index = rasterScanCuIndex - 5;
    3522       10560 :                     uint8_t x = cu_index & 3;
    3523       10560 :                     uint8_t y = (cu_index >> 2);
    3524       10560 :                     int32_t gradx = 0;
    3525       10560 :                     int32_t grady = 0;
    3526       10560 :                     int32_t nbcompx = 0;
    3527       10560 :                     int32_t nbcompy = 0;
    3528       10560 :                     if (x != 0)
    3529             :                     {
    3530        7920 :                         gradx += ABS((int32_t)(y_mean_ptr[rasterScanCuIndex]) - (int32_t)(y_mean_ptr[rasterScanCuIndex - 1]));
    3531        7920 :                         gradx += ABS((int32_t)(cr_mean_ptr[rasterScanCuIndex]) - (int32_t)(cr_mean_ptr[rasterScanCuIndex - 1]));
    3532        7920 :                         gradx += ABS((int32_t)(cb_mean_ptr[rasterScanCuIndex]) - (int32_t)(cb_mean_ptr[rasterScanCuIndex - 1]));
    3533        7920 :                         nbcompx++;
    3534             :                     }
    3535       10560 :                     if (x != 3)
    3536             :                     {
    3537        7920 :                         gradx += ABS((int32_t)(y_mean_ptr[rasterScanCuIndex + 1]) - (int32_t)(y_mean_ptr[rasterScanCuIndex]));
    3538        7920 :                         gradx += ABS((int32_t)(cr_mean_ptr[rasterScanCuIndex + 1]) - (int32_t)(cr_mean_ptr[rasterScanCuIndex]));
    3539        7920 :                         gradx += ABS((int32_t)(cb_mean_ptr[rasterScanCuIndex + 1]) - (int32_t)(cb_mean_ptr[rasterScanCuIndex]));
    3540        7920 :                         nbcompx++;
    3541             :                     }
    3542       10560 :                     gradx = gradx / nbcompx;
    3543             : 
    3544       10560 :                     if (y != 0)
    3545             :                     {
    3546        7920 :                         grady += ABS((int32_t)(y_mean_ptr[rasterScanCuIndex]) - (int32_t)(y_mean_ptr[rasterScanCuIndex - 4]));
    3547        7920 :                         grady += ABS((int32_t)(cr_mean_ptr[rasterScanCuIndex]) - (int32_t)(cr_mean_ptr[rasterScanCuIndex - 4]));
    3548        7920 :                         grady += ABS((int32_t)(cb_mean_ptr[rasterScanCuIndex]) - (int32_t)(cb_mean_ptr[rasterScanCuIndex - 4]));
    3549        7920 :                         nbcompy++;
    3550             :                     }
    3551       10560 :                     if (y != 3)
    3552             :                     {
    3553        7920 :                         grady += ABS((int32_t)(y_mean_ptr[rasterScanCuIndex + 4]) - (int32_t)(y_mean_ptr[rasterScanCuIndex]));
    3554        7920 :                         grady += ABS((int32_t)(cr_mean_ptr[rasterScanCuIndex + 4]) - (int32_t)(cr_mean_ptr[rasterScanCuIndex]));
    3555        7920 :                         grady += ABS((int32_t)(cb_mean_ptr[rasterScanCuIndex + 4]) - (int32_t)(cb_mean_ptr[rasterScanCuIndex]));
    3556             : 
    3557        7920 :                         nbcompy++;
    3558             :                     }
    3559             : 
    3560       10560 :                     grady = grady / nbcompy;
    3561       10560 :                     sb_stat_ptr->cu_stat_array[rasterScanCuIndex].grad = (uint32_t)ABS(gradx) + ABS(grady);
    3562       10560 :                     if (sb_stat_ptr->cu_stat_array[rasterScanCuIndex].grad > maxGrad)
    3563         222 :                         maxGrad = sb_stat_ptr->cu_stat_array[rasterScanCuIndex].grad;
    3564             :                 }
    3565             :             }
    3566             :         }
    3567             : 
    3568        1830 :         for (sb_index = 0; sb_index < totalLcuCount; sb_index++) {
    3569        1800 :             SbParams *sb_params = &sequence_control_set_ptr->sb_params_array[sb_index];
    3570        1800 :             if (sb_params->potential_logo_sb &&sb_params->is_complete_sb) {
    3571         660 :                 SbStat *sb_stat_ptr = &picture_control_set_ptr->sb_stat_array[sb_index];
    3572             : 
    3573             :                 uint32_t rasterScanCuIndex;
    3574       11220 :                 for (rasterScanCuIndex = RASTER_SCAN_CU_INDEX_16x16_0; rasterScanCuIndex <= RASTER_SCAN_CU_INDEX_16x16_15; rasterScanCuIndex++)
    3575       10560 :                     sb_stat_ptr->cu_stat_array[rasterScanCuIndex].edge_cu = (uint16_t)MIN(((sb_stat_ptr->cu_stat_array[rasterScanCuIndex].grad * (255 * 3)) / maxGrad), 255) < 30 ? 0 : 1;
    3576             :             }
    3577             :         }
    3578             :     }
    3579             :     else {
    3580        5487 :         for (sb_index = 0; sb_index < totalLcuCount; sb_index++) {
    3581        5397 :             SbStat *sb_stat_ptr = &picture_control_set_ptr->sb_stat_array[sb_index];
    3582             : 
    3583        5397 :             EB_MEMSET(sb_stat_ptr, 0, sizeof(SbStat));
    3584             :         }
    3585             :     }
    3586         120 : }
    3587             : 
    3588             : /******************************************************
    3589             : * Edge map derivation
    3590             : ******************************************************/
    3591         120 : void EdgeDetection(
    3592             :     SequenceControlSet            *sequence_control_set_ptr,
    3593             :     PictureParentControlSet       *picture_control_set_ptr)
    3594             : {
    3595             :     uint16_t  *variancePtr;
    3596         120 :     uint32_t sb_total_count = picture_control_set_ptr->sb_total_count;
    3597         120 :     uint64_t thrsldLevel0 = (picture_control_set_ptr->pic_avg_variance * 70) / 100;
    3598             :     uint8_t  *meanPtr;
    3599         120 :     uint32_t picture_width_in_sb = (sequence_control_set_ptr->seq_header.max_frame_width + sequence_control_set_ptr->sb_sz - 1) / sequence_control_set_ptr->sb_sz;
    3600         120 :     uint32_t picture_height_in_sb = (sequence_control_set_ptr->seq_header.max_frame_height + sequence_control_set_ptr->sb_sz - 1) / sequence_control_set_ptr->sb_sz;
    3601         120 :     uint32_t neighbourLcuIndex = 0;
    3602         120 :     uint64_t similarityCount = 0;
    3603         120 :     uint64_t similarityCount0 = 0;
    3604         120 :     uint64_t similarityCount1 = 0;
    3605         120 :     uint64_t similarityCount2 = 0;
    3606         120 :     uint64_t similarityCount3 = 0;
    3607         120 :     uint32_t sb_x = 0;
    3608         120 :     uint32_t sb_y = 0;
    3609             :     uint32_t sb_index;
    3610             :     EbBool highVarianceLucFlag;
    3611             : 
    3612         120 :     uint32_t rasterScanCuIndex = 0;
    3613         120 :     uint32_t numberOfEdgeLcu = 0;
    3614             :     EbBool highIntensityLcuFlag;
    3615             : 
    3616             :     uint64_t neighbourLcuMean;
    3617             :     int32_t i, j;
    3618             : 
    3619         120 :     uint8_t highIntensityTh = 180;
    3620         120 :     uint8_t lowIntensityTh = 120;
    3621         120 :     uint8_t highIntensityTh1 = 200;
    3622         120 :     uint8_t veryLowIntensityTh = 20;
    3623             : 
    3624        7308 :     for (sb_index = 0; sb_index < sb_total_count; ++sb_index) {
    3625        7188 :         sb_x = sb_index % picture_width_in_sb;
    3626        7188 :         sb_y = sb_index / picture_width_in_sb;
    3627             : 
    3628        7188 :         EdgeLcuResults *edge_results_ptr = picture_control_set_ptr->edge_results_ptr;
    3629        7188 :         picture_control_set_ptr->edge_results_ptr[sb_index].edge_block_num = 0;
    3630        7188 :         picture_control_set_ptr->edge_results_ptr[sb_index].isolated_high_intensity_sb = 0;
    3631        7188 :         picture_control_set_ptr->sharp_edge_sb_flag[sb_index] = 0;
    3632             : 
    3633        7188 :         if (sb_x > 0 && sb_x < (uint32_t)(picture_width_in_sb - 1) && sb_y >  0 && sb_y < (uint32_t)(picture_height_in_sb - 1)) {
    3634        3838 :             variancePtr = picture_control_set_ptr->variance[sb_index];
    3635        3838 :             meanPtr = picture_control_set_ptr->y_mean[sb_index];
    3636             : 
    3637        3838 :             similarityCount = 0;
    3638             : 
    3639        3838 :             highVarianceLucFlag =
    3640        3838 :                 (variancePtr[RASTER_SCAN_CU_INDEX_64x64] > thrsldLevel0) ? EB_TRUE : EB_FALSE;
    3641        3838 :             edge_results_ptr[sb_index].edge_block_num = highVarianceLucFlag;
    3642        3838 :             if (variancePtr[0] > highIntensityTh1) {
    3643        1945 :                 uint8_t sharpEdge = 0;
    3644       33014 :                 for (rasterScanCuIndex = RASTER_SCAN_CU_INDEX_16x16_0; rasterScanCuIndex <= RASTER_SCAN_CU_INDEX_16x16_15; rasterScanCuIndex++)
    3645       31069 :                     sharpEdge = (variancePtr[rasterScanCuIndex] < veryLowIntensityTh) ? sharpEdge + 1 : sharpEdge;
    3646        1945 :                 if (sharpEdge > 4)
    3647         424 :                     picture_control_set_ptr->sharp_edge_sb_flag[sb_index] = 1;
    3648             :             }
    3649             : 
    3650        3838 :             if (sb_x > 3 && sb_x < (uint32_t)(picture_width_in_sb - 4) && sb_y >  3 && sb_y < (uint32_t)(picture_height_in_sb - 4)) {
    3651           0 :                 highIntensityLcuFlag =
    3652           0 :                     (meanPtr[RASTER_SCAN_CU_INDEX_64x64] > highIntensityTh) ? EB_TRUE : EB_FALSE;
    3653             : 
    3654           0 :                 if (highIntensityLcuFlag) {
    3655           0 :                     neighbourLcuIndex = sb_index - 1;
    3656           0 :                     neighbourLcuMean = picture_control_set_ptr->y_mean[neighbourLcuIndex][RASTER_SCAN_CU_INDEX_64x64];
    3657             : 
    3658           0 :                     similarityCount0 = (neighbourLcuMean < lowIntensityTh) ? 1 : 0;
    3659             : 
    3660           0 :                     neighbourLcuIndex = sb_index + 1;
    3661             : 
    3662           0 :                     neighbourLcuMean = picture_control_set_ptr->y_mean[neighbourLcuIndex][RASTER_SCAN_CU_INDEX_64x64];
    3663           0 :                     similarityCount1 = (neighbourLcuMean < lowIntensityTh) ? 1 : 0;
    3664             : 
    3665           0 :                     neighbourLcuIndex = sb_index - picture_width_in_sb;
    3666           0 :                     neighbourLcuMean = picture_control_set_ptr->y_mean[neighbourLcuIndex][RASTER_SCAN_CU_INDEX_64x64];
    3667           0 :                     similarityCount2 = (neighbourLcuMean < lowIntensityTh) ? 1 : 0;
    3668             : 
    3669           0 :                     neighbourLcuIndex = sb_index + picture_width_in_sb;
    3670           0 :                     neighbourLcuMean = picture_control_set_ptr->y_mean[neighbourLcuIndex][RASTER_SCAN_CU_INDEX_64x64];
    3671           0 :                     similarityCount3 = (neighbourLcuMean < lowIntensityTh) ? 1 : 0;
    3672             : 
    3673           0 :                     similarityCount = similarityCount0 + similarityCount1 + similarityCount2 + similarityCount3;
    3674             : 
    3675           0 :                     if (similarityCount > 0) {
    3676           0 :                         for (i = -4; i < 5; i++) {
    3677           0 :                             for (j = -4; j < 5; j++) {
    3678           0 :                                 neighbourLcuIndex = sb_index + (i * picture_width_in_sb) + j;
    3679           0 :                                 picture_control_set_ptr->edge_results_ptr[neighbourLcuIndex].isolated_high_intensity_sb = 1;
    3680             :                             }
    3681             :                         }
    3682             :                     }
    3683             :                 }
    3684             :             }
    3685             : 
    3686        3838 :             if (highVarianceLucFlag)
    3687        1567 :                 numberOfEdgeLcu += edge_results_ptr[sb_index].edge_block_num;
    3688             :         }
    3689             :     }
    3690         120 :     return;
    3691             : }
    3692             : 
    3693             : /******************************************************
    3694             : * Calculate the variance of variance to determine Homogeneous regions. Note: Variance calculation should be on.
    3695             : ******************************************************/
    3696         120 : void DetermineHomogeneousRegionInPicture(
    3697             :     SequenceControlSet            *sequence_control_set_ptr,
    3698             :     PictureParentControlSet       *picture_control_set_ptr)
    3699             : {
    3700             :     uint16_t  *variancePtr;
    3701             :     uint32_t sb_index;
    3702         120 :     uint64_t nullVarCnt = 0;
    3703         120 :     uint64_t veryLowVarCnt = 0;
    3704         120 :     uint64_t varLcuCnt = 0;
    3705         120 :     uint32_t sb_total_count = picture_control_set_ptr->sb_total_count;
    3706             : 
    3707        7315 :     for (sb_index = 0; sb_index < sb_total_count; ++sb_index) {
    3708        7195 :         SbParams sb_params = sequence_control_set_ptr->sb_params_array[sb_index];
    3709        7195 :         variancePtr = picture_control_set_ptr->variance[sb_index];
    3710             : 
    3711        7195 :         if (sb_params.is_complete_sb) {
    3712        5999 :             nullVarCnt += (variancePtr[ME_TIER_ZERO_PU_64x64] == 0) ? 1 : 0;
    3713             : 
    3714        5999 :             varLcuCnt++;
    3715             : 
    3716        5999 :             veryLowVarCnt += ((variancePtr[ME_TIER_ZERO_PU_64x64]) < LCU_LOW_VAR_TH) ? 1 : 0;
    3717             :         }
    3718             :     }
    3719         120 :     picture_control_set_ptr->very_low_var_pic_flag = EB_FALSE;
    3720         120 :     if ((varLcuCnt > 0) && (((veryLowVarCnt * 100) / varLcuCnt) > PIC_LOW_VAR_PERCENTAGE_TH))
    3721           0 :         picture_control_set_ptr->very_low_var_pic_flag = EB_TRUE;
    3722         120 :     picture_control_set_ptr->logo_pic_flag = EB_FALSE;
    3723         120 :     if ((varLcuCnt > 0) && (((veryLowVarCnt * 100) / varLcuCnt) > 80))
    3724           0 :         picture_control_set_ptr->logo_pic_flag = EB_TRUE;
    3725         120 :     return;
    3726             : }
    3727             : /************************************************
    3728             :  * ComputePictureSpatialStatistics
    3729             :  ** Compute Block Variance
    3730             :  ** Compute Picture Variance
    3731             :  ** Compute Block Mean for all blocks in the picture
    3732             :  ************************************************/
    3733         120 : void ComputePictureSpatialStatistics(
    3734             :     SequenceControlSet            *sequence_control_set_ptr,
    3735             :     PictureParentControlSet       *picture_control_set_ptr,
    3736             :     EbPictureBufferDesc           *input_picture_ptr,
    3737             :     EbPictureBufferDesc           *input_padded_picture_ptr,
    3738             :     uint32_t                           sb_total_count)
    3739             : {
    3740             :     uint32_t sb_index;
    3741             :     uint32_t sb_origin_x;        // to avoid using child PCS
    3742             :     uint32_t sb_origin_y;
    3743             :     uint32_t inputLumaOriginIndex;
    3744             :     uint32_t inputCbOriginIndex;
    3745             :     uint32_t inputCrOriginIndex;
    3746             :     uint64_t picTotVariance;
    3747             : 
    3748             :     // Variance
    3749         120 :     picTotVariance = 0;
    3750             : 
    3751        7311 :     for (sb_index = 0; sb_index < picture_control_set_ptr->sb_total_count; ++sb_index) {
    3752        7192 :         SbParams   *sb_params = &sequence_control_set_ptr->sb_params_array[sb_index];
    3753             : 
    3754        7192 :         sb_origin_x = sb_params->origin_x;
    3755        7192 :         sb_origin_y = sb_params->origin_y;
    3756        7192 :         inputLumaOriginIndex = (input_padded_picture_ptr->origin_y + sb_origin_y) * input_padded_picture_ptr->stride_y +
    3757        7192 :             input_padded_picture_ptr->origin_x + sb_origin_x;
    3758             : 
    3759        7192 :         inputCbOriginIndex = ((input_picture_ptr->origin_y + sb_origin_y) >> 1) * input_picture_ptr->stride_cb + ((input_picture_ptr->origin_x + sb_origin_x) >> 1);
    3760        7192 :         inputCrOriginIndex = ((input_picture_ptr->origin_y + sb_origin_y) >> 1) * input_picture_ptr->stride_cr + ((input_picture_ptr->origin_x + sb_origin_x) >> 1);
    3761             : 
    3762        7192 :         ComputeBlockMeanComputeVariance(
    3763             :             sequence_control_set_ptr,
    3764             :             picture_control_set_ptr,
    3765             :             input_padded_picture_ptr,
    3766             :             sb_index,
    3767             :             inputLumaOriginIndex);
    3768             : 
    3769        7186 :         if (sb_params->is_complete_sb) {
    3770        5991 :             ComputeChromaBlockMean(
    3771             :                 sequence_control_set_ptr,
    3772             :                 picture_control_set_ptr,
    3773             :                 input_picture_ptr,
    3774             :                 sb_index,
    3775             :                 inputCbOriginIndex,
    3776             :                 inputCrOriginIndex);
    3777             :         }
    3778             :         else {
    3779        1195 :             ZeroOutChromaBlockMean(
    3780             :                 picture_control_set_ptr,
    3781             :                 sb_index);
    3782             :         }
    3783             : 
    3784        7191 :         picTotVariance += (picture_control_set_ptr->variance[sb_index][RASTER_SCAN_CU_INDEX_64x64]);
    3785             :     }
    3786             : 
    3787         119 :     picture_control_set_ptr->pic_avg_variance = (uint16_t)(picTotVariance / sb_total_count);
    3788             : 
    3789             :     // Calculate the variance of variance to determine Homogeneous regions. Note: Variance calculation should be on.
    3790         119 :     DetermineHomogeneousRegionInPicture(
    3791             :         sequence_control_set_ptr,
    3792             :         picture_control_set_ptr);
    3793             : 
    3794         120 :     EdgeDetectionMeanLumaChroma16x16(
    3795             :         sequence_control_set_ptr,
    3796             :         picture_control_set_ptr,
    3797         120 :         sequence_control_set_ptr->sb_total_count);
    3798             : 
    3799         120 :     EdgeDetection(
    3800             :         sequence_control_set_ptr,
    3801             :         picture_control_set_ptr);
    3802             : 
    3803         120 :     return;
    3804             : }
    3805             : 
    3806         120 : void CalculateInputAverageIntensity(
    3807             :     SequenceControlSet            *sequence_control_set_ptr,
    3808             :     PictureParentControlSet       *picture_control_set_ptr,
    3809             :     EbPictureBufferDesc           *input_picture_ptr,
    3810             :     uint64_t                           sumAverageIntensityTotalRegionsLuma,
    3811             :     uint64_t                           sumAverageIntensityTotalRegionsCb,
    3812             :     uint64_t                           sumAverageIntensityTotalRegionsCr)
    3813             : {
    3814         120 :     if (sequence_control_set_ptr->scd_mode == SCD_MODE_0) {
    3815             :         uint16_t blockIndexInWidth;
    3816             :         uint16_t blockIndexInHeight;
    3817         120 :         uint64_t mean = 0;
    3818             : 
    3819         120 :         const uint16_t stride_y = input_picture_ptr->stride_y;
    3820             :         // Loop over 8x8 blocks and calculates the mean value
    3821         120 :         if (sequence_control_set_ptr->block_mean_calc_prec == BLOCK_MEAN_PREC_FULL) {
    3822           0 :             for (blockIndexInHeight = 0; blockIndexInHeight < input_picture_ptr->height >> 3; ++blockIndexInHeight) {
    3823           0 :                 for (blockIndexInWidth = 0; blockIndexInWidth < input_picture_ptr->width >> 3; ++blockIndexInWidth)
    3824           0 :                     mean += compute_mean_8x8(&(input_picture_ptr->buffer_y[(blockIndexInWidth << 3) + (blockIndexInHeight << 3) * input_picture_ptr->stride_y]), input_picture_ptr->stride_y, 8, 8);
    3825             :             }
    3826             :         }
    3827             :         else {
    3828        4659 :             for (blockIndexInHeight = 0; blockIndexInHeight < input_picture_ptr->height >> 3; ++blockIndexInHeight) {
    3829      423373 :                 for (blockIndexInWidth = 0; blockIndexInWidth < input_picture_ptr->width >> 3; ++blockIndexInWidth)
    3830      418834 :                     mean += compute_sub_mean8x8_sse2_intrin(&(input_picture_ptr->buffer_y[(blockIndexInWidth << 3) + (blockIndexInHeight << 3) * stride_y]), stride_y);
    3831             :             }
    3832             :         }
    3833           0 :         mean = ((mean + ((input_picture_ptr->height* input_picture_ptr->width) >> 7)) / ((input_picture_ptr->height* input_picture_ptr->width) >> 6));
    3834           0 :         mean = (mean + (1 << (MEAN_PRECISION - 1))) >> MEAN_PRECISION;
    3835           0 :         picture_control_set_ptr->average_intensity[0] = (uint8_t)mean;
    3836             :     }
    3837             : 
    3838             :     else {
    3839           0 :         picture_control_set_ptr->average_intensity[0] = (uint8_t)((sumAverageIntensityTotalRegionsLuma + ((input_picture_ptr->width*input_picture_ptr->height) >> 1)) / (input_picture_ptr->width*input_picture_ptr->height));
    3840           0 :         picture_control_set_ptr->average_intensity[1] = (uint8_t)((sumAverageIntensityTotalRegionsCb + ((input_picture_ptr->width*input_picture_ptr->height) >> 3)) / ((input_picture_ptr->width*input_picture_ptr->height) >> 2));
    3841           0 :         picture_control_set_ptr->average_intensity[2] = (uint8_t)((sumAverageIntensityTotalRegionsCr + ((input_picture_ptr->width*input_picture_ptr->height) >> 3)) / ((input_picture_ptr->width*input_picture_ptr->height) >> 2));
    3842             :     }
    3843             : 
    3844           0 :     return;
    3845             : }
    3846             : 
    3847             : /************************************************
    3848             :  * Gathering statistics per picture
    3849             :  ** Calculating the pixel intensity histogram bins per picture needed for SCD
    3850             :  ** Computing Picture Variance
    3851             :  ************************************************/
    3852         120 : void GatheringPictureStatistics(
    3853             :     SequenceControlSet            *sequence_control_set_ptr,
    3854             :     PictureParentControlSet       *picture_control_set_ptr,
    3855             :     EbPictureBufferDesc           *input_picture_ptr,
    3856             :     EbPictureBufferDesc           *input_padded_picture_ptr,
    3857             :     EbPictureBufferDesc            *sixteenth_decimated_picture_ptr,
    3858             :     uint32_t                           sb_total_count)
    3859             : {
    3860         120 :     uint64_t                          sumAverageIntensityTotalRegionsLuma = 0;
    3861         120 :     uint64_t                          sumAverageIntensityTotalRegionsCb = 0;
    3862         120 :     uint64_t                          sumAverageIntensityTotalRegionsCr = 0;
    3863             : 
    3864             :     // Histogram bins
    3865             :         // Use 1/16 Luma for Histogram generation
    3866             :         // 1/16 input ready
    3867         120 :     SubSampleLumaGeneratePixelIntensityHistogramBins(
    3868             :         sequence_control_set_ptr,
    3869             :         picture_control_set_ptr,
    3870             :         sixteenth_decimated_picture_ptr,
    3871             :         &sumAverageIntensityTotalRegionsLuma);
    3872             : 
    3873             :     // Use 1/4 Chroma for Histogram generation
    3874             :     // 1/4 input not ready => perform operation on the fly
    3875         120 :     SubSampleChromaGeneratePixelIntensityHistogramBins(
    3876             :         sequence_control_set_ptr,
    3877             :         picture_control_set_ptr,
    3878             :         input_picture_ptr,
    3879             :         &sumAverageIntensityTotalRegionsCb,
    3880             :         &sumAverageIntensityTotalRegionsCr);
    3881             :     //
    3882             :     // Calculate the LUMA average intensity
    3883         120 :     CalculateInputAverageIntensity(
    3884             :         sequence_control_set_ptr,
    3885             :         picture_control_set_ptr,
    3886             :         input_picture_ptr,
    3887             :         sumAverageIntensityTotalRegionsLuma,
    3888             :         sumAverageIntensityTotalRegionsCb,
    3889             :         sumAverageIntensityTotalRegionsCr);
    3890             : 
    3891         120 :     ComputePictureSpatialStatistics(
    3892             :         sequence_control_set_ptr,
    3893             :         picture_control_set_ptr,
    3894             :         input_picture_ptr,
    3895             :         input_padded_picture_ptr,
    3896             :         sb_total_count);
    3897             : 
    3898         120 :     return;
    3899             : }
    3900             : /************************************************
    3901             :  * Pad Picture at the right and bottom sides
    3902             :  ** To match a multiple of min CU size in width and height
    3903             :  ************************************************/
    3904         118 : void PadPictureToMultipleOfMinCuSizeDimensions(
    3905             :     SequenceControlSet            *sequence_control_set_ptr,
    3906             :     EbPictureBufferDesc           *input_picture_ptr)
    3907             : {
    3908         118 :     EbBool                          is16BitInput = (EbBool)(sequence_control_set_ptr->static_config.encoder_bit_depth > EB_8BIT);
    3909             : 
    3910         118 :     uint32_t color_format = input_picture_ptr->color_format;
    3911         118 :     const uint16_t subsampling_x = (color_format == EB_YUV444 ? 1 : 2) - 1;
    3912         118 :     const uint16_t subsampling_y = (color_format >= EB_YUV422 ? 1 : 2) - 1;
    3913             : 
    3914             :     // Input Picture Padding
    3915         118 :     pad_input_picture(
    3916         118 :         &input_picture_ptr->buffer_y[input_picture_ptr->origin_x + (input_picture_ptr->origin_y * input_picture_ptr->stride_y)],
    3917         118 :         input_picture_ptr->stride_y,
    3918         118 :         (input_picture_ptr->width - sequence_control_set_ptr->pad_right),
    3919         118 :         (input_picture_ptr->height - sequence_control_set_ptr->pad_bottom),
    3920             :         sequence_control_set_ptr->pad_right,
    3921             :         sequence_control_set_ptr->pad_bottom);
    3922             : 
    3923         120 :     pad_input_picture(
    3924         120 :         &input_picture_ptr->buffer_cb[(input_picture_ptr->origin_x >> subsampling_x) + ((input_picture_ptr->origin_y >> subsampling_y) * input_picture_ptr->stride_cb)],
    3925         120 :         input_picture_ptr->stride_cb,
    3926         120 :         (input_picture_ptr->width - sequence_control_set_ptr->pad_right) >> subsampling_x,
    3927         120 :         (input_picture_ptr->height - sequence_control_set_ptr->pad_bottom) >> subsampling_y,
    3928         120 :         sequence_control_set_ptr->pad_right >> subsampling_x,
    3929         120 :         sequence_control_set_ptr->pad_bottom >> subsampling_y);
    3930             : 
    3931         120 :     pad_input_picture(
    3932         120 :         &input_picture_ptr->buffer_cr[(input_picture_ptr->origin_x >> subsampling_x) + ((input_picture_ptr->origin_y >> subsampling_y) * input_picture_ptr->stride_cb)],
    3933         120 :         input_picture_ptr->stride_cr,
    3934         120 :         (input_picture_ptr->width - sequence_control_set_ptr->pad_right) >> subsampling_x,
    3935         120 :         (input_picture_ptr->height - sequence_control_set_ptr->pad_bottom) >> subsampling_y,
    3936         120 :         sequence_control_set_ptr->pad_right >> subsampling_x,
    3937         120 :         sequence_control_set_ptr->pad_bottom >> subsampling_y);
    3938             : 
    3939         120 :     if (is16BitInput)
    3940             :     {
    3941           0 :         pad_input_picture(
    3942           0 :             &input_picture_ptr->buffer_bit_inc_y[input_picture_ptr->origin_x + (input_picture_ptr->origin_y * input_picture_ptr->stride_bit_inc_y)],
    3943           0 :             input_picture_ptr->stride_bit_inc_y,
    3944           0 :             (input_picture_ptr->width - sequence_control_set_ptr->pad_right),
    3945           0 :             (input_picture_ptr->height - sequence_control_set_ptr->pad_bottom),
    3946             :             sequence_control_set_ptr->pad_right,
    3947             :             sequence_control_set_ptr->pad_bottom);
    3948             : 
    3949           0 :         pad_input_picture(
    3950           0 :             &input_picture_ptr->buffer_bit_inc_cb[(input_picture_ptr->origin_x >> subsampling_x) + ((input_picture_ptr->origin_y >> subsampling_y) * input_picture_ptr->stride_bit_inc_cb)],
    3951           0 :             input_picture_ptr->stride_bit_inc_cb,
    3952           0 :             (input_picture_ptr->width - sequence_control_set_ptr->pad_right) >> subsampling_x,
    3953           0 :             (input_picture_ptr->height - sequence_control_set_ptr->pad_bottom) >> subsampling_y,
    3954           0 :             sequence_control_set_ptr->pad_right >> subsampling_x,
    3955           0 :             sequence_control_set_ptr->pad_bottom >> subsampling_y);
    3956             : 
    3957           0 :         pad_input_picture(
    3958           0 :             &input_picture_ptr->buffer_bit_inc_cr[(input_picture_ptr->origin_x >> subsampling_x) + ((input_picture_ptr->origin_y >> subsampling_y) * input_picture_ptr->stride_bit_inc_cb)],
    3959           0 :             input_picture_ptr->stride_bit_inc_cr,
    3960           0 :             (input_picture_ptr->width - sequence_control_set_ptr->pad_right) >> subsampling_x,
    3961           0 :             (input_picture_ptr->height - sequence_control_set_ptr->pad_bottom) >> subsampling_y,
    3962           0 :             sequence_control_set_ptr->pad_right >> subsampling_x,
    3963           0 :             sequence_control_set_ptr->pad_bottom >> subsampling_y);
    3964             :     }
    3965             : 
    3966         120 :     return;
    3967             : }
    3968             : 
    3969             : /************************************************
    3970             :  * Pad Picture at the right and bottom sides
    3971             :  ** To complete border SB smaller than SB size
    3972             :  ************************************************/
    3973         117 : void PadPictureToMultipleOfLcuDimensions(
    3974             :     EbPictureBufferDesc           *input_padded_picture_ptr
    3975             : )
    3976             : {
    3977             :     // Generate Padding
    3978         117 :     generate_padding(
    3979             :         &input_padded_picture_ptr->buffer_y[0],
    3980         117 :         input_padded_picture_ptr->stride_y,
    3981         117 :         input_padded_picture_ptr->width,
    3982         117 :         input_padded_picture_ptr->height,
    3983         117 :         input_padded_picture_ptr->origin_x,
    3984         117 :         input_padded_picture_ptr->origin_y);
    3985             : 
    3986         120 :     return;
    3987             : }
    3988             : 
    3989             : /************************************************
    3990             : * 1/4 & 1/16 input picture decimation
    3991             : ************************************************/
    3992         128 : void DownsampleDecimationInputPicture(
    3993             :     PictureParentControlSet       *picture_control_set_ptr,
    3994             :     EbPictureBufferDesc           *input_padded_picture_ptr,
    3995             :     EbPictureBufferDesc           *quarter_decimated_picture_ptr,
    3996             :     EbPictureBufferDesc           *sixteenth_decimated_picture_ptr) {
    3997             :     // Decimate input picture for HME L0 and L1
    3998         128 :     if (picture_control_set_ptr->enable_hme_flag || picture_control_set_ptr->tf_enable_hme_flag) {
    3999         128 :         if (picture_control_set_ptr->enable_hme_level1_flag || picture_control_set_ptr->tf_enable_hme_level1_flag) {
    4000         128 :             decimation_2d(
    4001         128 :                 &input_padded_picture_ptr->buffer_y[input_padded_picture_ptr->origin_x + input_padded_picture_ptr->origin_y * input_padded_picture_ptr->stride_y],
    4002         128 :                 input_padded_picture_ptr->stride_y,
    4003         128 :                 input_padded_picture_ptr->width,
    4004         128 :                 input_padded_picture_ptr->height,
    4005         128 :                 &quarter_decimated_picture_ptr->buffer_y[quarter_decimated_picture_ptr->origin_x + quarter_decimated_picture_ptr->origin_x*quarter_decimated_picture_ptr->stride_y],
    4006         128 :                 quarter_decimated_picture_ptr->stride_y,
    4007             :                 2);
    4008         128 :             generate_padding(
    4009             :                 &quarter_decimated_picture_ptr->buffer_y[0],
    4010         128 :                 quarter_decimated_picture_ptr->stride_y,
    4011         128 :                 quarter_decimated_picture_ptr->width,
    4012         128 :                 quarter_decimated_picture_ptr->height,
    4013         128 :                 quarter_decimated_picture_ptr->origin_x,
    4014         128 :                 quarter_decimated_picture_ptr->origin_y);
    4015             :         }
    4016             :     }
    4017             : 
    4018             :     // Always perform 1/16th decimation as
    4019             :     // Sixteenth Input Picture Decimation
    4020         128 :     decimation_2d(
    4021         128 :         &input_padded_picture_ptr->buffer_y[input_padded_picture_ptr->origin_x + input_padded_picture_ptr->origin_y * input_padded_picture_ptr->stride_y],
    4022         128 :         input_padded_picture_ptr->stride_y,
    4023         128 :         input_padded_picture_ptr->width,
    4024         128 :         input_padded_picture_ptr->height,
    4025         128 :         &sixteenth_decimated_picture_ptr->buffer_y[sixteenth_decimated_picture_ptr->origin_x + sixteenth_decimated_picture_ptr->origin_x*sixteenth_decimated_picture_ptr->stride_y],
    4026         128 :         sixteenth_decimated_picture_ptr->stride_y,
    4027             :         4);
    4028             : 
    4029         128 :     generate_padding(
    4030             :         &sixteenth_decimated_picture_ptr->buffer_y[0],
    4031         128 :         sixteenth_decimated_picture_ptr->stride_y,
    4032         128 :         sixteenth_decimated_picture_ptr->width,
    4033         128 :         sixteenth_decimated_picture_ptr->height,
    4034         128 :         sixteenth_decimated_picture_ptr->origin_x,
    4035         128 :         sixteenth_decimated_picture_ptr->origin_y);
    4036             : 
    4037         128 : }
    4038             : #if PAL_SUP
    4039           0 : int av1_count_colors_highbd(uint16_t *src, int stride, int rows, int cols,
    4040             :     int bit_depth, int *val_count) {
    4041           0 :     assert(bit_depth <= 12);
    4042           0 :     const int max_pix_val = 1 << bit_depth;
    4043             :    // const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    4044           0 :     memset(val_count, 0, max_pix_val * sizeof(val_count[0]));
    4045           0 :     for (int r = 0; r < rows; ++r) {
    4046           0 :         for (int c = 0; c < cols; ++c) {
    4047           0 :             const int this_val = src[r * stride + c];
    4048           0 :             assert(this_val < max_pix_val);
    4049           0 :             if (this_val >= max_pix_val) return 0;
    4050           0 :             ++val_count[this_val];
    4051             :         }
    4052             :     }
    4053           0 :     int n = 0;
    4054           0 :     for (int i = 0; i < max_pix_val; ++i) {
    4055           0 :         if (val_count[i]) ++n;
    4056             :     }
    4057           0 :     return n;
    4058             : }
    4059             : #endif
    4060      102696 : int eb_av1_count_colors(const uint8_t *src, int stride, int rows, int cols,
    4061             :     int *val_count) {
    4062      102696 :     const int max_pix_val = 1 << 8;
    4063      102696 :     memset(val_count, 0, max_pix_val * sizeof(val_count[0]));
    4064     1252300 :     for (int r = 0; r < rows; ++r) {
    4065    14823600 :         for (int c = 0; c < cols; ++c) {
    4066    13674000 :             const int this_val = src[r * stride + c];
    4067    13674000 :             assert(this_val < max_pix_val);
    4068    13674000 :             ++val_count[this_val];
    4069             :         }
    4070             :     }
    4071      102696 :     int n = 0;
    4072    16500500 :     for (int i = 0; i < max_pix_val; ++i)
    4073    16397800 :         if (val_count[i]) ++n;
    4074      102696 :     return n;
    4075             : }
    4076             : extern aom_variance_fn_ptr_t mefn_ptr[BlockSizeS_ALL];
    4077             : 
    4078             : // This is used as a reference when computing the source variance for the
    4079             : //  purposes of activity masking.
    4080             : // Eventually this should be replaced by custom no-reference routines,
    4081             : //  which will be faster.
    4082             : const uint8_t eb_AV1_VAR_OFFS[MAX_SB_SIZE] = {
    4083             :   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
    4084             :   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
    4085             :   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
    4086             :   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
    4087             :   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
    4088             :   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
    4089             :   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
    4090             :   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
    4091             :   128, 128, 128, 128, 128, 128, 128, 128
    4092             : };
    4093             : 
    4094      814145 : unsigned int eb_av1_get_sby_perpixel_variance(const aom_variance_fn_ptr_t *fn_ptr, //const AV1_COMP *cpi,
    4095             :                                            const uint8_t *src,int stride,//const struct buf_2d *ref,
    4096             :                                            BlockSize bs) {
    4097             :   unsigned int sse;
    4098             :   const unsigned int var =
    4099             :       //cpi->fn_ptr[bs].vf(ref->buf, ref->stride, eb_AV1_VAR_OFFS, 0, &sse);
    4100      814145 :      fn_ptr->vf(src,  stride, eb_AV1_VAR_OFFS, 0, &sse);
    4101      814222 :   return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
    4102             : }
    4103             : 
    4104             : // Estimate if the source frame is screen content, based on the portion of
    4105             : // blocks that have no more than 4 (experimentally selected) luma colors.
    4106         120 : static void is_screen_content(
    4107             :     PictureParentControlSet     *picture_control_set_ptr,
    4108             :     const uint8_t               *src,
    4109             :     int                          use_hbd,
    4110             :     int                          stride,
    4111             :     int                         width,
    4112             :     int                         height) {
    4113         120 :     assert(src != NULL);
    4114         120 :     const int blk_w = 16;
    4115         120 :     const int blk_h = 16;
    4116             :     // These threshold values are selected experimentally.
    4117         120 :     const int color_thresh = 4;
    4118         120 :     const unsigned int var_thresh = 0;
    4119             :     // Counts of blocks with no more than color_thresh colors.
    4120         120 :     int counts_1 = 0;
    4121             :     // Counts of blocks with no more than color_thresh colors and variance larger
    4122             :     // than var_thresh.
    4123         120 :     int counts_2 = 0;
    4124             : 
    4125        2651 :     for (int r = 0; r + blk_h <= height; r += blk_h) {
    4126      104706 :         for (int c = 0; c + blk_w <= width; c += blk_w) {
    4127             :             int count_buf[1 << 12];  // Maximum (1 << 12) color levels.
    4128      102061 :             const int n_colors =
    4129             :                 use_hbd ? 0 /*av1_count_colors_highbd(src + r * stride + c, stride, blk_w,
    4130             :                     blk_h, bd, count_buf)*/
    4131      102175 :                 : eb_av1_count_colors(src + r * stride + c, stride, blk_w, blk_h,
    4132             :                     count_buf);
    4133      102061 :             if (n_colors > 1 && n_colors <= color_thresh) {
    4134        2787 :                 ++counts_1;
    4135             :                 //struct buf_2d buf;
    4136             :                 //buf.stride = stride;
    4137             :                 //buf.buf = (uint8_t *)src;
    4138        2787 :                 const aom_variance_fn_ptr_t *fn_ptr = &mefn_ptr[BLOCK_16X16];
    4139             : 
    4140        2787 :                 const unsigned int var = eb_av1_get_sby_perpixel_variance(fn_ptr, src + r * stride + c,stride, BLOCK_16X16);
    4141             :                                /* use_hbd
    4142             :                 ? av1_high_get_sby_perpixel_variance(cpi, &buf, BLOCK_16X16, bd)
    4143             :                 : */
    4144        2792 :                 if (var > var_thresh) ++counts_2;
    4145             :             }
    4146             :         }
    4147             :     }
    4148             : 
    4149          11 :     picture_control_set_ptr->sc_content_detected =
    4150          11 :         (counts_1 * blk_h * blk_w * 10 > width * height) &&
    4151           0 :         ( counts_2 * blk_h * blk_w * 15 > width * height) ;
    4152          11 : }
    4153             : 
    4154             : 
    4155             : /************************************************
    4156             :  * 1/4 & 1/16 input picture downsampling (filtering)
    4157             :  ************************************************/
    4158          64 : void DownsampleFilteringInputPicture(
    4159             :     PictureParentControlSet       *picture_control_set_ptr,
    4160             :     EbPictureBufferDesc           *input_padded_picture_ptr,
    4161             :     EbPictureBufferDesc           *quarter_picture_ptr,
    4162             :     EbPictureBufferDesc           *sixteenth_picture_ptr) {
    4163             : 
    4164             :     // Downsample input picture for HME L0 and L1
    4165          64 :     if (picture_control_set_ptr->enable_hme_flag || picture_control_set_ptr->tf_enable_hme_flag) {
    4166          64 :         if (picture_control_set_ptr->enable_hme_level1_flag || picture_control_set_ptr->tf_enable_hme_level1_flag) {
    4167             : 
    4168          64 :             downsample_2d(
    4169          64 :                 &input_padded_picture_ptr->buffer_y[input_padded_picture_ptr->origin_x + input_padded_picture_ptr->origin_y * input_padded_picture_ptr->stride_y],
    4170          64 :                 input_padded_picture_ptr->stride_y,
    4171          64 :                 input_padded_picture_ptr->width,
    4172          64 :                 input_padded_picture_ptr->height,
    4173          64 :                 &quarter_picture_ptr->buffer_y[quarter_picture_ptr->origin_x + quarter_picture_ptr->origin_x * quarter_picture_ptr->stride_y],
    4174          64 :                 quarter_picture_ptr->stride_y,
    4175             :                 2);
    4176          64 :             generate_padding(
    4177             :                 &quarter_picture_ptr->buffer_y[0],
    4178          64 :                 quarter_picture_ptr->stride_y,
    4179          64 :                 quarter_picture_ptr->width,
    4180          64 :                 quarter_picture_ptr->height,
    4181          64 :                 quarter_picture_ptr->origin_x,
    4182          64 :                 quarter_picture_ptr->origin_y);
    4183             : 
    4184             :         }
    4185             : 
    4186          64 :         if (picture_control_set_ptr->enable_hme_level0_flag || picture_control_set_ptr->tf_enable_hme_level0_flag) {
    4187             :             // Sixteenth Input Picture Downsampling
    4188          64 :             if (picture_control_set_ptr->enable_hme_level1_flag || picture_control_set_ptr->tf_enable_hme_level1_flag)
    4189          64 :                 downsample_2d(
    4190          64 :                     &quarter_picture_ptr->buffer_y[quarter_picture_ptr->origin_x + quarter_picture_ptr->origin_y * quarter_picture_ptr->stride_y],
    4191          64 :                     quarter_picture_ptr->stride_y,
    4192          64 :                     quarter_picture_ptr->width,
    4193          64 :                     quarter_picture_ptr->height,
    4194          64 :                     &sixteenth_picture_ptr->buffer_y[sixteenth_picture_ptr->origin_x + sixteenth_picture_ptr->origin_x*sixteenth_picture_ptr->stride_y],
    4195          64 :                     sixteenth_picture_ptr->stride_y,
    4196             :                     2);
    4197             :             else
    4198           0 :                 downsample_2d(
    4199           0 :                     &input_padded_picture_ptr->buffer_y[input_padded_picture_ptr->origin_x + input_padded_picture_ptr->origin_y * input_padded_picture_ptr->stride_y],
    4200           0 :                     input_padded_picture_ptr->stride_y,
    4201           0 :                     input_padded_picture_ptr->width,
    4202           0 :                     input_padded_picture_ptr->height,
    4203           0 :                     &sixteenth_picture_ptr->buffer_y[sixteenth_picture_ptr->origin_x + sixteenth_picture_ptr->origin_x*sixteenth_picture_ptr->stride_y],
    4204           0 :                     sixteenth_picture_ptr->stride_y,
    4205             :                     4);
    4206             : 
    4207          64 :             generate_padding(
    4208             :                 &sixteenth_picture_ptr->buffer_y[0],
    4209          64 :                 sixteenth_picture_ptr->stride_y,
    4210          64 :                 sixteenth_picture_ptr->width,
    4211          64 :                 sixteenth_picture_ptr->height,
    4212          64 :                 sixteenth_picture_ptr->origin_x,
    4213          64 :                 sixteenth_picture_ptr->origin_y);
    4214             : 
    4215             :         }
    4216             :     }
    4217          64 : }
    4218             : 
    4219             : /************************************************
    4220             :  * Picture Analysis Kernel
    4221             :  * The Picture Analysis Process pads & decimates the input pictures. It also includes creating an
    4222             :  * n-bin histogram and gathering picture 1st and 2nd moment statistics for each 8x8 block, which are
    4223             :  * used to compute variance. The process is multithreaded, so pictures can be processed out of order
    4224             :  * as long as all inputs are available.
    4225             :  * input_ptr is cast to PictureAnalysisContext*. The kernel loops forever, taking one input result per
    4226             :  * iteration; overlay pictures skip the analysis and are only forwarded. The final return is never reached.
    4227             :  ************************************************/
    4228           8 : void* picture_analysis_kernel(void *input_ptr)
    4229             : {
    4230           8 :     PictureAnalysisContext        *context_ptr = (PictureAnalysisContext*)input_ptr;
    4231             :     PictureParentControlSet       *picture_control_set_ptr;
    4232             :     SequenceControlSet            *sequence_control_set_ptr;
    4233             : 
    4234             :     EbObjectWrapper               *inputResultsWrapperPtr;
    4235             :     ResourceCoordinationResults   *inputResultsPtr;
    4236             :     EbObjectWrapper               *outputResultsWrapperPtr;
    4237             :     PictureAnalysisResults        *outputResultsPtr;
    4238             :     EbPaReferenceObject           *paReferenceObject;
    4239             : 
    4240             :     EbPictureBufferDesc           *input_padded_picture_ptr;
    4241             :     EbPictureBufferDesc           *input_picture_ptr;
    4242             : 
    4243             :     // Picture size in superblock (SB) units; sb_total_count is passed to pre-processing and statistics gathering
    4244             :     uint32_t                        picture_width_in_sb;
    4245             :     uint32_t                        pictureHeighInLcu;
    4246             :     uint32_t                        sb_total_count;
    4247             : 
    4248             :     for (;;) {
    4249             :         // Get the next full object from the resource coordination results input FIFO
    4250         128 :         eb_get_full_object(
    4251             :             context_ptr->resource_coordination_results_input_fifo_ptr,
    4252             :             &inputResultsWrapperPtr);
    4253             : 
    4254         118 :         inputResultsPtr = (ResourceCoordinationResults*)inputResultsWrapperPtr->object_ptr;
    4255         118 :         picture_control_set_ptr = (PictureParentControlSet*)inputResultsPtr->picture_control_set_wrapper_ptr->object_ptr;
    4256             : 
    4257             :         // There is no need to do processing for overlay picture. Overlay and AltRef share the same results.
    4258         118 :         if (!picture_control_set_ptr->is_overlay)
    4259             :         {
    4260         120 :             sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
    4261         120 :             input_picture_ptr = picture_control_set_ptr->enhanced_picture_ptr;
    4262             : 
    4263         120 :             paReferenceObject = (EbPaReferenceObject*)picture_control_set_ptr->pa_reference_picture_wrapper_ptr->object_ptr;
    4264         120 :             input_padded_picture_ptr = (EbPictureBufferDesc*)paReferenceObject->input_padded_picture_ptr;
    4265             :             // Max frame width/height divided by sb_sz, rounded up so partially covered border SBs are counted
    4266         120 :             picture_width_in_sb = (sequence_control_set_ptr->seq_header.max_frame_width + sequence_control_set_ptr->sb_sz - 1) / sequence_control_set_ptr->sb_sz;
    4267         120 :             pictureHeighInLcu = (sequence_control_set_ptr->seq_header.max_frame_height + sequence_control_set_ptr->sb_sz - 1) / sequence_control_set_ptr->sb_sz;
    4268         120 :             sb_total_count = picture_width_in_sb * pictureHeighInLcu;
    4269             : 
    4270             :             // Set picture parameters to account for subpicture, picture scantype, and set regions by resolutions
    4271         120 :             SetPictureParametersForStatisticsGathering(
    4272             :                 sequence_control_set_ptr);
    4273             : 
    4274             :             // Pad pictures to a multiple of the min CU size
    4275         119 :             PadPictureToMultipleOfMinCuSizeDimensions(
    4276             :                 sequence_control_set_ptr,
    4277             :                 input_picture_ptr);
    4278             : 
    4279             :             // Pre processing operations performed on the input picture
    4280         119 :             PicturePreProcessingOperations(
    4281             :                 picture_control_set_ptr,
    4282             :                 sequence_control_set_ptr,
    4283             :                 sb_total_count);
    4284         118 :             if (input_picture_ptr->color_format >= EB_YUV422) {
    4285             :                 // Jing: Do the conversion of 422/444=>420 here since this is a multi-threaded kernel
    4286             :                 //       Reuse the Y plane (pointer copy only); only cb/cr are written into the separate buffer desc
    4287             :                 //       NOTE: denoise may change the src, so this part must stay after PicturePreProcessingOperations()
    4288           0 :                 picture_control_set_ptr->chroma_downsampled_picture_ptr->buffer_y = input_picture_ptr->buffer_y;
    4289           0 :                 DownSampleChroma(input_picture_ptr, picture_control_set_ptr->chroma_downsampled_picture_ptr);
    4290             :             }
    4291             :             else
    4292         118 :                 picture_control_set_ptr->chroma_downsampled_picture_ptr = input_picture_ptr; // format below EB_YUV422: no conversion, alias the input picture
    4293             :             // Pad input picture to complete border LCUs
    4294         118 :             PadPictureToMultipleOfLcuDimensions(
    4295             :                 input_padded_picture_ptr);
    4296             :             // 1/4 & 1/16 input picture decimation
    4297         120 :             DownsampleDecimationInputPicture(
    4298             :                 picture_control_set_ptr,
    4299             :                 input_padded_picture_ptr,
    4300             :                 (EbPictureBufferDesc*)paReferenceObject->quarter_decimated_picture_ptr,
    4301             :                 (EbPictureBufferDesc*)paReferenceObject->sixteenth_decimated_picture_ptr);
    4302             : 
    4303             :             // 1/4 & 1/16 input picture downsampling through filtering (only when the ME search uses filtered downsampling)
    4304         120 :             if (sequence_control_set_ptr->down_sampling_method_me_search == ME_FILTERED_DOWNSAMPLED) {
    4305          60 :                 DownsampleFilteringInputPicture(
    4306             :                     picture_control_set_ptr,
    4307             :                     input_padded_picture_ptr,
    4308             :                     (EbPictureBufferDesc*)paReferenceObject->quarter_filtered_picture_ptr,
    4309             :                     (EbPictureBufferDesc*)paReferenceObject->sixteenth_filtered_picture_ptr);
    4310             :             }
    4311             :             // Gathering statistics of input picture, including Variance Calculation, Histogram Bins
    4312         120 :             GatheringPictureStatistics(
    4313             :                 sequence_control_set_ptr,
    4314             :                 picture_control_set_ptr,
    4315             :                 picture_control_set_ptr->chroma_downsampled_picture_ptr, //420 input_picture_ptr
    4316             :                 input_padded_picture_ptr,
    4317             :                 (EbPictureBufferDesc*)paReferenceObject->sixteenth_decimated_picture_ptr, // Hsan: always use decimated until studying the trade offs
    4318             :                 sb_total_count);
    4319             : 
    4320         120 :             if (sequence_control_set_ptr->static_config.screen_content_mode == 2){ // auto detect
    4321         120 :                 is_screen_content(
    4322             :                     picture_control_set_ptr,
    4323         120 :                     input_picture_ptr->buffer_y + input_picture_ptr->origin_x + input_picture_ptr->origin_y*input_picture_ptr->stride_y,
    4324             :                     0,
    4325         120 :                     input_picture_ptr->stride_y,
    4326         120 :                     sequence_control_set_ptr->seq_header.max_frame_width, sequence_control_set_ptr->seq_header.max_frame_height);
    4327             :             }
    4328             :             else // off / on: the configured mode value is stored directly as the detection result
    4329           0 :                 picture_control_set_ptr->sc_content_detected = sequence_control_set_ptr->static_config.screen_content_mode;
    4330             : 
    4331             :             // Hold the 64x64 variance and mean in the reference frame. NOTE(review): the bound is the PCS field sb_total_count, not the local computed above -- presumably equal; confirm
    4332             :             uint32_t sb_index;
    4333        7308 :             for (sb_index = 0; sb_index < picture_control_set_ptr->sb_total_count; ++sb_index) {
    4334        7188 :                 paReferenceObject->variance[sb_index] = picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_64x64];
    4335        7188 :                 paReferenceObject->y_mean[sb_index] = picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_64x64];
    4336             :             }
    4337             :         }
    4338             :         // Get an empty results object from the picture analysis results output FIFO (done for overlay pictures too)
    4339         118 :         eb_get_empty_object(
    4340             :             context_ptr->picture_analysis_results_output_fifo_ptr,
    4341             :             &outputResultsWrapperPtr);
    4342             : 
    4343         120 :         outputResultsPtr = (PictureAnalysisResults*)outputResultsWrapperPtr->object_ptr;
    4344         120 :         outputResultsPtr->picture_control_set_wrapper_ptr = inputResultsPtr->picture_control_set_wrapper_ptr;
    4345             : 
    4346             :         // Release the Input Results (the picture control set wrapper was copied to the output just above)
    4347         120 :         eb_release_object(inputResultsWrapperPtr);
    4348             : 
    4349             :         // Post the Full Results Object
    4350         120 :         eb_post_full_object(outputResultsWrapperPtr);
    4351             :     }
    4352             :     return EB_NULL; // not reached: the for (;;) loop above has no break or return
    4353             : }

Generated by: LCOV version 1.14