Line data Source code
1 : /*
2 : * Copyright(c) 2019 Intel Corporation
3 : * SPDX - License - Identifier: BSD - 2 - Clause - Patent
4 : */
5 :
6 : #include <stdlib.h>
7 : #include <string.h>
8 :
9 : #include "aom_dsp_rtcd.h"
10 : #include "EbDefinitions.h"
11 : #include "EbSystemResourceManager.h"
12 : #include "EbPictureControlSet.h"
13 : #include "EbSequenceControlSet.h"
14 : #include "EbPictureBufferDesc.h"
15 :
16 : #include "EbResourceCoordinationResults.h"
17 : #include "EbPictureAnalysisProcess.h"
18 : #include "EbPictureAnalysisResults.h"
19 : #include "EbMcp.h"
20 : #include "EbMotionEstimation.h"
21 : #include "EbReferenceObject.h"
22 :
23 : #include "EbComputeMean.h"
24 : #include "EbMeSadCalculation.h"
25 : #include "EbComputeMean_SSE2.h"
26 : #include "EbCombinedAveragingSAD_Intrinsic_AVX2.h"
27 :
28 : #define VARIANCE_PRECISION 16
29 : #define LCU_LOW_VAR_TH 5
30 : #define PIC_LOW_VAR_PERCENTAGE_TH 60
31 : #define FLAT_MAX_VAR 50
32 : #define FLAT_MAX_VAR_DECIM (50-00)
33 : #define NOISE_MIN_LEVEL 70000//120000
34 : #define NOISE_MIN_LEVEL_DECIM (70000+000000)//(120000+000000)
35 : #define NOISE_MIN_LEVEL_M6_M7 120000
36 : #define NOISE_MIN_LEVEL_DECIM_M6_M7 (120000+000000)
37 : #define DENOISER_QP_TH 29
38 : #define DENOISER_BITRATE_TH 14000000
39 : #define SAMPLE_THRESHOLD_PRECENT_BORDER_LINE 15
40 : #define SAMPLE_THRESHOLD_PRECENT_TWO_BORDER_LINES 10
41 :
42 8 : static void picture_analysis_context_dctor(EbPtr p)
43 : {
44 8 : PictureAnalysisContext *obj = (PictureAnalysisContext*)p;
45 8 : EB_DELETE(obj->noise_picture_ptr);
46 8 : EB_DELETE(obj->denoised_picture_ptr);
47 8 : }
48 : /************************************************
49 : * Picture Analysis Context Constructor
50 : ************************************************/
51 8 : EbErrorType picture_analysis_context_ctor(
52 : PictureAnalysisContext *context_ptr,
53 : EbPictureBufferDescInitData * input_picture_buffer_desc_init_data,
54 : EbBool denoise_flag,
55 : EbFifo *resource_coordination_results_input_fifo_ptr,
56 : EbFifo *picture_analysis_results_output_fifo_ptr)
57 : {
58 8 : context_ptr->resource_coordination_results_input_fifo_ptr = resource_coordination_results_input_fifo_ptr;
59 8 : context_ptr->picture_analysis_results_output_fifo_ptr = picture_analysis_results_output_fifo_ptr;
60 :
61 8 : context_ptr->dctor = picture_analysis_context_dctor;
62 :
63 8 : if (denoise_flag == EB_TRUE) {
64 : //denoised
65 : // If 420/422, re-use luma for chroma
66 : // If 444, re-use luma for Cr
67 8 : if (input_picture_buffer_desc_init_data->color_format != EB_YUV444) {
68 8 : input_picture_buffer_desc_init_data->buffer_enable_mask = PICTURE_BUFFER_DESC_Y_FLAG;
69 : } else
70 0 : input_picture_buffer_desc_init_data->buffer_enable_mask = PICTURE_BUFFER_DESC_Y_FLAG | PICTURE_BUFFER_DESC_Cb_FLAG;
71 8 : EB_NEW(
72 : context_ptr->denoised_picture_ptr,
73 : eb_picture_buffer_desc_ctor,
74 : (EbPtr)input_picture_buffer_desc_init_data);
75 :
76 8 : if (input_picture_buffer_desc_init_data->color_format != EB_YUV444) {
77 8 : context_ptr->denoised_picture_ptr->buffer_cb = context_ptr->denoised_picture_ptr->buffer_y;
78 8 : context_ptr->denoised_picture_ptr->buffer_cr = context_ptr->denoised_picture_ptr->buffer_y + context_ptr->denoised_picture_ptr->chroma_size;
79 : } else
80 0 : context_ptr->denoised_picture_ptr->buffer_cr = context_ptr->denoised_picture_ptr->buffer_y;
81 : // noise
82 8 : input_picture_buffer_desc_init_data->max_height = BLOCK_SIZE_64;
83 8 : input_picture_buffer_desc_init_data->buffer_enable_mask = PICTURE_BUFFER_DESC_Y_FLAG;
84 :
85 8 : EB_NEW(
86 : context_ptr->noise_picture_ptr,
87 : eb_picture_buffer_desc_ctor,
88 : (EbPtr)input_picture_buffer_desc_init_data);
89 : }
90 8 : return EB_ErrorNone;
91 : }
92 0 : void DownSampleChroma(EbPictureBufferDesc* input_picture_ptr, EbPictureBufferDesc* outputPicturePtr)
93 : {
94 0 : uint32_t input_color_format = input_picture_ptr->color_format;
95 0 : const uint16_t input_subsampling_x = (input_color_format == EB_YUV444 ? 1 : 2) - 1;
96 0 : const uint16_t input_subsampling_y = (input_color_format >= EB_YUV422 ? 1 : 2) - 1;
97 :
98 0 : uint32_t output_color_format = outputPicturePtr->color_format;
99 0 : const uint16_t output_subsampling_x = (output_color_format == EB_YUV444 ? 1 : 2) - 1;
100 0 : const uint16_t output_subsampling_y = (output_color_format >= EB_YUV422 ? 1 : 2) - 1;
101 :
102 : uint32_t stride_in, strideOut;
103 : uint32_t inputOriginIndex, outputOriginIndex;
104 :
105 : uint8_t *ptrIn;
106 : uint8_t *ptrOut;
107 :
108 : uint32_t ii, jj;
109 :
110 : //Cb
111 : {
112 0 : stride_in = input_picture_ptr->stride_cb;
113 0 : inputOriginIndex = (input_picture_ptr->origin_x >> input_subsampling_x) +
114 0 : (input_picture_ptr->origin_y >> input_subsampling_y) * input_picture_ptr->stride_cb;
115 0 : ptrIn = &(input_picture_ptr->buffer_cb[inputOriginIndex]);
116 :
117 0 : strideOut = outputPicturePtr->stride_cb;
118 0 : outputOriginIndex = (outputPicturePtr->origin_x >> output_subsampling_x) +
119 0 : (outputPicturePtr->origin_y >> output_subsampling_y) * outputPicturePtr->stride_cb;
120 0 : ptrOut = &(outputPicturePtr->buffer_cb[outputOriginIndex]);
121 :
122 0 : for (jj = 0; jj < (uint32_t)(outputPicturePtr->height >> output_subsampling_y); jj++) {
123 0 : for (ii = 0; ii < (uint32_t)(outputPicturePtr->width >> output_subsampling_x); ii++) {
124 0 : ptrOut[ii + jj * strideOut] =
125 0 : ptrIn[(ii << (1 - input_subsampling_x)) +
126 0 : (jj << (1 - input_subsampling_y)) * stride_in];
127 : }
128 : }
129 : }
130 :
131 : //Cr
132 : {
133 0 : stride_in = input_picture_ptr->stride_cr;
134 0 : inputOriginIndex = (input_picture_ptr->origin_x >> input_subsampling_x) + (input_picture_ptr->origin_y >> input_subsampling_y) * input_picture_ptr->stride_cr;
135 0 : ptrIn = &(input_picture_ptr->buffer_cr[inputOriginIndex]);
136 :
137 0 : strideOut = outputPicturePtr->stride_cr;
138 0 : outputOriginIndex = (outputPicturePtr->origin_x >> output_subsampling_x) + (outputPicturePtr->origin_y >> output_subsampling_y) * outputPicturePtr->stride_cr;
139 0 : ptrOut = &(outputPicturePtr->buffer_cr[outputOriginIndex]);
140 :
141 0 : for (jj = 0; jj < (uint32_t)(outputPicturePtr->height >> output_subsampling_y); jj++) {
142 0 : for (ii = 0; ii < (uint32_t)(outputPicturePtr->width >> output_subsampling_x); ii++) {
143 0 : ptrOut[ii + jj * strideOut] =
144 0 : ptrIn[(ii << (1 - input_subsampling_x)) +
145 0 : (jj << (1 - input_subsampling_y)) * stride_in];
146 : }
147 : }
148 : }
149 0 : }
150 :
151 : /************************************************
152 : * Picture Analysis Context Destructor
153 : ************************************************/
/********************************************
 * decimation_2d
 *   Decimates the input without filtering:
 *   the sample at (x, y) is kept for every
 *   x and y that are multiples of decim_step,
 *   and the kept samples are packed
 *   contiguously into the output rows.
 *
 *   Output size is ceil(width / decim_step) x
 *   ceil(height / decim_step) samples.
 *   A decim_step of 0 is rejected (nothing is
 *   written) because it could never advance.
 ********************************************/
void decimation_2d(
    uint8_t *  input_samples,      // input parameter, input samples Ptr
    uint32_t   input_stride,       // input parameter, input stride
    uint32_t   input_area_width,   // input parameter, input area width
    uint32_t   input_area_height,  // input parameter, input area height
    uint8_t *  decim_samples,      // output parameter, decimated samples Ptr
    uint32_t   decim_stride,       // input parameter, output stride
    uint32_t   decim_step)         // input parameter, decimation amount in pixels
{
    uint32_t horizontal_index;
    uint32_t vertical_index;
    uint32_t decim_horizontal_index;
    uint32_t decim_vertical_index;

    if (decim_step == 0)
        return;

    for (vertical_index = 0, decim_vertical_index = 0;
         vertical_index < input_area_height;
         vertical_index += decim_step, decim_vertical_index++) {
        const uint8_t *input_row = input_samples + (size_t)vertical_index * input_stride;
        uint8_t       *decim_row = decim_samples + (size_t)decim_vertical_index * decim_stride;

        // The output column is counted explicitly; deriving it from the input
        // column with a shift is only valid for steps of 1, 2 and 4.
        for (horizontal_index = 0, decim_horizontal_index = 0;
             horizontal_index < input_area_width;
             horizontal_index += decim_step, decim_horizontal_index++)
            decim_row[decim_horizontal_index] = input_row[horizontal_index];
    }

    return;
}
181 :
/********************************************
 * downsample_2d
 *   Downsamples the input with filtering.
 *   Alternative to decimation_2d: each output
 *   sample is the rounded average of the 2x2
 *   input samples centred on the middle of a
 *   decim_step x decim_step cell, i.e. rows
 *   (c - 1, c) and columns (c - 1, c) with
 *   c = decim_step / 2 inside the cell.
 *
 *   decim_step must be at least 2. Smaller
 *   values are rejected (nothing is written):
 *   0 would never advance and 1 would read
 *   the row above and the column left of the
 *   input area.
 ********************************************/
void downsample_2d(
    uint8_t *  input_samples,      // input parameter, input samples Ptr
    uint32_t   input_stride,       // input parameter, input stride
    uint32_t   input_area_width,   // input parameter, input area width
    uint32_t   input_area_height,  // input parameter, input area height
    uint8_t *  decim_samples,      // output parameter, decimated samples Ptr
    uint32_t   decim_stride,       // input parameter, output stride
    uint32_t   decim_step)         // input parameter, decimation amount in pixels
{
    uint32_t horizontal_index;
    uint32_t vertical_index;
    uint32_t decim_horizontal_index;
    uint32_t decim_vertical_index;
    const uint32_t half_decim_step = decim_step >> 1;

    if (decim_step < 2)
        return;

    for (vertical_index = half_decim_step, decim_vertical_index = 0;
         vertical_index < input_area_height;
         vertical_index += decim_step, decim_vertical_index++) {
        const uint8_t *cur_input_line  = input_samples + (size_t)vertical_index * input_stride;
        const uint8_t *prev_input_line = cur_input_line - input_stride;
        uint8_t       *decim_line      = decim_samples + (size_t)decim_vertical_index * decim_stride;

        for (horizontal_index = half_decim_step, decim_horizontal_index = 0;
             horizontal_index < input_area_width;
             horizontal_index += decim_step, decim_horizontal_index++) {
            const uint32_t sum = (uint32_t)prev_input_line[horizontal_index - 1] +
                                 (uint32_t)prev_input_line[horizontal_index] +
                                 (uint32_t)cur_input_line[horizontal_index - 1] +
                                 (uint32_t)cur_input_line[horizontal_index];

            // +2 rounds to nearest before dividing by 4.
            decim_line[decim_horizontal_index] = (uint8_t)((sum + 2) >> 2);
        }
    }

    return;
}
216 :
/********************************************
 * CalculateHistogram
 *   Accumulates a histogram of the input
 *   sample values, visiting every
 *   decim_step-th sample in both directions.
 *
 *   histogram must have one bin per possible
 *   sample value (256 for uint8_t). Bins are
 *   incremented, not cleared, so the caller
 *   initialises them. *sum is reset to 0 and
 *   then receives the sum of the visited
 *   samples.
 *
 *   A decim_step of 0 visits nothing (*sum is
 *   still reset) because it could never
 *   advance.
 ********************************************/
void CalculateHistogram(
    uint8_t *  input_samples,      // input parameter, input samples Ptr
    uint32_t   input_area_width,   // input parameter, input area width
    uint32_t   input_area_height,  // input parameter, input area height
    uint32_t   stride,             // input parameter, input stride
    uint8_t    decim_step,         // input parameter, sampling step in pixels (both directions)
    uint32_t  *histogram,          // output parameter, output histogram
    uint64_t  *sum)                // output parameter, sum of the visited samples
{
    uint32_t horizontal_index;
    uint32_t vertical_index;
    *sum = 0;

    if (decim_step == 0)
        return;

    for (vertical_index = 0; vertical_index < input_area_height; vertical_index += decim_step) {
        // Rows are addressed by index so the vertical step always matches the
        // horizontal one, whatever the value of decim_step.
        const uint8_t *input_row = input_samples + (size_t)vertical_index * stride;

        for (horizontal_index = 0; horizontal_index < input_area_width; horizontal_index += decim_step) {
            ++(histogram[input_row[horizontal_index]]);
            *sum += input_row[horizontal_index];
        }
    }

    return;
}
244 :
245 0 : uint64_t ComputeVariance32x32(
246 : EbPictureBufferDesc *input_padded_picture_ptr, // input parameter, Input Padded Picture
247 : uint32_t inputLumaOriginIndex, // input parameter, SB index, used to point to source/reference samples
248 : uint64_t *variance8x8) {
249 : uint32_t blockIndex;
250 :
251 : uint64_t mean_of8x8_blocks[16];
252 : uint64_t meanOf8x8SquaredValuesBlocks[16];
253 :
254 : uint64_t meanOf16x16Blocks[4];
255 : uint64_t meanOf16x16SquaredValuesBlocks[4];
256 :
257 : uint64_t meanOf32x32Blocks;
258 : uint64_t meanOf32x32SquaredValuesBlocks;
259 : /////////////////////////////////////////////
260 : // (0,0)
261 0 : blockIndex = inputLumaOriginIndex;
262 :
263 0 : mean_of8x8_blocks[0] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
264 0 : meanOf8x8SquaredValuesBlocks[0] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
265 :
266 : // (0,1)
267 0 : blockIndex = blockIndex + 8;
268 0 : mean_of8x8_blocks[1] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
269 0 : meanOf8x8SquaredValuesBlocks[1] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
270 :
271 : // (0,2)
272 0 : blockIndex = blockIndex + 8;
273 0 : mean_of8x8_blocks[2] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
274 0 : meanOf8x8SquaredValuesBlocks[2] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
275 :
276 : // (0,3)
277 0 : blockIndex = blockIndex + 8;
278 0 : mean_of8x8_blocks[3] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
279 0 : meanOf8x8SquaredValuesBlocks[3] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
280 :
281 : // (1,0)
282 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3);
283 0 : mean_of8x8_blocks[4] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
284 0 : meanOf8x8SquaredValuesBlocks[4] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
285 :
286 : // (1,1)
287 0 : blockIndex = blockIndex + 8;
288 0 : mean_of8x8_blocks[5] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
289 0 : meanOf8x8SquaredValuesBlocks[5] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
290 :
291 : // (1,2)
292 0 : blockIndex = blockIndex + 8;
293 0 : mean_of8x8_blocks[6] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
294 0 : meanOf8x8SquaredValuesBlocks[6] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
295 :
296 : // (1,3)
297 0 : blockIndex = blockIndex + 8;
298 0 : mean_of8x8_blocks[7] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
299 0 : meanOf8x8SquaredValuesBlocks[7] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
300 :
301 : // (2,0)
302 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 4);
303 0 : mean_of8x8_blocks[8] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
304 0 : meanOf8x8SquaredValuesBlocks[8] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
305 :
306 : // (2,1)
307 0 : blockIndex = blockIndex + 8;
308 0 : mean_of8x8_blocks[9] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
309 0 : meanOf8x8SquaredValuesBlocks[9] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
310 :
311 : // (2,2)
312 0 : blockIndex = blockIndex + 8;
313 0 : mean_of8x8_blocks[10] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
314 0 : meanOf8x8SquaredValuesBlocks[10] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
315 :
316 : // (2,3)
317 0 : blockIndex = blockIndex + 8;
318 0 : mean_of8x8_blocks[11] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
319 0 : meanOf8x8SquaredValuesBlocks[11] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
320 :
321 : // (3,0)
322 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3) + (input_padded_picture_ptr->stride_y << 4);
323 0 : mean_of8x8_blocks[12] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
324 0 : meanOf8x8SquaredValuesBlocks[12] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
325 :
326 : // (3,1)
327 0 : blockIndex = blockIndex + 8;
328 0 : mean_of8x8_blocks[13] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
329 0 : meanOf8x8SquaredValuesBlocks[13] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
330 :
331 : // (3,2)
332 0 : blockIndex = blockIndex + 8;
333 0 : mean_of8x8_blocks[14] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
334 0 : meanOf8x8SquaredValuesBlocks[14] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
335 :
336 : // (3,3)
337 0 : blockIndex = blockIndex + 8;
338 0 : mean_of8x8_blocks[15] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
339 0 : meanOf8x8SquaredValuesBlocks[15] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
340 :
341 : /////////////////////////////////////////////
342 :
343 0 : variance8x8[0] = meanOf8x8SquaredValuesBlocks[0] - (mean_of8x8_blocks[0] * mean_of8x8_blocks[0]);
344 0 : variance8x8[1] = meanOf8x8SquaredValuesBlocks[1] - (mean_of8x8_blocks[1] * mean_of8x8_blocks[1]);
345 0 : variance8x8[2] = meanOf8x8SquaredValuesBlocks[2] - (mean_of8x8_blocks[2] * mean_of8x8_blocks[2]);
346 0 : variance8x8[3] = meanOf8x8SquaredValuesBlocks[3] - (mean_of8x8_blocks[3] * mean_of8x8_blocks[3]);
347 0 : variance8x8[4] = meanOf8x8SquaredValuesBlocks[4] - (mean_of8x8_blocks[4] * mean_of8x8_blocks[4]);
348 0 : variance8x8[5] = meanOf8x8SquaredValuesBlocks[5] - (mean_of8x8_blocks[5] * mean_of8x8_blocks[5]);
349 0 : variance8x8[6] = meanOf8x8SquaredValuesBlocks[6] - (mean_of8x8_blocks[6] * mean_of8x8_blocks[6]);
350 0 : variance8x8[7] = meanOf8x8SquaredValuesBlocks[7] - (mean_of8x8_blocks[7] * mean_of8x8_blocks[7]);
351 0 : variance8x8[8] = meanOf8x8SquaredValuesBlocks[8] - (mean_of8x8_blocks[8] * mean_of8x8_blocks[8]);
352 0 : variance8x8[9] = meanOf8x8SquaredValuesBlocks[9] - (mean_of8x8_blocks[9] * mean_of8x8_blocks[9]);
353 0 : variance8x8[10] = meanOf8x8SquaredValuesBlocks[10] - (mean_of8x8_blocks[10] * mean_of8x8_blocks[10]);
354 0 : variance8x8[11] = meanOf8x8SquaredValuesBlocks[11] - (mean_of8x8_blocks[11] * mean_of8x8_blocks[11]);
355 0 : variance8x8[12] = meanOf8x8SquaredValuesBlocks[12] - (mean_of8x8_blocks[12] * mean_of8x8_blocks[12]);
356 0 : variance8x8[13] = meanOf8x8SquaredValuesBlocks[13] - (mean_of8x8_blocks[13] * mean_of8x8_blocks[13]);
357 0 : variance8x8[14] = meanOf8x8SquaredValuesBlocks[14] - (mean_of8x8_blocks[14] * mean_of8x8_blocks[14]);
358 0 : variance8x8[15] = meanOf8x8SquaredValuesBlocks[15] - (mean_of8x8_blocks[15] * mean_of8x8_blocks[15]);
359 :
360 : // 16x16
361 0 : meanOf16x16Blocks[0] = (mean_of8x8_blocks[0] + mean_of8x8_blocks[1] + mean_of8x8_blocks[8] + mean_of8x8_blocks[9]) >> 2;
362 0 : meanOf16x16Blocks[1] = (mean_of8x8_blocks[2] + mean_of8x8_blocks[3] + mean_of8x8_blocks[10] + mean_of8x8_blocks[11]) >> 2;
363 0 : meanOf16x16Blocks[2] = (mean_of8x8_blocks[4] + mean_of8x8_blocks[5] + mean_of8x8_blocks[12] + mean_of8x8_blocks[13]) >> 2;
364 0 : meanOf16x16Blocks[3] = (mean_of8x8_blocks[6] + mean_of8x8_blocks[7] + mean_of8x8_blocks[14] + mean_of8x8_blocks[15]) >> 2;
365 :
366 0 : meanOf16x16SquaredValuesBlocks[0] = (meanOf8x8SquaredValuesBlocks[0] + meanOf8x8SquaredValuesBlocks[1] + meanOf8x8SquaredValuesBlocks[8] + meanOf8x8SquaredValuesBlocks[9]) >> 2;
367 0 : meanOf16x16SquaredValuesBlocks[1] = (meanOf8x8SquaredValuesBlocks[2] + meanOf8x8SquaredValuesBlocks[3] + meanOf8x8SquaredValuesBlocks[10] + meanOf8x8SquaredValuesBlocks[11]) >> 2;
368 0 : meanOf16x16SquaredValuesBlocks[2] = (meanOf8x8SquaredValuesBlocks[4] + meanOf8x8SquaredValuesBlocks[5] + meanOf8x8SquaredValuesBlocks[12] + meanOf8x8SquaredValuesBlocks[13]) >> 2;
369 0 : meanOf16x16SquaredValuesBlocks[3] = (meanOf8x8SquaredValuesBlocks[6] + meanOf8x8SquaredValuesBlocks[7] + meanOf8x8SquaredValuesBlocks[14] + meanOf8x8SquaredValuesBlocks[15]) >> 2;
370 :
371 : // 32x32
372 0 : meanOf32x32Blocks = (meanOf16x16Blocks[0] + meanOf16x16Blocks[1] + meanOf16x16Blocks[2] + meanOf16x16Blocks[3]) >> 2;
373 :
374 0 : meanOf32x32SquaredValuesBlocks = (meanOf16x16SquaredValuesBlocks[0] + meanOf16x16SquaredValuesBlocks[1] + meanOf16x16SquaredValuesBlocks[2] + meanOf16x16SquaredValuesBlocks[3]) >> 2;
375 :
376 0 : return (meanOf32x32SquaredValuesBlocks - (meanOf32x32Blocks * meanOf32x32Blocks));
377 : }
378 :
379 0 : uint64_t ComputeVariance16x16(
380 : EbPictureBufferDesc *input_padded_picture_ptr, // input parameter, Input Padded Picture
381 : uint32_t inputLumaOriginIndex, // input parameter, SB index, used to point to source/reference samples
382 : uint64_t *variance8x8)
383 : {
384 : uint32_t blockIndex;
385 :
386 : uint64_t mean_of8x8_blocks[4];
387 : uint64_t meanOf8x8SquaredValuesBlocks[4];
388 :
389 : uint64_t meanOf16x16Blocks;
390 : uint64_t meanOf16x16SquaredValuesBlocks;
391 :
392 : // (0,0)
393 0 : blockIndex = inputLumaOriginIndex;
394 :
395 0 : mean_of8x8_blocks[0] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
396 0 : meanOf8x8SquaredValuesBlocks[0] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
397 :
398 : // (0,1)
399 0 : blockIndex = blockIndex + 8;
400 0 : mean_of8x8_blocks[1] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
401 0 : meanOf8x8SquaredValuesBlocks[1] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
402 :
403 : // (1,0)
404 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3);
405 0 : mean_of8x8_blocks[2] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
406 0 : meanOf8x8SquaredValuesBlocks[2] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
407 :
408 : // (1,1)
409 0 : blockIndex = blockIndex + 8;
410 0 : mean_of8x8_blocks[3] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
411 0 : meanOf8x8SquaredValuesBlocks[3] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
412 :
413 0 : variance8x8[0] = meanOf8x8SquaredValuesBlocks[0] - (mean_of8x8_blocks[0] * mean_of8x8_blocks[0]);
414 0 : variance8x8[1] = meanOf8x8SquaredValuesBlocks[1] - (mean_of8x8_blocks[1] * mean_of8x8_blocks[1]);
415 0 : variance8x8[2] = meanOf8x8SquaredValuesBlocks[2] - (mean_of8x8_blocks[2] * mean_of8x8_blocks[2]);
416 0 : variance8x8[3] = meanOf8x8SquaredValuesBlocks[3] - (mean_of8x8_blocks[3] * mean_of8x8_blocks[3]);
417 :
418 : // 16x16
419 0 : meanOf16x16Blocks = (mean_of8x8_blocks[0] + mean_of8x8_blocks[1] + mean_of8x8_blocks[2] + mean_of8x8_blocks[3]) >> 2;
420 0 : meanOf16x16SquaredValuesBlocks = (meanOf8x8SquaredValuesBlocks[0] + meanOf8x8SquaredValuesBlocks[1] + meanOf8x8SquaredValuesBlocks[2] + meanOf8x8SquaredValuesBlocks[3]) >> 2;
421 :
422 0 : return (meanOf16x16SquaredValuesBlocks - (meanOf16x16Blocks * meanOf16x16Blocks));
423 : }
424 :
/********************************************
 * compute_sub_mean_c
 *   Sums the samples of every other row
 *   (rows 0, 2, 4, ...) of the given area and
 *   returns that sum shifted left by 3.
 *
 *   For an 8x8 area (32 visited samples) the
 *   result is the sub-sampled mean scaled by
 *   2^(VARIANCE_PRECISION >> 1) = 256. The
 *   shift is fixed, so for other area sizes
 *   the result is not a normalised mean.
 ********************************************/
uint64_t compute_sub_mean_c(
    uint8_t* input_samples, /**< input parameter, input samples Ptr */
    uint32_t input_stride, /**< input parameter, input stride */
    uint32_t input_area_width, /**< input parameter, input area width */
    uint32_t input_area_height) /**< input parameter, input area height */
{
    // Number of even-indexed rows in [0, input_area_height). Kept in 32 bits
    // so that heights above 65535 still terminate.
    const uint32_t visited_rows = (input_area_height >> 1) + (input_area_height & 1);
    uint64_t block_sum = 0;
    uint32_t hi, vi;

    for (vi = 0; vi < visited_rows; vi++) {
        const uint8_t *row = input_samples + (size_t)vi * 2 * input_stride;

        for (hi = 0; hi < input_area_width; hi++)
            block_sum += row[hi];
    }

    // (sum << (VARIANCE_PRECISION >> 1)) / (8 * 8 / 2) for an 8x8 area
    return block_sum << 3;
}
448 :
/********************************************
 * compute_sub_mean_squared_values_c
 *   Sums the squared samples of every other
 *   row (rows 0, 2, 4, ...) of the given area
 *   and returns that sum shifted left by 11.
 *
 *   For an 8x8 area (32 visited samples) the
 *   result is the sub-sampled mean of squares
 *   scaled by 2^VARIANCE_PRECISION = 65536.
 *   The shift is fixed, so for other area
 *   sizes the result is not a normalised mean.
 ********************************************/
uint64_t compute_sub_mean_squared_values_c(
    uint8_t* input_samples, /**< input parameter, input samples Ptr */
    uint32_t input_stride, /**< input parameter, input stride */
    uint32_t input_area_width, /**< input parameter, input area width */
    uint32_t input_area_height) /**< input parameter, input area height */
{
    // Number of even-indexed rows in [0, input_area_height). Kept in 32 bits
    // so that heights above 65535 still terminate.
    const uint32_t visited_rows = (input_area_height >> 1) + (input_area_height & 1);
    uint64_t block_sum = 0;
    uint32_t hi, vi;

    for (vi = 0; vi < visited_rows; vi++) {
        const uint8_t *row = input_samples + (size_t)vi * 2 * input_stride;

        for (hi = 0; hi < input_area_width; hi++) {
            const uint32_t sample = row[hi];
            block_sum += sample * sample;
        }
    }

    // (sum << VARIANCE_PRECISION) / (8 * 8 / 2) for an 8x8 area
    return block_sum << 11;
}
473 :
/********************************************
 * compute_interm_var_four8x8_c
 *   Computes the sub-sampled mean and mean of
 *   squares for four horizontally adjacent
 *   8x8 blocks starting at input_samples
 *   (column offsets 0, 8, 16 and 24) and
 *   stores them in entries 0..3 of the two
 *   output arrays.
 ********************************************/
void compute_interm_var_four8x8_c(
    uint8_t *  input_samples,
    uint16_t   input_stride,
    uint64_t * mean_of8x8_blocks,         // mean of four 8x8
    uint64_t * mean_of_squared8x8_blocks) // meanSquared
{
    uint32_t block;

    for (block = 0; block < 4; ++block) {
        // Blocks (0,0) .. (0,3): same rows, 8 columns apart.
        uint8_t *block_samples = input_samples + block * 8;

        mean_of8x8_blocks[block] = compute_sub_mean_c(block_samples, input_stride, 8, 8);
        mean_of_squared8x8_blocks[block] = compute_sub_mean_squared_values_c(block_samples, input_stride, 8, 8);
    }
}
501 :
502 : /*******************************************
503 : ComputeVariance64x64
504 : this function is exactly same as
505 : PictureAnalysisComputeVarianceLcu except it
506 : does not store data for every block,
507 : just returns the 64x64 data point
508 : *******************************************/
509 0 : uint64_t ComputeVariance64x64(
510 : SequenceControlSet *sequence_control_set_ptr,
511 : EbPictureBufferDesc *input_padded_picture_ptr, // input parameter, Input Padded Picture
512 : uint32_t inputLumaOriginIndex, // input parameter, SB index, used to point to source/reference samples
513 : uint64_t *variance32x32)
514 : {
515 : uint32_t blockIndex;
516 :
517 : uint64_t mean_of8x8_blocks[64];
518 : uint64_t meanOf8x8SquaredValuesBlocks[64];
519 :
520 : uint64_t meanOf16x16Blocks[16];
521 : uint64_t meanOf16x16SquaredValuesBlocks[16];
522 :
523 : uint64_t meanOf32x32Blocks[4];
524 : uint64_t meanOf32x32SquaredValuesBlocks[4];
525 :
526 : uint64_t meanOf64x64Blocks;
527 : uint64_t meanOf64x64SquaredValuesBlocks;
528 :
529 : // (0,0)
530 0 : blockIndex = inputLumaOriginIndex;
531 0 : const uint16_t stride_y = input_padded_picture_ptr->stride_y;
532 0 : if (sequence_control_set_ptr->block_mean_calc_prec == BLOCK_MEAN_PREC_FULL) {
533 0 : mean_of8x8_blocks[0] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
534 0 : meanOf8x8SquaredValuesBlocks[0] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
535 :
536 : // (0,1)
537 0 : blockIndex = blockIndex + 8;
538 0 : mean_of8x8_blocks[1] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
539 0 : meanOf8x8SquaredValuesBlocks[1] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
540 :
541 : // (0,2)
542 0 : blockIndex = blockIndex + 8;
543 0 : mean_of8x8_blocks[2] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
544 0 : meanOf8x8SquaredValuesBlocks[2] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
545 :
546 : // (0,3)
547 0 : blockIndex = blockIndex + 8;
548 0 : mean_of8x8_blocks[3] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
549 0 : meanOf8x8SquaredValuesBlocks[3] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
550 :
551 : // (0,4)
552 0 : blockIndex = blockIndex + 8;
553 0 : mean_of8x8_blocks[4] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
554 0 : meanOf8x8SquaredValuesBlocks[4] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
555 :
556 : // (0,5)
557 0 : blockIndex = blockIndex + 8;
558 0 : mean_of8x8_blocks[5] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
559 0 : meanOf8x8SquaredValuesBlocks[5] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
560 :
561 : // (0,6)
562 0 : blockIndex = blockIndex + 8;
563 0 : mean_of8x8_blocks[6] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
564 0 : meanOf8x8SquaredValuesBlocks[6] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
565 :
566 : // (0,7)
567 0 : blockIndex = blockIndex + 8;
568 0 : mean_of8x8_blocks[7] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
569 0 : meanOf8x8SquaredValuesBlocks[7] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
570 :
571 : // (1,0)
572 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3);
573 0 : mean_of8x8_blocks[8] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
574 0 : meanOf8x8SquaredValuesBlocks[8] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
575 :
576 : // (1,1)
577 0 : blockIndex = blockIndex + 8;
578 0 : mean_of8x8_blocks[9] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
579 0 : meanOf8x8SquaredValuesBlocks[9] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
580 :
581 : // (1,2)
582 0 : blockIndex = blockIndex + 8;
583 0 : mean_of8x8_blocks[10] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
584 0 : meanOf8x8SquaredValuesBlocks[10] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
585 :
586 : // (1,3)
587 0 : blockIndex = blockIndex + 8;
588 0 : mean_of8x8_blocks[11] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
589 0 : meanOf8x8SquaredValuesBlocks[11] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
590 :
591 : // (1,4)
592 0 : blockIndex = blockIndex + 8;
593 0 : mean_of8x8_blocks[12] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
594 0 : meanOf8x8SquaredValuesBlocks[12] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
595 :
596 : // (1,5)
597 0 : blockIndex = blockIndex + 8;
598 0 : mean_of8x8_blocks[13] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
599 0 : meanOf8x8SquaredValuesBlocks[13] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
600 :
601 : // (1,6)
602 0 : blockIndex = blockIndex + 8;
603 0 : mean_of8x8_blocks[14] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
604 0 : meanOf8x8SquaredValuesBlocks[14] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
605 :
606 : // (1,7)
607 0 : blockIndex = blockIndex + 8;
608 0 : mean_of8x8_blocks[15] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
609 0 : meanOf8x8SquaredValuesBlocks[15] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
610 :
611 : // (2,0)
612 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 4);
613 0 : mean_of8x8_blocks[16] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
614 0 : meanOf8x8SquaredValuesBlocks[16] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
615 :
616 : // (2,1)
617 0 : blockIndex = blockIndex + 8;
618 0 : mean_of8x8_blocks[17] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
619 0 : meanOf8x8SquaredValuesBlocks[17] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
620 :
621 : // (2,2)
622 0 : blockIndex = blockIndex + 8;
623 0 : mean_of8x8_blocks[18] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
624 0 : meanOf8x8SquaredValuesBlocks[18] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
625 :
626 : // (2,3)
627 0 : blockIndex = blockIndex + 8;
628 0 : mean_of8x8_blocks[19] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
629 0 : meanOf8x8SquaredValuesBlocks[19] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
630 :
631 : /// (2,4)
632 0 : blockIndex = blockIndex + 8;
633 0 : mean_of8x8_blocks[20] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
634 0 : meanOf8x8SquaredValuesBlocks[20] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
635 :
636 : // (2,5)
637 0 : blockIndex = blockIndex + 8;
638 0 : mean_of8x8_blocks[21] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
639 0 : meanOf8x8SquaredValuesBlocks[21] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
640 :
641 : // (2,6)
642 0 : blockIndex = blockIndex + 8;
643 0 : mean_of8x8_blocks[22] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
644 0 : meanOf8x8SquaredValuesBlocks[22] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
645 :
646 : // (2,7)
647 0 : blockIndex = blockIndex + 8;
648 0 : mean_of8x8_blocks[23] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
649 0 : meanOf8x8SquaredValuesBlocks[23] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
650 :
651 : // (3,0)
652 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3) + (input_padded_picture_ptr->stride_y << 4);
653 0 : mean_of8x8_blocks[24] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
654 0 : meanOf8x8SquaredValuesBlocks[24] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
655 :
656 : // (3,1)
657 0 : blockIndex = blockIndex + 8;
658 0 : mean_of8x8_blocks[25] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
659 0 : meanOf8x8SquaredValuesBlocks[25] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
660 :
661 : // (3,2)
662 0 : blockIndex = blockIndex + 8;
663 0 : mean_of8x8_blocks[26] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
664 0 : meanOf8x8SquaredValuesBlocks[26] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
665 :
666 : // (3,3)
667 0 : blockIndex = blockIndex + 8;
668 0 : mean_of8x8_blocks[27] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
669 0 : meanOf8x8SquaredValuesBlocks[27] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
670 :
671 : // (3,4)
672 0 : blockIndex = blockIndex + 8;
673 0 : mean_of8x8_blocks[28] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
674 0 : meanOf8x8SquaredValuesBlocks[28] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
675 :
676 : // (3,5)
677 0 : blockIndex = blockIndex + 8;
678 0 : mean_of8x8_blocks[29] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
679 0 : meanOf8x8SquaredValuesBlocks[29] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
680 :
681 : // (3,6)
682 0 : blockIndex = blockIndex + 8;
683 0 : mean_of8x8_blocks[30] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
684 0 : meanOf8x8SquaredValuesBlocks[30] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
685 :
686 : // (3,7)
687 0 : blockIndex = blockIndex + 8;
688 0 : mean_of8x8_blocks[31] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
689 0 : meanOf8x8SquaredValuesBlocks[31] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
690 :
691 : // (4,0)
692 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 5);
693 0 : mean_of8x8_blocks[32] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
694 0 : meanOf8x8SquaredValuesBlocks[32] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
695 :
696 : // (4,1)
697 0 : blockIndex = blockIndex + 8;
698 0 : mean_of8x8_blocks[33] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
699 0 : meanOf8x8SquaredValuesBlocks[33] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
700 :
701 : // (4,2)
702 0 : blockIndex = blockIndex + 8;
703 0 : mean_of8x8_blocks[34] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
704 0 : meanOf8x8SquaredValuesBlocks[34] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
705 :
706 : // (4,3)
707 0 : blockIndex = blockIndex + 8;
708 0 : mean_of8x8_blocks[35] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
709 0 : meanOf8x8SquaredValuesBlocks[35] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
710 :
711 : // (4,4)
712 0 : blockIndex = blockIndex + 8;
713 0 : mean_of8x8_blocks[36] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
714 0 : meanOf8x8SquaredValuesBlocks[36] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
715 :
716 : // (4,5)
717 0 : blockIndex = blockIndex + 8;
718 0 : mean_of8x8_blocks[37] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
719 0 : meanOf8x8SquaredValuesBlocks[37] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
720 :
721 : // (4,6)
722 0 : blockIndex = blockIndex + 8;
723 0 : mean_of8x8_blocks[38] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
724 0 : meanOf8x8SquaredValuesBlocks[38] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
725 :
726 : // (4,7)
727 0 : blockIndex = blockIndex + 8;
728 0 : mean_of8x8_blocks[39] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
729 0 : meanOf8x8SquaredValuesBlocks[39] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
730 :
731 : // (5,0)
732 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3) + (input_padded_picture_ptr->stride_y << 5);
733 0 : mean_of8x8_blocks[40] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
734 0 : meanOf8x8SquaredValuesBlocks[40] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
735 :
736 : // (5,1)
737 0 : blockIndex = blockIndex + 8;
738 0 : mean_of8x8_blocks[41] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
739 0 : meanOf8x8SquaredValuesBlocks[41] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
740 :
741 : // (5,2)
742 0 : blockIndex = blockIndex + 8;
743 0 : mean_of8x8_blocks[42] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
744 0 : meanOf8x8SquaredValuesBlocks[42] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
745 :
746 : // (5,3)
747 0 : blockIndex = blockIndex + 8;
748 0 : mean_of8x8_blocks[43] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
749 0 : meanOf8x8SquaredValuesBlocks[43] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
750 :
751 : // (5,4)
752 0 : blockIndex = blockIndex + 8;
753 0 : mean_of8x8_blocks[44] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
754 0 : meanOf8x8SquaredValuesBlocks[44] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
755 :
756 : // (5,5)
757 0 : blockIndex = blockIndex + 8;
758 0 : mean_of8x8_blocks[45] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
759 0 : meanOf8x8SquaredValuesBlocks[45] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
760 :
761 : // (5,6)
762 0 : blockIndex = blockIndex + 8;
763 0 : mean_of8x8_blocks[46] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
764 0 : meanOf8x8SquaredValuesBlocks[46] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
765 :
766 : // (5,7)
767 0 : blockIndex = blockIndex + 8;
768 0 : mean_of8x8_blocks[47] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
769 0 : meanOf8x8SquaredValuesBlocks[47] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
770 :
771 : // (6,0)
772 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 4) + (input_padded_picture_ptr->stride_y << 5);
773 0 : mean_of8x8_blocks[48] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
774 0 : meanOf8x8SquaredValuesBlocks[48] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
775 :
776 : // (6,1)
777 0 : blockIndex = blockIndex + 8;
778 0 : mean_of8x8_blocks[49] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
779 0 : meanOf8x8SquaredValuesBlocks[49] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
780 :
781 : // (6,2)
782 0 : blockIndex = blockIndex + 8;
783 0 : mean_of8x8_blocks[50] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
784 0 : meanOf8x8SquaredValuesBlocks[50] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
785 :
786 : // (6,3)
787 0 : blockIndex = blockIndex + 8;
788 0 : mean_of8x8_blocks[51] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
789 0 : meanOf8x8SquaredValuesBlocks[51] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
790 :
791 : // (6,4)
792 0 : blockIndex = blockIndex + 8;
793 0 : mean_of8x8_blocks[52] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
794 0 : meanOf8x8SquaredValuesBlocks[52] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
795 :
796 : // (6,5)
797 0 : blockIndex = blockIndex + 8;
798 0 : mean_of8x8_blocks[53] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
799 0 : meanOf8x8SquaredValuesBlocks[53] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
800 :
801 : // (6,6)
802 0 : blockIndex = blockIndex + 8;
803 0 : mean_of8x8_blocks[54] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
804 0 : meanOf8x8SquaredValuesBlocks[54] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
805 :
806 : // (6,7)
807 0 : blockIndex = blockIndex + 8;
808 0 : mean_of8x8_blocks[55] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
809 0 : meanOf8x8SquaredValuesBlocks[55] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
810 :
811 : // (7,0)
812 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3) + (input_padded_picture_ptr->stride_y << 4) + (input_padded_picture_ptr->stride_y << 5);
813 0 : mean_of8x8_blocks[56] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
814 0 : meanOf8x8SquaredValuesBlocks[56] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
815 :
816 : // (7,1)
817 0 : blockIndex = blockIndex + 8;
818 0 : mean_of8x8_blocks[57] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
819 0 : meanOf8x8SquaredValuesBlocks[57] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
820 :
821 : // (7,2)
822 0 : blockIndex = blockIndex + 8;
823 0 : mean_of8x8_blocks[58] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
824 0 : meanOf8x8SquaredValuesBlocks[58] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
825 :
826 : // (7,3)
827 0 : blockIndex = blockIndex + 8;
828 0 : mean_of8x8_blocks[59] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
829 0 : meanOf8x8SquaredValuesBlocks[59] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
830 :
831 : // (7,4)
832 0 : blockIndex = blockIndex + 8;
833 0 : mean_of8x8_blocks[60] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
834 0 : meanOf8x8SquaredValuesBlocks[60] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
835 :
836 : // (7,5)
837 0 : blockIndex = blockIndex + 8;
838 0 : mean_of8x8_blocks[61] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
839 0 : meanOf8x8SquaredValuesBlocks[61] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
840 :
841 : // (7,6)
842 0 : blockIndex = blockIndex + 8;
843 0 : mean_of8x8_blocks[62] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
844 0 : meanOf8x8SquaredValuesBlocks[62] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
845 :
846 : // (7,7)
847 0 : blockIndex = blockIndex + 8;
848 0 : mean_of8x8_blocks[63] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
849 0 : meanOf8x8SquaredValuesBlocks[63] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
850 : }
851 :
852 : else {
853 :
854 0 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[0], &meanOf8x8SquaredValuesBlocks[0]);
855 :
856 : // (0,1)
857 0 : blockIndex = blockIndex + 32;
858 :
859 0 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[4], &meanOf8x8SquaredValuesBlocks[4]);
860 : // (0,5)
861 0 : blockIndex = blockIndex + 24;
862 :
863 : // (1,0)
864 0 : blockIndex = inputLumaOriginIndex + (stride_y << 3);
865 :
866 0 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[8], &meanOf8x8SquaredValuesBlocks[8]);
867 :
868 : // (1,1)
869 0 : blockIndex = blockIndex + 32;
870 :
871 0 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[12], &meanOf8x8SquaredValuesBlocks[12]);
872 :
873 : // (1,5)
874 0 : blockIndex = blockIndex + 24;
875 :
876 : // (2,0)
877 0 : blockIndex = inputLumaOriginIndex + (stride_y << 4);
878 :
879 0 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[16], &meanOf8x8SquaredValuesBlocks[16]);
880 :
881 : // (2,1)
882 0 : blockIndex = blockIndex + 32;
883 :
884 0 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[20], &meanOf8x8SquaredValuesBlocks[20]);
885 :
886 : // (2,5)
887 0 : blockIndex = blockIndex + 24;
888 :
889 : // (3,0)
890 0 : blockIndex = inputLumaOriginIndex + (stride_y << 3) + (stride_y << 4);
891 :
892 0 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[24], &meanOf8x8SquaredValuesBlocks[24]);
893 :
894 : // (3,1)
895 0 : blockIndex = blockIndex + 32;
896 :
897 0 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[28], &meanOf8x8SquaredValuesBlocks[28]);
898 :
899 : // (3,5)
900 0 : blockIndex = blockIndex + 24;
901 :
902 : // (4,0)
903 0 : blockIndex = inputLumaOriginIndex + (stride_y << 5);
904 :
905 0 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[32], &meanOf8x8SquaredValuesBlocks[32]);
906 :
907 : // (4,1)
908 0 : blockIndex = blockIndex + 32;
909 :
910 0 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[36], &meanOf8x8SquaredValuesBlocks[36]);
911 :
912 : // (4,5)
913 0 : blockIndex = blockIndex + 24;
914 :
915 : // (5,0)
916 0 : blockIndex = inputLumaOriginIndex + (stride_y << 3) + (stride_y << 5);
917 :
918 0 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[40], &meanOf8x8SquaredValuesBlocks[40]);
919 :
920 : // (5,1)
921 0 : blockIndex = blockIndex + 32;
922 :
923 0 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[44], &meanOf8x8SquaredValuesBlocks[44]);
924 :
925 : // (5,5)
926 0 : blockIndex = blockIndex + 24;
927 :
928 : // (6,0)
929 0 : blockIndex = inputLumaOriginIndex + (stride_y << 4) + (stride_y << 5);
930 :
931 0 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[48], &meanOf8x8SquaredValuesBlocks[48]);
932 :
933 : // (6,1)
934 0 : blockIndex = blockIndex + 32;
935 :
936 0 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[52], &meanOf8x8SquaredValuesBlocks[52]);
937 :
938 : // (6,5)
939 0 : blockIndex = blockIndex + 24;
940 :
941 : // (7,0)
942 0 : blockIndex = inputLumaOriginIndex + (stride_y << 3) + (stride_y << 4) + (stride_y << 5);
943 :
944 0 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[56], &meanOf8x8SquaredValuesBlocks[56]);
945 :
946 : // (7,1)
947 0 : blockIndex = blockIndex + 32;
948 :
949 0 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[60], &meanOf8x8SquaredValuesBlocks[60]);
950 :
951 : }
952 :
953 : // 16x16
954 0 : meanOf16x16Blocks[0] = (mean_of8x8_blocks[0] + mean_of8x8_blocks[1] + mean_of8x8_blocks[8] + mean_of8x8_blocks[9]) >> 2;
955 0 : meanOf16x16Blocks[1] = (mean_of8x8_blocks[2] + mean_of8x8_blocks[3] + mean_of8x8_blocks[10] + mean_of8x8_blocks[11]) >> 2;
956 0 : meanOf16x16Blocks[2] = (mean_of8x8_blocks[4] + mean_of8x8_blocks[5] + mean_of8x8_blocks[12] + mean_of8x8_blocks[13]) >> 2;
957 0 : meanOf16x16Blocks[3] = (mean_of8x8_blocks[6] + mean_of8x8_blocks[7] + mean_of8x8_blocks[14] + mean_of8x8_blocks[15]) >> 2;
958 :
959 0 : meanOf16x16Blocks[4] = (mean_of8x8_blocks[16] + mean_of8x8_blocks[17] + mean_of8x8_blocks[24] + mean_of8x8_blocks[25]) >> 2;
960 0 : meanOf16x16Blocks[5] = (mean_of8x8_blocks[18] + mean_of8x8_blocks[19] + mean_of8x8_blocks[26] + mean_of8x8_blocks[27]) >> 2;
961 0 : meanOf16x16Blocks[6] = (mean_of8x8_blocks[20] + mean_of8x8_blocks[21] + mean_of8x8_blocks[28] + mean_of8x8_blocks[29]) >> 2;
962 0 : meanOf16x16Blocks[7] = (mean_of8x8_blocks[22] + mean_of8x8_blocks[23] + mean_of8x8_blocks[30] + mean_of8x8_blocks[31]) >> 2;
963 :
964 0 : meanOf16x16Blocks[8] = (mean_of8x8_blocks[32] + mean_of8x8_blocks[33] + mean_of8x8_blocks[40] + mean_of8x8_blocks[41]) >> 2;
965 0 : meanOf16x16Blocks[9] = (mean_of8x8_blocks[34] + mean_of8x8_blocks[35] + mean_of8x8_blocks[42] + mean_of8x8_blocks[43]) >> 2;
966 0 : meanOf16x16Blocks[10] = (mean_of8x8_blocks[36] + mean_of8x8_blocks[37] + mean_of8x8_blocks[44] + mean_of8x8_blocks[45]) >> 2;
967 0 : meanOf16x16Blocks[11] = (mean_of8x8_blocks[38] + mean_of8x8_blocks[39] + mean_of8x8_blocks[46] + mean_of8x8_blocks[47]) >> 2;
968 :
969 0 : meanOf16x16Blocks[12] = (mean_of8x8_blocks[48] + mean_of8x8_blocks[49] + mean_of8x8_blocks[56] + mean_of8x8_blocks[57]) >> 2;
970 0 : meanOf16x16Blocks[13] = (mean_of8x8_blocks[50] + mean_of8x8_blocks[51] + mean_of8x8_blocks[58] + mean_of8x8_blocks[59]) >> 2;
971 0 : meanOf16x16Blocks[14] = (mean_of8x8_blocks[52] + mean_of8x8_blocks[53] + mean_of8x8_blocks[60] + mean_of8x8_blocks[61]) >> 2;
972 0 : meanOf16x16Blocks[15] = (mean_of8x8_blocks[54] + mean_of8x8_blocks[55] + mean_of8x8_blocks[62] + mean_of8x8_blocks[63]) >> 2;
973 :
974 0 : meanOf16x16SquaredValuesBlocks[0] = (meanOf8x8SquaredValuesBlocks[0] + meanOf8x8SquaredValuesBlocks[1] + meanOf8x8SquaredValuesBlocks[8] + meanOf8x8SquaredValuesBlocks[9]) >> 2;
975 0 : meanOf16x16SquaredValuesBlocks[1] = (meanOf8x8SquaredValuesBlocks[2] + meanOf8x8SquaredValuesBlocks[3] + meanOf8x8SquaredValuesBlocks[10] + meanOf8x8SquaredValuesBlocks[11]) >> 2;
976 0 : meanOf16x16SquaredValuesBlocks[2] = (meanOf8x8SquaredValuesBlocks[4] + meanOf8x8SquaredValuesBlocks[5] + meanOf8x8SquaredValuesBlocks[12] + meanOf8x8SquaredValuesBlocks[13]) >> 2;
977 0 : meanOf16x16SquaredValuesBlocks[3] = (meanOf8x8SquaredValuesBlocks[6] + meanOf8x8SquaredValuesBlocks[7] + meanOf8x8SquaredValuesBlocks[14] + meanOf8x8SquaredValuesBlocks[15]) >> 2;
978 :
979 0 : meanOf16x16SquaredValuesBlocks[4] = (meanOf8x8SquaredValuesBlocks[16] + meanOf8x8SquaredValuesBlocks[17] + meanOf8x8SquaredValuesBlocks[24] + meanOf8x8SquaredValuesBlocks[25]) >> 2;
980 0 : meanOf16x16SquaredValuesBlocks[5] = (meanOf8x8SquaredValuesBlocks[18] + meanOf8x8SquaredValuesBlocks[19] + meanOf8x8SquaredValuesBlocks[26] + meanOf8x8SquaredValuesBlocks[27]) >> 2;
981 0 : meanOf16x16SquaredValuesBlocks[6] = (meanOf8x8SquaredValuesBlocks[20] + meanOf8x8SquaredValuesBlocks[21] + meanOf8x8SquaredValuesBlocks[28] + meanOf8x8SquaredValuesBlocks[29]) >> 2;
982 0 : meanOf16x16SquaredValuesBlocks[7] = (meanOf8x8SquaredValuesBlocks[22] + meanOf8x8SquaredValuesBlocks[23] + meanOf8x8SquaredValuesBlocks[30] + meanOf8x8SquaredValuesBlocks[31]) >> 2;
983 :
984 0 : meanOf16x16SquaredValuesBlocks[8] = (meanOf8x8SquaredValuesBlocks[32] + meanOf8x8SquaredValuesBlocks[33] + meanOf8x8SquaredValuesBlocks[40] + meanOf8x8SquaredValuesBlocks[41]) >> 2;
985 0 : meanOf16x16SquaredValuesBlocks[9] = (meanOf8x8SquaredValuesBlocks[34] + meanOf8x8SquaredValuesBlocks[35] + meanOf8x8SquaredValuesBlocks[42] + meanOf8x8SquaredValuesBlocks[43]) >> 2;
986 0 : meanOf16x16SquaredValuesBlocks[10] = (meanOf8x8SquaredValuesBlocks[36] + meanOf8x8SquaredValuesBlocks[37] + meanOf8x8SquaredValuesBlocks[44] + meanOf8x8SquaredValuesBlocks[45]) >> 2;
987 0 : meanOf16x16SquaredValuesBlocks[11] = (meanOf8x8SquaredValuesBlocks[38] + meanOf8x8SquaredValuesBlocks[39] + meanOf8x8SquaredValuesBlocks[46] + meanOf8x8SquaredValuesBlocks[47]) >> 2;
988 :
989 0 : meanOf16x16SquaredValuesBlocks[12] = (meanOf8x8SquaredValuesBlocks[48] + meanOf8x8SquaredValuesBlocks[49] + meanOf8x8SquaredValuesBlocks[56] + meanOf8x8SquaredValuesBlocks[57]) >> 2;
990 0 : meanOf16x16SquaredValuesBlocks[13] = (meanOf8x8SquaredValuesBlocks[50] + meanOf8x8SquaredValuesBlocks[51] + meanOf8x8SquaredValuesBlocks[58] + meanOf8x8SquaredValuesBlocks[59]) >> 2;
991 0 : meanOf16x16SquaredValuesBlocks[14] = (meanOf8x8SquaredValuesBlocks[52] + meanOf8x8SquaredValuesBlocks[53] + meanOf8x8SquaredValuesBlocks[60] + meanOf8x8SquaredValuesBlocks[61]) >> 2;
992 0 : meanOf16x16SquaredValuesBlocks[15] = (meanOf8x8SquaredValuesBlocks[54] + meanOf8x8SquaredValuesBlocks[55] + meanOf8x8SquaredValuesBlocks[62] + meanOf8x8SquaredValuesBlocks[63]) >> 2;
993 :
994 : // 32x32
995 0 : meanOf32x32Blocks[0] = (meanOf16x16Blocks[0] + meanOf16x16Blocks[1] + meanOf16x16Blocks[4] + meanOf16x16Blocks[5]) >> 2;
996 0 : meanOf32x32Blocks[1] = (meanOf16x16Blocks[2] + meanOf16x16Blocks[3] + meanOf16x16Blocks[6] + meanOf16x16Blocks[7]) >> 2;
997 0 : meanOf32x32Blocks[2] = (meanOf16x16Blocks[8] + meanOf16x16Blocks[9] + meanOf16x16Blocks[12] + meanOf16x16Blocks[13]) >> 2;
998 0 : meanOf32x32Blocks[3] = (meanOf16x16Blocks[10] + meanOf16x16Blocks[11] + meanOf16x16Blocks[14] + meanOf16x16Blocks[15]) >> 2;
999 :
1000 0 : meanOf32x32SquaredValuesBlocks[0] = (meanOf16x16SquaredValuesBlocks[0] + meanOf16x16SquaredValuesBlocks[1] + meanOf16x16SquaredValuesBlocks[4] + meanOf16x16SquaredValuesBlocks[5]) >> 2;
1001 0 : meanOf32x32SquaredValuesBlocks[1] = (meanOf16x16SquaredValuesBlocks[2] + meanOf16x16SquaredValuesBlocks[3] + meanOf16x16SquaredValuesBlocks[6] + meanOf16x16SquaredValuesBlocks[7]) >> 2;
1002 0 : meanOf32x32SquaredValuesBlocks[2] = (meanOf16x16SquaredValuesBlocks[8] + meanOf16x16SquaredValuesBlocks[9] + meanOf16x16SquaredValuesBlocks[12] + meanOf16x16SquaredValuesBlocks[13]) >> 2;
1003 0 : meanOf32x32SquaredValuesBlocks[3] = (meanOf16x16SquaredValuesBlocks[10] + meanOf16x16SquaredValuesBlocks[11] + meanOf16x16SquaredValuesBlocks[14] + meanOf16x16SquaredValuesBlocks[15]) >> 2;
1004 :
1005 0 : variance32x32[0] = meanOf32x32SquaredValuesBlocks[0] - (meanOf32x32Blocks[0] * meanOf32x32Blocks[0]);
1006 0 : variance32x32[1] = meanOf32x32SquaredValuesBlocks[1] - (meanOf32x32Blocks[1] * meanOf32x32Blocks[1]);
1007 0 : variance32x32[2] = meanOf32x32SquaredValuesBlocks[2] - (meanOf32x32Blocks[2] * meanOf32x32Blocks[2]);
1008 0 : variance32x32[3] = meanOf32x32SquaredValuesBlocks[3] - (meanOf32x32Blocks[3] * meanOf32x32Blocks[3]);
1009 :
1010 : // 64x64
1011 0 : meanOf64x64Blocks = (meanOf32x32Blocks[0] + meanOf32x32Blocks[1] + meanOf32x32Blocks[2] + meanOf32x32Blocks[3]) >> 2;
1012 0 : meanOf64x64SquaredValuesBlocks = (meanOf32x32SquaredValuesBlocks[0] + meanOf32x32SquaredValuesBlocks[1] + meanOf32x32SquaredValuesBlocks[2] + meanOf32x32SquaredValuesBlocks[3]) >> 2;
1013 :
1014 0 : return (meanOf64x64SquaredValuesBlocks - (meanOf64x64Blocks * meanOf64x64Blocks));
1015 : }
1016 :
/*******************************************
 * getFilteredTypes
 *   Smooths the sample at ptr with one of seven fixed 3x3 kernels and
 *   returns the result clamped to [0, 255].
 *
 *   ptr        : centre sample. Kernels 0-2 read its four edge-adjacent
 *                neighbours; kernels 3-6 read all eight neighbours.
 *   stride     : distance, in samples, between vertically adjacent samples
 *   filterType : kernel selector (weights as top / middle / bottom row)
 *                  0: 0 1 0 / 1 4 1 / 0 1 0, divided by 8
 *                  1: 0 2 0 / 2 4 2 / 0 2 0, divided by 12 through a
 *                     rounded fixed-point multiply
 *                  2: 0 4 0 / 4 4 4 / 0 4 0, divided by 20
 *                  3: 1 1 1 / 1 4 1 / 1 1 1, divided by 12
 *                  4: 1 2 1 / 2 4 2 / 1 2 1, divided by 16 (gaussian)
 *                  5: 2 2 2 / 2 4 2 / 2 2 2, divided by 20
 *                  6: 4 4 4 / 4 4 4 / 4 4 4, divided by 36
 *                any other value reads nothing and returns 0
 *******************************************/
uint8_t getFilteredTypes(uint8_t *ptr,
    uint32_t stride,
    uint8_t filterType)
{
    // Each row pointer starts one column to the left of the centre sample.
    uint8_t *above = ptr - 1 - stride;
    uint8_t *level = above + stride;
    uint8_t *below = level + stride;

    uint32_t centre;
    uint32_t cross;
    uint32_t corners;
    uint32_t filtered = 0;

    // Unknown selector: no sample is touched and the result stays 0.
    if (filterType > 6)
        return (uint8_t)CLIP3EQ(0, 255, filtered);

    centre = level[1];
    cross = above[1] + level[0] + level[2] + below[1];

    switch (filterType) {
    case 0:
        filtered = (cross + 4 * centre) / 8;
        break;

    case 1:
        filtered = 2 * cross + 4 * centre;
        // Fixed-point version of filtered / 12, written to mimic the x86
        // instruction _mm256_mulhrs_epi16 (multiply by 2730, drop 14 bits,
        // round, drop one more bit).
        filtered = (((uint32_t)((filtered * 2730) >> 14) + 1) >> 1) & 0xFFFF;
        break;

    case 2:
        filtered = (4 * cross + 4 * centre) / 20;
        break;

    default:
        // Kernels 3-6 also weight the four diagonal neighbours.
        corners = above[0] + above[2] + below[0] + below[2];

        if (filterType == 3)
            filtered = (corners + cross + 4 * centre) / 12;
        else if (filterType == 4)
            filtered = (corners + 2 * cross + 4 * centre) / 16;
        else if (filterType == 5)
            filtered = (2 * corners + 2 * cross + 4 * centre) / 20;
        else
            filtered = (4 * corners + 4 * cross + 4 * centre) / 36;
        break;
    }

    return (uint8_t)CLIP3EQ(0, 255, filtered);
}
1070 :
1071 : /*******************************************
1072 : * noise_extract_luma_strong
1073 : * strong filter Luma.
1074 : *******************************************/
1075 0 : void noise_extract_luma_strong_c(
1076 : EbPictureBufferDesc *input_picture_ptr,
1077 : EbPictureBufferDesc *denoised_picture_ptr,
1078 : uint32_t sb_origin_y
1079 : , uint32_t sb_origin_x
1080 : )
1081 : {
1082 : uint32_t ii, jj;
1083 : uint32_t picHeight, sb_height;
1084 : uint32_t picWidth;
1085 : uint32_t inputOriginIndex;
1086 : uint32_t inputOriginIndexPad;
1087 :
1088 : uint8_t *ptrIn;
1089 : uint32_t stride_in;
1090 : uint8_t *ptr_denoised;
1091 :
1092 : uint32_t strideOut;
1093 0 : uint32_t idx = (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width) ? sb_origin_x : 0;
1094 :
1095 : //Luma
1096 : {
1097 0 : picHeight = input_picture_ptr->height;
1098 0 : picWidth = input_picture_ptr->width;
1099 0 : sb_height = MIN(BLOCK_SIZE_64, picHeight - sb_origin_y);
1100 :
1101 0 : stride_in = input_picture_ptr->stride_y;
1102 0 : inputOriginIndex = input_picture_ptr->origin_x + (input_picture_ptr->origin_y + sb_origin_y)* input_picture_ptr->stride_y;
1103 0 : ptrIn = &(input_picture_ptr->buffer_y[inputOriginIndex]);
1104 :
1105 0 : inputOriginIndexPad = denoised_picture_ptr->origin_x + (denoised_picture_ptr->origin_y + sb_origin_y) * denoised_picture_ptr->stride_y;
1106 0 : strideOut = denoised_picture_ptr->stride_y;
1107 0 : ptr_denoised = &(denoised_picture_ptr->buffer_y[inputOriginIndexPad]);
1108 :
1109 0 : for (jj = 0; jj < sb_height; jj++) {
1110 0 : for (ii = idx; ii < picWidth; ii++) {
1111 0 : if ((jj > 0 || sb_origin_y > 0) && (jj < sb_height - 1 || sb_origin_y + sb_height < picHeight) && ii > 0 && ii < picWidth - 1)
1112 0 : ptr_denoised[ii + jj * strideOut] = getFilteredTypes(&ptrIn[ii + jj * stride_in], stride_in, 4);
1113 : else
1114 0 : ptr_denoised[ii + jj * strideOut] = ptrIn[ii + jj * stride_in];
1115 : }
1116 : }
1117 : }
1118 0 : }
1119 : /*******************************************
1120 : * noise_extract_chroma_strong
1121 : * strong filter chroma.
1122 : *******************************************/
1123 0 : void noise_extract_chroma_strong_c(
1124 : EbPictureBufferDesc *input_picture_ptr,
1125 : EbPictureBufferDesc *denoised_picture_ptr,
1126 : uint32_t sb_origin_y
1127 : , uint32_t sb_origin_x
1128 : )
1129 : {
1130 : uint32_t ii, jj;
1131 : uint32_t picHeight, sb_height;
1132 : uint32_t picWidth;
1133 : uint32_t inputOriginIndex;
1134 : uint32_t inputOriginIndexPad;
1135 :
1136 : uint8_t *ptrIn;
1137 : uint32_t stride_in;
1138 : uint8_t *ptr_denoised;
1139 :
1140 : uint32_t strideOut;
1141 0 : uint32_t idx = (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width) ? sb_origin_x : 0;
1142 :
1143 0 : uint32_t color_format = input_picture_ptr->color_format;
1144 0 : const uint16_t subsampling_x = (color_format == EB_YUV444 ? 1 : 2) - 1;
1145 0 : const uint16_t subsampling_y = (color_format >= EB_YUV422 ? 1 : 2) - 1;
1146 :
1147 : //Cb
1148 : {
1149 0 : picHeight = input_picture_ptr->height >> subsampling_y;
1150 0 : picWidth = input_picture_ptr->width >> subsampling_x;
1151 0 : sb_height = MIN(BLOCK_SIZE_64 >> subsampling_y, picHeight - sb_origin_y);
1152 :
1153 0 : stride_in = input_picture_ptr->stride_cb;
1154 0 : inputOriginIndex = (input_picture_ptr->origin_x >> subsampling_x) + ((input_picture_ptr->origin_y >> subsampling_y) + sb_origin_y) * input_picture_ptr->stride_cb;
1155 0 : ptrIn = &(input_picture_ptr->buffer_cb[inputOriginIndex]);
1156 :
1157 0 : inputOriginIndexPad = (denoised_picture_ptr->origin_x >> subsampling_x) + ((denoised_picture_ptr->origin_y >> subsampling_y) + sb_origin_y) * denoised_picture_ptr->stride_cb;
1158 0 : strideOut = denoised_picture_ptr->stride_cb;
1159 0 : ptr_denoised = &(denoised_picture_ptr->buffer_cb[inputOriginIndexPad]);
1160 :
1161 0 : for (jj = 0; jj < sb_height; jj++) {
1162 0 : for (ii = idx; ii < picWidth; ii++) {
1163 0 : if ((jj > 0 || sb_origin_y > 0) && (jj < sb_height - 1 || (sb_origin_y + sb_height) < picHeight) && ii > 0 && ii < picWidth - 1)
1164 0 : ptr_denoised[ii + jj * strideOut] = getFilteredTypes(&ptrIn[ii + jj * stride_in], stride_in, 6);
1165 : else
1166 0 : ptr_denoised[ii + jj * strideOut] = ptrIn[ii + jj * stride_in];
1167 : }
1168 : }
1169 : }
1170 :
1171 : //Cr
1172 : {
1173 0 : picHeight = input_picture_ptr->height >> subsampling_y;
1174 0 : picWidth = input_picture_ptr->width >> subsampling_x;
1175 0 : sb_height = MIN(BLOCK_SIZE_64 >> subsampling_y, picHeight - sb_origin_y);
1176 :
1177 0 : stride_in = input_picture_ptr->stride_cr;
1178 0 : inputOriginIndex = (input_picture_ptr->origin_x >> subsampling_x) + ((input_picture_ptr->origin_y >> subsampling_y) + sb_origin_y) * input_picture_ptr->stride_cr;
1179 :
1180 0 : ptrIn = &(input_picture_ptr->buffer_cr[inputOriginIndex]);
1181 :
1182 0 : inputOriginIndexPad = (denoised_picture_ptr->origin_x >> subsampling_x) + ((denoised_picture_ptr->origin_y >> subsampling_y) + sb_origin_y) * denoised_picture_ptr->stride_cr;
1183 0 : strideOut = denoised_picture_ptr->stride_cr;
1184 0 : ptr_denoised = &(denoised_picture_ptr->buffer_cr[inputOriginIndexPad]);
1185 :
1186 0 : for (jj = 0; jj < sb_height; jj++) {
1187 0 : for (ii = idx; ii < picWidth; ii++) {
1188 0 : if ((jj > 0 || sb_origin_y > 0) && (jj < sb_height - 1 || (sb_origin_y + sb_height) < picHeight) && ii > 0 && ii < picWidth - 1)
1189 0 : ptr_denoised[ii + jj * strideOut] = getFilteredTypes(&ptrIn[ii + jj * stride_in], stride_in, 6);
1190 : else
1191 0 : ptr_denoised[ii + jj * strideOut] = ptrIn[ii + jj * stride_in];
1192 : }
1193 : }
1194 : }
1195 0 : }
1196 :
1197 : /*******************************************
1198 : * noise_extract_chroma_weak
1199 : * weak filter chroma.
1200 : *******************************************/
1201 0 : void noise_extract_chroma_weak_c(
1202 : EbPictureBufferDesc *input_picture_ptr,
1203 : EbPictureBufferDesc *denoised_picture_ptr,
1204 : uint32_t sb_origin_y
1205 : , uint32_t sb_origin_x
1206 : )
1207 : {
1208 : uint32_t ii, jj;
1209 : uint32_t picHeight, sb_height;
1210 : uint32_t picWidth;
1211 : uint32_t inputOriginIndex;
1212 : uint32_t inputOriginIndexPad;
1213 :
1214 : uint8_t *ptrIn;
1215 : uint32_t stride_in;
1216 : uint8_t *ptr_denoised;
1217 :
1218 : uint32_t strideOut;
1219 :
1220 0 : uint32_t idx = (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width) ? sb_origin_x : 0;
1221 :
1222 0 : uint32_t color_format = input_picture_ptr->color_format;
1223 0 : const uint16_t subsampling_x = (color_format == EB_YUV444 ? 1 : 2) - 1;
1224 0 : const uint16_t subsampling_y = (color_format >= EB_YUV422 ? 1 : 2) - 1;
1225 :
1226 : //Cb
1227 : {
1228 0 : picHeight = input_picture_ptr->height >> subsampling_y;
1229 0 : picWidth = input_picture_ptr->width >> subsampling_x;
1230 0 : sb_height = MIN(BLOCK_SIZE_64 >> subsampling_y, picHeight - sb_origin_y);
1231 :
1232 0 : stride_in = input_picture_ptr->stride_cb;
1233 0 : inputOriginIndex = (input_picture_ptr->origin_x >> subsampling_x) + ((input_picture_ptr->origin_y >> subsampling_y) + sb_origin_y) * input_picture_ptr->stride_cb;
1234 :
1235 0 : ptrIn = &(input_picture_ptr->buffer_cb[inputOriginIndex]);
1236 :
1237 0 : inputOriginIndexPad = (denoised_picture_ptr->origin_x >> subsampling_x) + ((denoised_picture_ptr->origin_y >> subsampling_y) + sb_origin_y) * denoised_picture_ptr->stride_cb;
1238 :
1239 0 : strideOut = denoised_picture_ptr->stride_cb;
1240 0 : ptr_denoised = &(denoised_picture_ptr->buffer_cb[inputOriginIndexPad]);
1241 :
1242 0 : for (jj = 0; jj < sb_height; jj++) {
1243 0 : for (ii = idx; ii < picWidth; ii++) {
1244 0 : if ((jj > 0 || sb_origin_y > 0) && (jj < sb_height - 1 || (sb_origin_y + sb_height) < picHeight) && ii > 0 && ii < picWidth - 1)
1245 0 : ptr_denoised[ii + jj * strideOut] = getFilteredTypes(&ptrIn[ii + jj * stride_in], stride_in, 4);
1246 : else
1247 0 : ptr_denoised[ii + jj * strideOut] = ptrIn[ii + jj * stride_in];
1248 : }
1249 : }
1250 : }
1251 :
1252 : //Cr
1253 : {
1254 0 : picHeight = input_picture_ptr->height >> subsampling_y;
1255 0 : picWidth = input_picture_ptr->width >> subsampling_x;
1256 0 : sb_height = MIN(BLOCK_SIZE_64 >> subsampling_y, picHeight - sb_origin_y);
1257 :
1258 0 : stride_in = input_picture_ptr->stride_cr;
1259 0 : inputOriginIndex = (input_picture_ptr->origin_x >> subsampling_x) + ((input_picture_ptr->origin_y >> subsampling_y) + sb_origin_y) * input_picture_ptr->stride_cr;
1260 0 : ptrIn = &(input_picture_ptr->buffer_cr[inputOriginIndex]);
1261 :
1262 0 : inputOriginIndexPad = (denoised_picture_ptr->origin_x >> subsampling_x) + ((denoised_picture_ptr->origin_y >> subsampling_y) + sb_origin_y) * denoised_picture_ptr->stride_cr;
1263 0 : strideOut = denoised_picture_ptr->stride_cr;
1264 0 : ptr_denoised = &(denoised_picture_ptr->buffer_cr[inputOriginIndexPad]);
1265 :
1266 0 : for (jj = 0; jj < sb_height; jj++) {
1267 0 : for (ii = idx; ii < picWidth; ii++) {
1268 0 : if ((jj > 0 || sb_origin_y > 0) && (jj < sb_height - 1 || (sb_origin_y + sb_height) < picHeight) && ii > 0 && ii < picWidth - 1)
1269 0 : ptr_denoised[ii + jj * strideOut] = getFilteredTypes(&ptrIn[ii + jj * stride_in], stride_in, 4);
1270 : else
1271 0 : ptr_denoised[ii + jj * strideOut] = ptrIn[ii + jj * stride_in];
1272 : }
1273 : }
1274 : }
1275 0 : }
1276 :
1277 : /*******************************************
1278 : * noise_extract_luma_weak
1279 : * weak filter Luma and store noise.
1280 : *******************************************/
1281 0 : void noise_extract_luma_weak_c(
1282 : EbPictureBufferDesc *input_picture_ptr,
1283 : EbPictureBufferDesc *denoised_picture_ptr,
1284 : EbPictureBufferDesc *noise_picture_ptr,
1285 : uint32_t sb_origin_y
1286 : , uint32_t sb_origin_x
1287 : )
1288 : {
1289 : uint32_t ii, jj;
1290 : uint32_t picHeight, sb_height;
1291 : uint32_t picWidth;
1292 : uint32_t inputOriginIndex;
1293 : uint32_t inputOriginIndexPad;
1294 : uint32_t noiseOriginIndex;
1295 :
1296 : uint8_t *ptrIn;
1297 : uint32_t stride_in;
1298 : uint8_t *ptr_denoised;
1299 :
1300 : uint8_t *ptr_noise;
1301 : uint32_t strideOut;
1302 :
1303 0 : uint32_t idx = (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width) ? sb_origin_x : 0;
1304 :
1305 : //Luma
1306 : {
1307 0 : picHeight = input_picture_ptr->height;
1308 0 : picWidth = input_picture_ptr->width;
1309 0 : sb_height = MIN(BLOCK_SIZE_64, picHeight - sb_origin_y);
1310 :
1311 0 : stride_in = input_picture_ptr->stride_y;
1312 0 : inputOriginIndex = input_picture_ptr->origin_x + (input_picture_ptr->origin_y + sb_origin_y) * input_picture_ptr->stride_y;
1313 0 : ptrIn = &(input_picture_ptr->buffer_y[inputOriginIndex]);
1314 :
1315 0 : inputOriginIndexPad = denoised_picture_ptr->origin_x + (denoised_picture_ptr->origin_y + sb_origin_y) * denoised_picture_ptr->stride_y;
1316 0 : strideOut = denoised_picture_ptr->stride_y;
1317 0 : ptr_denoised = &(denoised_picture_ptr->buffer_y[inputOriginIndexPad]);
1318 :
1319 0 : noiseOriginIndex = noise_picture_ptr->origin_x + noise_picture_ptr->origin_y * noise_picture_ptr->stride_y;
1320 0 : ptr_noise = &(noise_picture_ptr->buffer_y[noiseOriginIndex]);
1321 :
1322 0 : for (jj = 0; jj < sb_height; jj++) {
1323 0 : for (ii = idx; ii < picWidth; ii++) {
1324 0 : if ((jj > 0 || sb_origin_y > 0) && (jj < sb_height - 1 || sb_origin_y + sb_height < picHeight) && ii > 0 && ii < picWidth - 1) {
1325 0 : ptr_denoised[ii + jj * strideOut] = getFilteredTypes(&ptrIn[ii + jj * stride_in], stride_in, 0);
1326 0 : ptr_noise[ii + jj * strideOut] = CLIP3EQ(0, 255, ptrIn[ii + jj * stride_in] - ptr_denoised[ii + jj * strideOut]);
1327 : }
1328 : else {
1329 0 : ptr_denoised[ii + jj * strideOut] = ptrIn[ii + jj * stride_in];
1330 0 : ptr_noise[ii + jj * strideOut] = 0;
1331 : }
1332 : }
1333 : }
1334 : }
1335 0 : }
1336 :
1337 0 : void noise_extract_luma_weak_lcu_c(
1338 : EbPictureBufferDesc *input_picture_ptr,
1339 : EbPictureBufferDesc *denoised_picture_ptr,
1340 : EbPictureBufferDesc *noise_picture_ptr,
1341 : uint32_t sb_origin_y
1342 : , uint32_t sb_origin_x
1343 : )
1344 : {
1345 : uint32_t ii, jj;
1346 : uint32_t picHeight, sb_height;
1347 : uint32_t picWidth, sb_width;
1348 : uint32_t inputOriginIndex;
1349 : uint32_t inputOriginIndexPad;
1350 : uint32_t noiseOriginIndex;
1351 :
1352 : uint8_t *ptrIn;
1353 : uint32_t stride_in;
1354 : uint8_t *ptr_denoised;
1355 :
1356 : uint8_t *ptr_noise;
1357 : uint32_t strideOut;
1358 :
1359 0 : uint32_t idx = (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width) ? sb_origin_x : 0;
1360 :
1361 : //Luma
1362 : {
1363 0 : picHeight = input_picture_ptr->height;
1364 0 : picWidth = input_picture_ptr->width;
1365 0 : sb_height = MIN(BLOCK_SIZE_64, picHeight - sb_origin_y);
1366 0 : sb_width = MIN(BLOCK_SIZE_64, picWidth - sb_origin_x);
1367 :
1368 0 : stride_in = input_picture_ptr->stride_y;
1369 0 : inputOriginIndex = input_picture_ptr->origin_x + sb_origin_x + (input_picture_ptr->origin_y + sb_origin_y) * input_picture_ptr->stride_y;
1370 0 : ptrIn = &(input_picture_ptr->buffer_y[inputOriginIndex]);
1371 :
1372 0 : inputOriginIndexPad = denoised_picture_ptr->origin_x + sb_origin_x + (denoised_picture_ptr->origin_y + sb_origin_y) * denoised_picture_ptr->stride_y;
1373 0 : strideOut = denoised_picture_ptr->stride_y;
1374 0 : ptr_denoised = &(denoised_picture_ptr->buffer_y[inputOriginIndexPad]);
1375 :
1376 0 : noiseOriginIndex = noise_picture_ptr->origin_x + sb_origin_x + noise_picture_ptr->origin_y * noise_picture_ptr->stride_y;
1377 0 : ptr_noise = &(noise_picture_ptr->buffer_y[noiseOriginIndex]);
1378 :
1379 0 : for (jj = 0; jj < sb_height; jj++) {
1380 0 : for (ii = idx; ii < sb_width; ii++) {
1381 0 : if ((jj > 0 || sb_origin_y > 0) && (jj < sb_height - 1 || sb_origin_y + sb_height < picHeight) && (ii > 0 || sb_origin_x > 0) && (ii + sb_origin_x) < picWidth - 1/* & ii < sb_width - 1*/) {
1382 0 : ptr_denoised[ii + jj * strideOut] = getFilteredTypes(&ptrIn[ii + jj * stride_in], stride_in, 0);
1383 0 : ptr_noise[ii + jj * strideOut] = CLIP3EQ(0, 255, ptrIn[ii + jj * stride_in] - ptr_denoised[ii + jj * strideOut]);
1384 : }
1385 : else {
1386 0 : ptr_denoised[ii + jj * strideOut] = ptrIn[ii + jj * stride_in];
1387 0 : ptr_noise[ii + jj * strideOut] = 0;
1388 : }
1389 : }
1390 : }
1391 : }
1392 0 : }
1393 :
1394 1200 : EbErrorType ZeroOutChromaBlockMean(
1395 : PictureParentControlSet *picture_control_set_ptr, // input parameter, Picture Control Set Ptr
1396 : uint32_t lcuCodingOrder // input parameter, SB address
1397 : )
1398 : {
1399 1200 : EbErrorType return_error = EB_ErrorNone;
1400 : // 16x16 mean
1401 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_0] = 0;
1402 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_1] = 0;
1403 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_2] = 0;
1404 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_3] = 0;
1405 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_4] = 0;
1406 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_5] = 0;
1407 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_6] = 0;
1408 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_7] = 0;
1409 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_8] = 0;
1410 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_9] = 0;
1411 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_10] = 0;
1412 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_11] = 0;
1413 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_12] = 0;
1414 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_13] = 0;
1415 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_14] = 0;
1416 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_15] = 0;
1417 :
1418 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_0] = 0;
1419 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_1] = 0;
1420 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_2] = 0;
1421 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_3] = 0;
1422 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_4] = 0;
1423 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_5] = 0;
1424 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_6] = 0;
1425 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_7] = 0;
1426 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_8] = 0;
1427 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_9] = 0;
1428 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_10] = 0;
1429 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_11] = 0;
1430 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_12] = 0;
1431 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_13] = 0;
1432 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_14] = 0;
1433 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_15] = 0;
1434 :
1435 : // 32x32 mean
1436 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_0] = 0;
1437 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_1] = 0;
1438 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_2] = 0;
1439 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_3] = 0;
1440 :
1441 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_0] = 0;
1442 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_1] = 0;
1443 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_2] = 0;
1444 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_3] = 0;
1445 :
1446 : // 64x64 mean
1447 1200 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_64x64] = 0;
1448 1200 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_64x64] = 0;
1449 :
1450 1200 : return return_error;
1451 : }
1452 : /*******************************************
1453 : * ComputeChromaBlockMean
1454 : * computes the chroma block mean for 64x64, 32x32 and 16x16 CUs inside the tree block
1455 : *******************************************/
1456 5985 : EbErrorType ComputeChromaBlockMean(
1457 : SequenceControlSet *sequence_control_set_ptr,
1458 : PictureParentControlSet *picture_control_set_ptr, // input parameter, Picture Control Set Ptr
1459 : EbPictureBufferDesc *input_padded_picture_ptr, // input parameter, Input Padded Picture
1460 : uint32_t lcuCodingOrder, // input parameter, SB address
1461 : uint32_t inputCbOriginIndex, // input parameter, SB index, used to point to source/reference samples
1462 : uint32_t inputCrOriginIndex) // input parameter, SB index, used to point to source/reference samples
1463 : {
1464 5985 : EbErrorType return_error = EB_ErrorNone;
1465 :
1466 : uint32_t cbBlockIndex, crBlockIndex;
1467 :
1468 : uint64_t cbMeanOf16x16Blocks[16];
1469 : uint64_t crMeanOf16x16Blocks[16];
1470 :
1471 : uint64_t cbMeanOf32x32Blocks[4];
1472 : uint64_t crMeanOf32x32Blocks[4];
1473 :
1474 : uint64_t cbMeanOf64x64Blocks;
1475 : uint64_t crMeanOf64x64Blocks;
1476 :
1477 : // (0,0) 16x16 block
1478 5985 : cbBlockIndex = inputCbOriginIndex;
1479 5985 : crBlockIndex = inputCrOriginIndex;
1480 5985 : if (sequence_control_set_ptr->block_mean_calc_prec == BLOCK_MEAN_PREC_FULL) {
1481 0 : cbMeanOf16x16Blocks[0] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
1482 0 : crMeanOf16x16Blocks[0] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
1483 :
1484 : // (0,1)
1485 0 : cbBlockIndex = cbBlockIndex + 8;
1486 0 : crBlockIndex = crBlockIndex + 8;
1487 0 : cbMeanOf16x16Blocks[1] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
1488 0 : crMeanOf16x16Blocks[1] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
1489 :
1490 : // (0,2)
1491 0 : cbBlockIndex = cbBlockIndex + 8;
1492 0 : crBlockIndex = crBlockIndex + 8;
1493 0 : cbMeanOf16x16Blocks[2] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
1494 0 : crMeanOf16x16Blocks[2] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
1495 :
1496 : // (0,3)
1497 0 : cbBlockIndex = cbBlockIndex + 8;
1498 0 : crBlockIndex = crBlockIndex + 8;
1499 0 : cbMeanOf16x16Blocks[3] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
1500 0 : crMeanOf16x16Blocks[3] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
1501 :
1502 : // (1,0)
1503 0 : cbBlockIndex = inputCbOriginIndex + (input_padded_picture_ptr->stride_cb << 3);
1504 0 : crBlockIndex = inputCrOriginIndex + (input_padded_picture_ptr->stride_cr << 3);
1505 0 : cbMeanOf16x16Blocks[4] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
1506 0 : crMeanOf16x16Blocks[4] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
1507 :
1508 : // (1,1)
1509 0 : cbBlockIndex = cbBlockIndex + 8;
1510 0 : crBlockIndex = crBlockIndex + 8;
1511 0 : cbMeanOf16x16Blocks[5] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
1512 0 : crMeanOf16x16Blocks[5] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
1513 :
1514 : // (1,2)
1515 0 : cbBlockIndex = cbBlockIndex + 8;
1516 0 : crBlockIndex = crBlockIndex + 8;
1517 0 : cbMeanOf16x16Blocks[6] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
1518 0 : crMeanOf16x16Blocks[6] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
1519 :
1520 : // (1,3)
1521 0 : cbBlockIndex = cbBlockIndex + 8;
1522 0 : crBlockIndex = crBlockIndex + 8;
1523 0 : cbMeanOf16x16Blocks[7] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
1524 0 : crMeanOf16x16Blocks[7] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
1525 :
1526 : // (2,0)
1527 0 : cbBlockIndex = inputCbOriginIndex + (input_padded_picture_ptr->stride_cb << 4);
1528 0 : crBlockIndex = inputCrOriginIndex + (input_padded_picture_ptr->stride_cr << 4);
1529 0 : cbMeanOf16x16Blocks[8] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
1530 0 : crMeanOf16x16Blocks[8] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
1531 :
1532 : // (2,1)
1533 0 : cbBlockIndex = cbBlockIndex + 8;
1534 0 : crBlockIndex = crBlockIndex + 8;
1535 0 : cbMeanOf16x16Blocks[9] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
1536 0 : crMeanOf16x16Blocks[9] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
1537 :
1538 : // (2,2)
1539 0 : cbBlockIndex = cbBlockIndex + 8;
1540 0 : crBlockIndex = crBlockIndex + 8;
1541 0 : cbMeanOf16x16Blocks[10] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
1542 0 : crMeanOf16x16Blocks[10] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
1543 :
1544 : // (2,3)
1545 0 : cbBlockIndex = cbBlockIndex + 8;
1546 0 : crBlockIndex = crBlockIndex + 8;
1547 0 : cbMeanOf16x16Blocks[11] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
1548 0 : crMeanOf16x16Blocks[11] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
1549 :
1550 : // (3,0)
1551 0 : cbBlockIndex = inputCbOriginIndex + (input_padded_picture_ptr->stride_cb * 24);
1552 0 : crBlockIndex = inputCrOriginIndex + (input_padded_picture_ptr->stride_cr * 24);
1553 0 : cbMeanOf16x16Blocks[12] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
1554 0 : crMeanOf16x16Blocks[12] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
1555 :
1556 : // (3,1)
1557 0 : cbBlockIndex = cbBlockIndex + 8;
1558 0 : crBlockIndex = crBlockIndex + 8;
1559 0 : cbMeanOf16x16Blocks[13] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
1560 0 : crMeanOf16x16Blocks[13] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
1561 :
1562 : // (3,2)
1563 0 : cbBlockIndex = cbBlockIndex + 8;
1564 0 : crBlockIndex = crBlockIndex + 8;
1565 0 : cbMeanOf16x16Blocks[14] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
1566 0 : crMeanOf16x16Blocks[14] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
1567 :
1568 : // (3,3)
1569 0 : cbBlockIndex = cbBlockIndex + 8;
1570 0 : crBlockIndex = crBlockIndex + 8;
1571 0 : cbMeanOf16x16Blocks[15] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), input_padded_picture_ptr->stride_cb, 8, 8);
1572 0 : crMeanOf16x16Blocks[15] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), input_padded_picture_ptr->stride_cr, 8, 8);
1573 : }
1574 : else {
1575 5985 : const uint16_t stride_cb = input_padded_picture_ptr->stride_cb;
1576 5985 : const uint16_t stride_cr = input_padded_picture_ptr->stride_cr;
1577 :
1578 5985 : cbMeanOf16x16Blocks[0] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
1579 5994 : crMeanOf16x16Blocks[0] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
1580 :
1581 : // (0,1)
1582 5997 : cbBlockIndex = cbBlockIndex + 8;
1583 5997 : crBlockIndex = crBlockIndex + 8;
1584 5997 : cbMeanOf16x16Blocks[1] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
1585 5996 : crMeanOf16x16Blocks[1] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
1586 :
1587 : // (0,2)
1588 5997 : cbBlockIndex = cbBlockIndex + 8;
1589 5997 : crBlockIndex = crBlockIndex + 8;
1590 5997 : cbMeanOf16x16Blocks[2] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
1591 5999 : crMeanOf16x16Blocks[2] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
1592 :
1593 : // (0,3)
1594 5997 : cbBlockIndex = cbBlockIndex + 8;
1595 5997 : crBlockIndex = crBlockIndex + 8;
1596 5997 : cbMeanOf16x16Blocks[3] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
1597 5998 : crMeanOf16x16Blocks[3] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
1598 :
1599 : // (1,0)
1600 5998 : cbBlockIndex = inputCbOriginIndex + (stride_cb << 3);
1601 5998 : crBlockIndex = inputCrOriginIndex + (stride_cr << 3);
1602 5998 : cbMeanOf16x16Blocks[4] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
1603 5999 : crMeanOf16x16Blocks[4] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
1604 :
1605 : // (1,1)
1606 5996 : cbBlockIndex = cbBlockIndex + 8;
1607 5996 : crBlockIndex = crBlockIndex + 8;
1608 5996 : cbMeanOf16x16Blocks[5] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
1609 5996 : crMeanOf16x16Blocks[5] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
1610 :
1611 : // (1,2)
1612 5996 : cbBlockIndex = cbBlockIndex + 8;
1613 5996 : crBlockIndex = crBlockIndex + 8;
1614 5996 : cbMeanOf16x16Blocks[6] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
1615 5995 : crMeanOf16x16Blocks[6] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
1616 :
1617 : // (1,3)
1618 5995 : cbBlockIndex = cbBlockIndex + 8;
1619 5995 : crBlockIndex = crBlockIndex + 8;
1620 5995 : cbMeanOf16x16Blocks[7] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
1621 5994 : crMeanOf16x16Blocks[7] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
1622 :
1623 : // (2,0)
1624 5995 : cbBlockIndex = inputCbOriginIndex + (stride_cb << 4);
1625 5995 : crBlockIndex = inputCrOriginIndex + (stride_cr << 4);
1626 5995 : cbMeanOf16x16Blocks[8] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
1627 5996 : crMeanOf16x16Blocks[8] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
1628 :
1629 : // (2,1)
1630 5996 : cbBlockIndex = cbBlockIndex + 8;
1631 5996 : crBlockIndex = crBlockIndex + 8;
1632 5996 : cbMeanOf16x16Blocks[9] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
1633 5995 : crMeanOf16x16Blocks[9] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
1634 :
1635 : // (2,2)
1636 5996 : cbBlockIndex = cbBlockIndex + 8;
1637 5996 : crBlockIndex = crBlockIndex + 8;
1638 5996 : cbMeanOf16x16Blocks[10] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
1639 5998 : crMeanOf16x16Blocks[10] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
1640 :
1641 : // (2,3)
1642 5997 : cbBlockIndex = cbBlockIndex + 8;
1643 5997 : crBlockIndex = crBlockIndex + 8;
1644 5997 : cbMeanOf16x16Blocks[11] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
1645 6000 : crMeanOf16x16Blocks[11] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
1646 :
1647 : // (3,0)
1648 5998 : cbBlockIndex = inputCbOriginIndex + (stride_cb * 24);
1649 5998 : crBlockIndex = inputCrOriginIndex + (stride_cr * 24);
1650 5998 : cbMeanOf16x16Blocks[12] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
1651 5996 : crMeanOf16x16Blocks[12] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
1652 :
1653 : // (3,1)
1654 5998 : cbBlockIndex = cbBlockIndex + 8;
1655 5998 : crBlockIndex = crBlockIndex + 8;
1656 5998 : cbMeanOf16x16Blocks[13] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
1657 5999 : crMeanOf16x16Blocks[13] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
1658 :
1659 : // (3,2)
1660 5997 : cbBlockIndex = cbBlockIndex + 8;
1661 5997 : crBlockIndex = crBlockIndex + 8;
1662 5997 : cbMeanOf16x16Blocks[14] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
1663 5997 : crMeanOf16x16Blocks[14] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
1664 :
1665 : // (3,3)
1666 5998 : cbBlockIndex = cbBlockIndex + 8;
1667 5998 : crBlockIndex = crBlockIndex + 8;
1668 5998 : cbMeanOf16x16Blocks[15] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cb[cbBlockIndex]), stride_cb);
1669 5999 : crMeanOf16x16Blocks[15] = compute_sub_mean8x8_sse2_intrin(&(input_padded_picture_ptr->buffer_cr[crBlockIndex]), stride_cr);
1670 : }
1671 :
1672 : // 32x32
1673 5995 : cbMeanOf32x32Blocks[0] = (cbMeanOf16x16Blocks[0] + cbMeanOf16x16Blocks[1] + cbMeanOf16x16Blocks[4] + cbMeanOf16x16Blocks[5]) >> 2;
1674 5995 : crMeanOf32x32Blocks[0] = (crMeanOf16x16Blocks[0] + crMeanOf16x16Blocks[1] + crMeanOf16x16Blocks[4] + crMeanOf16x16Blocks[5]) >> 2;
1675 :
1676 5995 : cbMeanOf32x32Blocks[1] = (cbMeanOf16x16Blocks[2] + cbMeanOf16x16Blocks[3] + cbMeanOf16x16Blocks[6] + cbMeanOf16x16Blocks[7]) >> 2;
1677 5995 : crMeanOf32x32Blocks[1] = (crMeanOf16x16Blocks[2] + crMeanOf16x16Blocks[3] + crMeanOf16x16Blocks[6] + crMeanOf16x16Blocks[7]) >> 2;
1678 :
1679 5995 : cbMeanOf32x32Blocks[2] = (cbMeanOf16x16Blocks[8] + cbMeanOf16x16Blocks[9] + cbMeanOf16x16Blocks[12] + cbMeanOf16x16Blocks[13]) >> 2;
1680 5995 : crMeanOf32x32Blocks[2] = (crMeanOf16x16Blocks[8] + crMeanOf16x16Blocks[9] + crMeanOf16x16Blocks[12] + crMeanOf16x16Blocks[13]) >> 2;
1681 :
1682 5995 : cbMeanOf32x32Blocks[3] = (cbMeanOf16x16Blocks[10] + cbMeanOf16x16Blocks[11] + cbMeanOf16x16Blocks[14] + cbMeanOf16x16Blocks[15]) >> 2;
1683 5995 : crMeanOf32x32Blocks[3] = (crMeanOf16x16Blocks[10] + crMeanOf16x16Blocks[11] + crMeanOf16x16Blocks[14] + crMeanOf16x16Blocks[15]) >> 2;
1684 :
1685 : // 64x64
1686 5995 : cbMeanOf64x64Blocks = (cbMeanOf32x32Blocks[0] + cbMeanOf32x32Blocks[1] + cbMeanOf32x32Blocks[3] + cbMeanOf32x32Blocks[3]) >> 2;
1687 5995 : crMeanOf64x64Blocks = (crMeanOf32x32Blocks[0] + crMeanOf32x32Blocks[1] + crMeanOf32x32Blocks[3] + crMeanOf32x32Blocks[3]) >> 2;
1688 : // 16x16 mean
1689 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_0] = (uint8_t)(cbMeanOf16x16Blocks[0] >> MEAN_PRECISION);
1690 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_1] = (uint8_t)(cbMeanOf16x16Blocks[1] >> MEAN_PRECISION);
1691 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_2] = (uint8_t)(cbMeanOf16x16Blocks[2] >> MEAN_PRECISION);
1692 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_3] = (uint8_t)(cbMeanOf16x16Blocks[3] >> MEAN_PRECISION);
1693 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_4] = (uint8_t)(cbMeanOf16x16Blocks[4] >> MEAN_PRECISION);
1694 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_5] = (uint8_t)(cbMeanOf16x16Blocks[5] >> MEAN_PRECISION);
1695 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_6] = (uint8_t)(cbMeanOf16x16Blocks[6] >> MEAN_PRECISION);
1696 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_7] = (uint8_t)(cbMeanOf16x16Blocks[7] >> MEAN_PRECISION);
1697 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_8] = (uint8_t)(cbMeanOf16x16Blocks[8] >> MEAN_PRECISION);
1698 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_9] = (uint8_t)(cbMeanOf16x16Blocks[9] >> MEAN_PRECISION);
1699 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_10] = (uint8_t)(cbMeanOf16x16Blocks[10] >> MEAN_PRECISION);
1700 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_11] = (uint8_t)(cbMeanOf16x16Blocks[11] >> MEAN_PRECISION);
1701 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_12] = (uint8_t)(cbMeanOf16x16Blocks[12] >> MEAN_PRECISION);
1702 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_13] = (uint8_t)(cbMeanOf16x16Blocks[13] >> MEAN_PRECISION);
1703 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_14] = (uint8_t)(cbMeanOf16x16Blocks[14] >> MEAN_PRECISION);
1704 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_15] = (uint8_t)(cbMeanOf16x16Blocks[15] >> MEAN_PRECISION);
1705 :
1706 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_0] = (uint8_t)(crMeanOf16x16Blocks[0] >> MEAN_PRECISION);
1707 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_1] = (uint8_t)(crMeanOf16x16Blocks[1] >> MEAN_PRECISION);
1708 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_2] = (uint8_t)(crMeanOf16x16Blocks[2] >> MEAN_PRECISION);
1709 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_3] = (uint8_t)(crMeanOf16x16Blocks[3] >> MEAN_PRECISION);
1710 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_4] = (uint8_t)(crMeanOf16x16Blocks[4] >> MEAN_PRECISION);
1711 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_5] = (uint8_t)(crMeanOf16x16Blocks[5] >> MEAN_PRECISION);
1712 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_6] = (uint8_t)(crMeanOf16x16Blocks[6] >> MEAN_PRECISION);
1713 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_7] = (uint8_t)(crMeanOf16x16Blocks[7] >> MEAN_PRECISION);
1714 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_8] = (uint8_t)(crMeanOf16x16Blocks[8] >> MEAN_PRECISION);
1715 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_9] = (uint8_t)(crMeanOf16x16Blocks[9] >> MEAN_PRECISION);
1716 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_10] = (uint8_t)(crMeanOf16x16Blocks[10] >> MEAN_PRECISION);
1717 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_11] = (uint8_t)(crMeanOf16x16Blocks[11] >> MEAN_PRECISION);
1718 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_12] = (uint8_t)(crMeanOf16x16Blocks[12] >> MEAN_PRECISION);
1719 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_13] = (uint8_t)(crMeanOf16x16Blocks[13] >> MEAN_PRECISION);
1720 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_14] = (uint8_t)(crMeanOf16x16Blocks[14] >> MEAN_PRECISION);
1721 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_16x16_15] = (uint8_t)(crMeanOf16x16Blocks[15] >> MEAN_PRECISION);
1722 :
1723 : // 32x32 mean
1724 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_0] = (uint8_t)(cbMeanOf32x32Blocks[0] >> MEAN_PRECISION);
1725 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_1] = (uint8_t)(cbMeanOf32x32Blocks[1] >> MEAN_PRECISION);
1726 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_2] = (uint8_t)(cbMeanOf32x32Blocks[2] >> MEAN_PRECISION);
1727 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_3] = (uint8_t)(cbMeanOf32x32Blocks[3] >> MEAN_PRECISION);
1728 :
1729 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_0] = (uint8_t)(crMeanOf32x32Blocks[0] >> MEAN_PRECISION);
1730 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_1] = (uint8_t)(crMeanOf32x32Blocks[1] >> MEAN_PRECISION);
1731 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_2] = (uint8_t)(crMeanOf32x32Blocks[2] >> MEAN_PRECISION);
1732 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_32x32_3] = (uint8_t)(crMeanOf32x32Blocks[3] >> MEAN_PRECISION);
1733 :
1734 : // 64x64 mean
1735 5995 : picture_control_set_ptr->cbMean[lcuCodingOrder][ME_TIER_ZERO_PU_64x64] = (uint8_t)(cbMeanOf64x64Blocks >> MEAN_PRECISION);
1736 5995 : picture_control_set_ptr->crMean[lcuCodingOrder][ME_TIER_ZERO_PU_64x64] = (uint8_t)(crMeanOf64x64Blocks >> MEAN_PRECISION);
1737 :
1738 5995 : return return_error;
1739 : }
1740 :
1741 : /*******************************************
1742 : * ComputeBlockMeanComputeVariance
1743 : * computes the variance and the block mean of all CUs inside the tree block
1744 : *******************************************/
1745 7188 : EbErrorType ComputeBlockMeanComputeVariance(
1746 : SequenceControlSet *sequence_control_set_ptr,
1747 : PictureParentControlSet *picture_control_set_ptr, // input parameter, Picture Control Set Ptr
1748 : EbPictureBufferDesc *input_padded_picture_ptr, // input parameter, Input Padded Picture
1749 : uint32_t sb_index, // input parameter, SB address
1750 : uint32_t inputLumaOriginIndex) // input parameter, SB index, used to point to source/reference samples
1751 : {
1752 7188 : EbErrorType return_error = EB_ErrorNone;
1753 :
1754 : uint32_t blockIndex;
1755 :
1756 : uint64_t mean_of8x8_blocks[64];
1757 : uint64_t meanOf8x8SquaredValuesBlocks[64];
1758 :
1759 : uint64_t meanOf16x16Blocks[16];
1760 : uint64_t meanOf16x16SquaredValuesBlocks[16];
1761 :
1762 : uint64_t meanOf32x32Blocks[4];
1763 : uint64_t meanOf32x32SquaredValuesBlocks[4];
1764 :
1765 : uint64_t meanOf64x64Blocks;
1766 : uint64_t meanOf64x64SquaredValuesBlocks;
1767 :
1768 : // (0,0)
1769 7188 : blockIndex = inputLumaOriginIndex;
1770 7188 : if (sequence_control_set_ptr->block_mean_calc_prec == BLOCK_MEAN_PREC_FULL) {
1771 0 : mean_of8x8_blocks[0] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1772 0 : meanOf8x8SquaredValuesBlocks[0] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1773 :
1774 : // (0,1)
1775 0 : blockIndex = blockIndex + 8;
1776 0 : mean_of8x8_blocks[1] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1777 0 : meanOf8x8SquaredValuesBlocks[1] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1778 :
1779 : // (0,2)
1780 0 : blockIndex = blockIndex + 8;
1781 0 : mean_of8x8_blocks[2] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1782 0 : meanOf8x8SquaredValuesBlocks[2] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1783 :
1784 : // (0,3)
1785 0 : blockIndex = blockIndex + 8;
1786 0 : mean_of8x8_blocks[3] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1787 0 : meanOf8x8SquaredValuesBlocks[3] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1788 :
1789 : // (0,4)
1790 0 : blockIndex = blockIndex + 8;
1791 0 : mean_of8x8_blocks[4] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1792 0 : meanOf8x8SquaredValuesBlocks[4] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1793 :
1794 : // (0,5)
1795 0 : blockIndex = blockIndex + 8;
1796 0 : mean_of8x8_blocks[5] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1797 0 : meanOf8x8SquaredValuesBlocks[5] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1798 :
1799 : // (0,6)
1800 0 : blockIndex = blockIndex + 8;
1801 0 : mean_of8x8_blocks[6] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1802 0 : meanOf8x8SquaredValuesBlocks[6] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1803 :
1804 : // (0,7)
1805 0 : blockIndex = blockIndex + 8;
1806 0 : mean_of8x8_blocks[7] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1807 0 : meanOf8x8SquaredValuesBlocks[7] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1808 :
1809 : // (1,0)
1810 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3);
1811 0 : mean_of8x8_blocks[8] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1812 0 : meanOf8x8SquaredValuesBlocks[8] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1813 :
1814 : // (1,1)
1815 0 : blockIndex = blockIndex + 8;
1816 0 : mean_of8x8_blocks[9] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1817 0 : meanOf8x8SquaredValuesBlocks[9] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1818 :
1819 : // (1,2)
1820 0 : blockIndex = blockIndex + 8;
1821 0 : mean_of8x8_blocks[10] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1822 0 : meanOf8x8SquaredValuesBlocks[10] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1823 :
1824 : // (1,3)
1825 0 : blockIndex = blockIndex + 8;
1826 0 : mean_of8x8_blocks[11] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1827 0 : meanOf8x8SquaredValuesBlocks[11] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1828 :
1829 : // (1,4)
1830 0 : blockIndex = blockIndex + 8;
1831 0 : mean_of8x8_blocks[12] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1832 0 : meanOf8x8SquaredValuesBlocks[12] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1833 :
1834 : // (1,5)
1835 0 : blockIndex = blockIndex + 8;
1836 0 : mean_of8x8_blocks[13] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1837 0 : meanOf8x8SquaredValuesBlocks[13] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1838 :
1839 : // (1,6)
1840 0 : blockIndex = blockIndex + 8;
1841 0 : mean_of8x8_blocks[14] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1842 0 : meanOf8x8SquaredValuesBlocks[14] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1843 :
1844 : // (1,7)
1845 0 : blockIndex = blockIndex + 8;
1846 0 : mean_of8x8_blocks[15] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1847 0 : meanOf8x8SquaredValuesBlocks[15] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1848 :
1849 : // (2,0)
1850 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 4);
1851 0 : mean_of8x8_blocks[16] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1852 0 : meanOf8x8SquaredValuesBlocks[16] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1853 :
1854 : // (2,1)
1855 0 : blockIndex = blockIndex + 8;
1856 0 : mean_of8x8_blocks[17] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1857 0 : meanOf8x8SquaredValuesBlocks[17] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1858 :
1859 : // (2,2)
1860 0 : blockIndex = blockIndex + 8;
1861 0 : mean_of8x8_blocks[18] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1862 0 : meanOf8x8SquaredValuesBlocks[18] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1863 :
1864 : // (2,3)
1865 0 : blockIndex = blockIndex + 8;
1866 0 : mean_of8x8_blocks[19] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1867 0 : meanOf8x8SquaredValuesBlocks[19] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1868 :
1869 : /// (2,4)
1870 0 : blockIndex = blockIndex + 8;
1871 0 : mean_of8x8_blocks[20] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1872 0 : meanOf8x8SquaredValuesBlocks[20] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1873 :
1874 : // (2,5)
1875 0 : blockIndex = blockIndex + 8;
1876 0 : mean_of8x8_blocks[21] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1877 0 : meanOf8x8SquaredValuesBlocks[21] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1878 :
1879 : // (2,6)
1880 0 : blockIndex = blockIndex + 8;
1881 0 : mean_of8x8_blocks[22] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1882 0 : meanOf8x8SquaredValuesBlocks[22] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1883 :
1884 : // (2,7)
1885 0 : blockIndex = blockIndex + 8;
1886 0 : mean_of8x8_blocks[23] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1887 0 : meanOf8x8SquaredValuesBlocks[23] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1888 :
1889 : // (3,0)
1890 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3) + (input_padded_picture_ptr->stride_y << 4);
1891 0 : mean_of8x8_blocks[24] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1892 0 : meanOf8x8SquaredValuesBlocks[24] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1893 :
1894 : // (3,1)
1895 0 : blockIndex = blockIndex + 8;
1896 0 : mean_of8x8_blocks[25] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1897 0 : meanOf8x8SquaredValuesBlocks[25] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1898 :
1899 : // (3,2)
1900 0 : blockIndex = blockIndex + 8;
1901 0 : mean_of8x8_blocks[26] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1902 0 : meanOf8x8SquaredValuesBlocks[26] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1903 :
1904 : // (3,3)
1905 0 : blockIndex = blockIndex + 8;
1906 0 : mean_of8x8_blocks[27] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1907 0 : meanOf8x8SquaredValuesBlocks[27] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1908 :
1909 : // (3,4)
1910 0 : blockIndex = blockIndex + 8;
1911 0 : mean_of8x8_blocks[28] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1912 0 : meanOf8x8SquaredValuesBlocks[28] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1913 :
1914 : // (3,5)
1915 0 : blockIndex = blockIndex + 8;
1916 0 : mean_of8x8_blocks[29] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1917 0 : meanOf8x8SquaredValuesBlocks[29] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1918 :
1919 : // (3,6)
1920 0 : blockIndex = blockIndex + 8;
1921 0 : mean_of8x8_blocks[30] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1922 0 : meanOf8x8SquaredValuesBlocks[30] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1923 :
1924 : // (3,7)
1925 0 : blockIndex = blockIndex + 8;
1926 0 : mean_of8x8_blocks[31] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1927 0 : meanOf8x8SquaredValuesBlocks[31] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1928 :
1929 : // (4,0)
1930 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 5);
1931 0 : mean_of8x8_blocks[32] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1932 0 : meanOf8x8SquaredValuesBlocks[32] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1933 :
1934 : // (4,1)
1935 0 : blockIndex = blockIndex + 8;
1936 0 : mean_of8x8_blocks[33] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1937 0 : meanOf8x8SquaredValuesBlocks[33] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1938 :
1939 : // (4,2)
1940 0 : blockIndex = blockIndex + 8;
1941 0 : mean_of8x8_blocks[34] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1942 0 : meanOf8x8SquaredValuesBlocks[34] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1943 :
1944 : // (4,3)
1945 0 : blockIndex = blockIndex + 8;
1946 0 : mean_of8x8_blocks[35] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1947 0 : meanOf8x8SquaredValuesBlocks[35] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1948 :
1949 : // (4,4)
1950 0 : blockIndex = blockIndex + 8;
1951 0 : mean_of8x8_blocks[36] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1952 0 : meanOf8x8SquaredValuesBlocks[36] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1953 :
1954 : // (4,5)
1955 0 : blockIndex = blockIndex + 8;
1956 0 : mean_of8x8_blocks[37] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1957 0 : meanOf8x8SquaredValuesBlocks[37] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1958 :
1959 : // (4,6)
1960 0 : blockIndex = blockIndex + 8;
1961 0 : mean_of8x8_blocks[38] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1962 0 : meanOf8x8SquaredValuesBlocks[38] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1963 :
1964 : // (4,7)
1965 0 : blockIndex = blockIndex + 8;
1966 0 : mean_of8x8_blocks[39] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1967 0 : meanOf8x8SquaredValuesBlocks[39] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1968 :
1969 : // (5,0)
1970 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3) + (input_padded_picture_ptr->stride_y << 5);
1971 0 : mean_of8x8_blocks[40] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1972 0 : meanOf8x8SquaredValuesBlocks[40] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1973 :
1974 : // (5,1)
1975 0 : blockIndex = blockIndex + 8;
1976 0 : mean_of8x8_blocks[41] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1977 0 : meanOf8x8SquaredValuesBlocks[41] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1978 :
1979 : // (5,2)
1980 0 : blockIndex = blockIndex + 8;
1981 0 : mean_of8x8_blocks[42] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1982 0 : meanOf8x8SquaredValuesBlocks[42] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1983 :
1984 : // (5,3)
1985 0 : blockIndex = blockIndex + 8;
1986 0 : mean_of8x8_blocks[43] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1987 0 : meanOf8x8SquaredValuesBlocks[43] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1988 :
1989 : // (5,4)
1990 0 : blockIndex = blockIndex + 8;
1991 0 : mean_of8x8_blocks[44] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1992 0 : meanOf8x8SquaredValuesBlocks[44] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1993 :
1994 : // (5,5)
1995 0 : blockIndex = blockIndex + 8;
1996 0 : mean_of8x8_blocks[45] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1997 0 : meanOf8x8SquaredValuesBlocks[45] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
1998 :
1999 : // (5,6)
2000 0 : blockIndex = blockIndex + 8;
2001 0 : mean_of8x8_blocks[46] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2002 0 : meanOf8x8SquaredValuesBlocks[46] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2003 :
2004 : // (5,7)
2005 0 : blockIndex = blockIndex + 8;
2006 0 : mean_of8x8_blocks[47] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2007 0 : meanOf8x8SquaredValuesBlocks[47] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2008 :
2009 : // (6,0)
2010 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 4) + (input_padded_picture_ptr->stride_y << 5);
2011 0 : mean_of8x8_blocks[48] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2012 0 : meanOf8x8SquaredValuesBlocks[48] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2013 :
2014 : // (6,1)
2015 0 : blockIndex = blockIndex + 8;
2016 0 : mean_of8x8_blocks[49] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2017 0 : meanOf8x8SquaredValuesBlocks[49] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2018 :
2019 : // (6,2)
2020 0 : blockIndex = blockIndex + 8;
2021 0 : mean_of8x8_blocks[50] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2022 0 : meanOf8x8SquaredValuesBlocks[50] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2023 :
2024 : // (6,3)
2025 0 : blockIndex = blockIndex + 8;
2026 0 : mean_of8x8_blocks[51] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2027 0 : meanOf8x8SquaredValuesBlocks[51] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2028 :
2029 : // (6,4)
2030 0 : blockIndex = blockIndex + 8;
2031 0 : mean_of8x8_blocks[52] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2032 0 : meanOf8x8SquaredValuesBlocks[52] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2033 :
2034 : // (6,5)
2035 0 : blockIndex = blockIndex + 8;
2036 0 : mean_of8x8_blocks[53] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2037 0 : meanOf8x8SquaredValuesBlocks[53] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2038 :
2039 : // (6,6)
2040 0 : blockIndex = blockIndex + 8;
2041 0 : mean_of8x8_blocks[54] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2042 0 : meanOf8x8SquaredValuesBlocks[54] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2043 :
2044 : // (6,7)
2045 0 : blockIndex = blockIndex + 8;
2046 0 : mean_of8x8_blocks[55] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2047 0 : meanOf8x8SquaredValuesBlocks[55] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2048 :
2049 : // (7,0)
2050 0 : blockIndex = inputLumaOriginIndex + (input_padded_picture_ptr->stride_y << 3) + (input_padded_picture_ptr->stride_y << 4) + (input_padded_picture_ptr->stride_y << 5);
2051 0 : mean_of8x8_blocks[56] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2052 0 : meanOf8x8SquaredValuesBlocks[56] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2053 :
2054 : // (7,1)
2055 0 : blockIndex = blockIndex + 8;
2056 0 : mean_of8x8_blocks[57] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2057 0 : meanOf8x8SquaredValuesBlocks[57] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2058 :
2059 : // (7,2)
2060 0 : blockIndex = blockIndex + 8;
2061 0 : mean_of8x8_blocks[58] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2062 0 : meanOf8x8SquaredValuesBlocks[58] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2063 :
2064 : // (7,3)
2065 0 : blockIndex = blockIndex + 8;
2066 0 : mean_of8x8_blocks[59] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2067 0 : meanOf8x8SquaredValuesBlocks[59] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2068 :
2069 : // (7,4)
2070 0 : blockIndex = blockIndex + 8;
2071 0 : mean_of8x8_blocks[60] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2072 0 : meanOf8x8SquaredValuesBlocks[60] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2073 :
2074 : // (7,5)
2075 0 : blockIndex = blockIndex + 8;
2076 0 : mean_of8x8_blocks[61] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2077 0 : meanOf8x8SquaredValuesBlocks[61] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2078 :
2079 : // (7,6)
2080 0 : blockIndex = blockIndex + 8;
2081 0 : mean_of8x8_blocks[62] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2082 0 : meanOf8x8SquaredValuesBlocks[62] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2083 :
2084 : // (7,7)
2085 0 : blockIndex = blockIndex + 8;
2086 0 : mean_of8x8_blocks[63] = compute_mean_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2087 0 : meanOf8x8SquaredValuesBlocks[63] = compute_mean_square_values_8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), input_padded_picture_ptr->stride_y, 8, 8);
2088 : }
2089 : else {
2090 7188 : const uint16_t stride_y = input_padded_picture_ptr->stride_y;
2091 :
2092 7188 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[0], &meanOf8x8SquaredValuesBlocks[0]);
2093 :
2094 : // (0,1)
2095 7189 : blockIndex = blockIndex + 32;
2096 :
2097 7189 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[4], &meanOf8x8SquaredValuesBlocks[4]);
2098 :
2099 : // (0,5)
2100 7197 : blockIndex = blockIndex + 24;
2101 :
2102 : // (1,0)
2103 7197 : blockIndex = inputLumaOriginIndex + (stride_y << 3);
2104 :
2105 7197 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[8], &meanOf8x8SquaredValuesBlocks[8]);
2106 :
2107 : // (1,1)
2108 7190 : blockIndex = blockIndex + 32;
2109 :
2110 7190 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[12], &meanOf8x8SquaredValuesBlocks[12]);
2111 :
2112 : // (1,5)
2113 7193 : blockIndex = blockIndex + 24;
2114 :
2115 : // (2,0)
2116 7193 : blockIndex = inputLumaOriginIndex + (stride_y << 4);
2117 :
2118 7193 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[16], &meanOf8x8SquaredValuesBlocks[16]);
2119 :
2120 : // (2,1)
2121 7192 : blockIndex = blockIndex + 32;
2122 :
2123 7192 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[20], &meanOf8x8SquaredValuesBlocks[20]);
2124 :
2125 : // (2,5)
2126 7193 : blockIndex = blockIndex + 24;
2127 :
2128 : // (3,0)
2129 7193 : blockIndex = inputLumaOriginIndex + (stride_y << 3) + (stride_y << 4);
2130 :
2131 7193 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[24], &meanOf8x8SquaredValuesBlocks[24]);
2132 :
2133 : // (3,1)
2134 7195 : blockIndex = blockIndex + 32;
2135 :
2136 7195 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[28], &meanOf8x8SquaredValuesBlocks[28]);
2137 :
2138 : // (3,5)
2139 7197 : blockIndex = blockIndex + 24;
2140 :
2141 : // (4,0)
2142 7197 : blockIndex = inputLumaOriginIndex + (stride_y << 5);
2143 :
2144 7197 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[32], &meanOf8x8SquaredValuesBlocks[32]);
2145 :
2146 : // (4,1)
2147 7195 : blockIndex = blockIndex + 32;
2148 :
2149 7195 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[36], &meanOf8x8SquaredValuesBlocks[36]);
2150 :
2151 : // (4,5)
2152 7195 : blockIndex = blockIndex + 24;
2153 :
2154 : // (5,0)
2155 7195 : blockIndex = inputLumaOriginIndex + (stride_y << 3) + (stride_y << 5);
2156 :
2157 7195 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[40], &meanOf8x8SquaredValuesBlocks[40]);
2158 :
2159 : // (5,1)
2160 7193 : blockIndex = blockIndex + 32;
2161 :
2162 7193 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[44], &meanOf8x8SquaredValuesBlocks[44]);
2163 :
2164 : // (5,5)
2165 7196 : blockIndex = blockIndex + 24;
2166 :
2167 : // (6,0)
2168 7196 : blockIndex = inputLumaOriginIndex + (stride_y << 4) + (stride_y << 5);
2169 :
2170 7196 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[48], &meanOf8x8SquaredValuesBlocks[48]);
2171 :
2172 : // (6,1)
2173 7192 : blockIndex = blockIndex + 32;
2174 :
2175 7192 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[52], &meanOf8x8SquaredValuesBlocks[52]);
2176 :
2177 : // (6,5)
2178 7193 : blockIndex = blockIndex + 24;
2179 :
2180 : // (7,0)
2181 7193 : blockIndex = inputLumaOriginIndex + (stride_y << 3) + (stride_y << 4) + (stride_y << 5);
2182 :
2183 7193 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[56], &meanOf8x8SquaredValuesBlocks[56]);
2184 :
2185 : // (7,1)
2186 7191 : blockIndex = blockIndex + 32;
2187 :
2188 7191 : compute_interm_var_four8x8(&(input_padded_picture_ptr->buffer_y[blockIndex]), stride_y, &mean_of8x8_blocks[60], &meanOf8x8SquaredValuesBlocks[60]);
2189 :
2190 : }
2191 :
2192 : // 16x16
2193 7190 : meanOf16x16Blocks[0] = (mean_of8x8_blocks[0] + mean_of8x8_blocks[1] + mean_of8x8_blocks[8] + mean_of8x8_blocks[9]) >> 2;
2194 7190 : meanOf16x16Blocks[1] = (mean_of8x8_blocks[2] + mean_of8x8_blocks[3] + mean_of8x8_blocks[10] + mean_of8x8_blocks[11]) >> 2;
2195 7190 : meanOf16x16Blocks[2] = (mean_of8x8_blocks[4] + mean_of8x8_blocks[5] + mean_of8x8_blocks[12] + mean_of8x8_blocks[13]) >> 2;
2196 7190 : meanOf16x16Blocks[3] = (mean_of8x8_blocks[6] + mean_of8x8_blocks[7] + mean_of8x8_blocks[14] + mean_of8x8_blocks[15]) >> 2;
2197 :
2198 7190 : meanOf16x16Blocks[4] = (mean_of8x8_blocks[16] + mean_of8x8_blocks[17] + mean_of8x8_blocks[24] + mean_of8x8_blocks[25]) >> 2;
2199 7190 : meanOf16x16Blocks[5] = (mean_of8x8_blocks[18] + mean_of8x8_blocks[19] + mean_of8x8_blocks[26] + mean_of8x8_blocks[27]) >> 2;
2200 7190 : meanOf16x16Blocks[6] = (mean_of8x8_blocks[20] + mean_of8x8_blocks[21] + mean_of8x8_blocks[28] + mean_of8x8_blocks[29]) >> 2;
2201 7190 : meanOf16x16Blocks[7] = (mean_of8x8_blocks[22] + mean_of8x8_blocks[23] + mean_of8x8_blocks[30] + mean_of8x8_blocks[31]) >> 2;
2202 :
2203 7190 : meanOf16x16Blocks[8] = (mean_of8x8_blocks[32] + mean_of8x8_blocks[33] + mean_of8x8_blocks[40] + mean_of8x8_blocks[41]) >> 2;
2204 7190 : meanOf16x16Blocks[9] = (mean_of8x8_blocks[34] + mean_of8x8_blocks[35] + mean_of8x8_blocks[42] + mean_of8x8_blocks[43]) >> 2;
2205 7190 : meanOf16x16Blocks[10] = (mean_of8x8_blocks[36] + mean_of8x8_blocks[37] + mean_of8x8_blocks[44] + mean_of8x8_blocks[45]) >> 2;
2206 7190 : meanOf16x16Blocks[11] = (mean_of8x8_blocks[38] + mean_of8x8_blocks[39] + mean_of8x8_blocks[46] + mean_of8x8_blocks[47]) >> 2;
2207 :
2208 7190 : meanOf16x16Blocks[12] = (mean_of8x8_blocks[48] + mean_of8x8_blocks[49] + mean_of8x8_blocks[56] + mean_of8x8_blocks[57]) >> 2;
2209 7190 : meanOf16x16Blocks[13] = (mean_of8x8_blocks[50] + mean_of8x8_blocks[51] + mean_of8x8_blocks[58] + mean_of8x8_blocks[59]) >> 2;
2210 7190 : meanOf16x16Blocks[14] = (mean_of8x8_blocks[52] + mean_of8x8_blocks[53] + mean_of8x8_blocks[60] + mean_of8x8_blocks[61]) >> 2;
2211 7190 : meanOf16x16Blocks[15] = (mean_of8x8_blocks[54] + mean_of8x8_blocks[55] + mean_of8x8_blocks[62] + mean_of8x8_blocks[63]) >> 2;
2212 :
2213 7190 : meanOf16x16SquaredValuesBlocks[0] = (meanOf8x8SquaredValuesBlocks[0] + meanOf8x8SquaredValuesBlocks[1] + meanOf8x8SquaredValuesBlocks[8] + meanOf8x8SquaredValuesBlocks[9]) >> 2;
2214 7190 : meanOf16x16SquaredValuesBlocks[1] = (meanOf8x8SquaredValuesBlocks[2] + meanOf8x8SquaredValuesBlocks[3] + meanOf8x8SquaredValuesBlocks[10] + meanOf8x8SquaredValuesBlocks[11]) >> 2;
2215 7190 : meanOf16x16SquaredValuesBlocks[2] = (meanOf8x8SquaredValuesBlocks[4] + meanOf8x8SquaredValuesBlocks[5] + meanOf8x8SquaredValuesBlocks[12] + meanOf8x8SquaredValuesBlocks[13]) >> 2;
2216 7190 : meanOf16x16SquaredValuesBlocks[3] = (meanOf8x8SquaredValuesBlocks[6] + meanOf8x8SquaredValuesBlocks[7] + meanOf8x8SquaredValuesBlocks[14] + meanOf8x8SquaredValuesBlocks[15]) >> 2;
2217 :
2218 7190 : meanOf16x16SquaredValuesBlocks[4] = (meanOf8x8SquaredValuesBlocks[16] + meanOf8x8SquaredValuesBlocks[17] + meanOf8x8SquaredValuesBlocks[24] + meanOf8x8SquaredValuesBlocks[25]) >> 2;
2219 7190 : meanOf16x16SquaredValuesBlocks[5] = (meanOf8x8SquaredValuesBlocks[18] + meanOf8x8SquaredValuesBlocks[19] + meanOf8x8SquaredValuesBlocks[26] + meanOf8x8SquaredValuesBlocks[27]) >> 2;
2220 7190 : meanOf16x16SquaredValuesBlocks[6] = (meanOf8x8SquaredValuesBlocks[20] + meanOf8x8SquaredValuesBlocks[21] + meanOf8x8SquaredValuesBlocks[28] + meanOf8x8SquaredValuesBlocks[29]) >> 2;
2221 7190 : meanOf16x16SquaredValuesBlocks[7] = (meanOf8x8SquaredValuesBlocks[22] + meanOf8x8SquaredValuesBlocks[23] + meanOf8x8SquaredValuesBlocks[30] + meanOf8x8SquaredValuesBlocks[31]) >> 2;
2222 :
2223 7190 : meanOf16x16SquaredValuesBlocks[8] = (meanOf8x8SquaredValuesBlocks[32] + meanOf8x8SquaredValuesBlocks[33] + meanOf8x8SquaredValuesBlocks[40] + meanOf8x8SquaredValuesBlocks[41]) >> 2;
2224 7190 : meanOf16x16SquaredValuesBlocks[9] = (meanOf8x8SquaredValuesBlocks[34] + meanOf8x8SquaredValuesBlocks[35] + meanOf8x8SquaredValuesBlocks[42] + meanOf8x8SquaredValuesBlocks[43]) >> 2;
2225 7190 : meanOf16x16SquaredValuesBlocks[10] = (meanOf8x8SquaredValuesBlocks[36] + meanOf8x8SquaredValuesBlocks[37] + meanOf8x8SquaredValuesBlocks[44] + meanOf8x8SquaredValuesBlocks[45]) >> 2;
2226 7190 : meanOf16x16SquaredValuesBlocks[11] = (meanOf8x8SquaredValuesBlocks[38] + meanOf8x8SquaredValuesBlocks[39] + meanOf8x8SquaredValuesBlocks[46] + meanOf8x8SquaredValuesBlocks[47]) >> 2;
2227 :
2228 7190 : meanOf16x16SquaredValuesBlocks[12] = (meanOf8x8SquaredValuesBlocks[48] + meanOf8x8SquaredValuesBlocks[49] + meanOf8x8SquaredValuesBlocks[56] + meanOf8x8SquaredValuesBlocks[57]) >> 2;
2229 7190 : meanOf16x16SquaredValuesBlocks[13] = (meanOf8x8SquaredValuesBlocks[50] + meanOf8x8SquaredValuesBlocks[51] + meanOf8x8SquaredValuesBlocks[58] + meanOf8x8SquaredValuesBlocks[59]) >> 2;
2230 7190 : meanOf16x16SquaredValuesBlocks[14] = (meanOf8x8SquaredValuesBlocks[52] + meanOf8x8SquaredValuesBlocks[53] + meanOf8x8SquaredValuesBlocks[60] + meanOf8x8SquaredValuesBlocks[61]) >> 2;
2231 7190 : meanOf16x16SquaredValuesBlocks[15] = (meanOf8x8SquaredValuesBlocks[54] + meanOf8x8SquaredValuesBlocks[55] + meanOf8x8SquaredValuesBlocks[62] + meanOf8x8SquaredValuesBlocks[63]) >> 2;
2232 :
2233 : // 32x32
2234 7190 : meanOf32x32Blocks[0] = (meanOf16x16Blocks[0] + meanOf16x16Blocks[1] + meanOf16x16Blocks[4] + meanOf16x16Blocks[5]) >> 2;
2235 7190 : meanOf32x32Blocks[1] = (meanOf16x16Blocks[2] + meanOf16x16Blocks[3] + meanOf16x16Blocks[6] + meanOf16x16Blocks[7]) >> 2;
2236 7190 : meanOf32x32Blocks[2] = (meanOf16x16Blocks[8] + meanOf16x16Blocks[9] + meanOf16x16Blocks[12] + meanOf16x16Blocks[13]) >> 2;
2237 7190 : meanOf32x32Blocks[3] = (meanOf16x16Blocks[10] + meanOf16x16Blocks[11] + meanOf16x16Blocks[14] + meanOf16x16Blocks[15]) >> 2;
2238 :
2239 7190 : meanOf32x32SquaredValuesBlocks[0] = (meanOf16x16SquaredValuesBlocks[0] + meanOf16x16SquaredValuesBlocks[1] + meanOf16x16SquaredValuesBlocks[4] + meanOf16x16SquaredValuesBlocks[5]) >> 2;
2240 7190 : meanOf32x32SquaredValuesBlocks[1] = (meanOf16x16SquaredValuesBlocks[2] + meanOf16x16SquaredValuesBlocks[3] + meanOf16x16SquaredValuesBlocks[6] + meanOf16x16SquaredValuesBlocks[7]) >> 2;
2241 7190 : meanOf32x32SquaredValuesBlocks[2] = (meanOf16x16SquaredValuesBlocks[8] + meanOf16x16SquaredValuesBlocks[9] + meanOf16x16SquaredValuesBlocks[12] + meanOf16x16SquaredValuesBlocks[13]) >> 2;
2242 7190 : meanOf32x32SquaredValuesBlocks[3] = (meanOf16x16SquaredValuesBlocks[10] + meanOf16x16SquaredValuesBlocks[11] + meanOf16x16SquaredValuesBlocks[14] + meanOf16x16SquaredValuesBlocks[15]) >> 2;
2243 :
2244 : // 64x64
2245 7190 : meanOf64x64Blocks = (meanOf32x32Blocks[0] + meanOf32x32Blocks[1] + meanOf32x32Blocks[2] + meanOf32x32Blocks[3]) >> 2;
2246 7190 : meanOf64x64SquaredValuesBlocks = (meanOf32x32SquaredValuesBlocks[0] + meanOf32x32SquaredValuesBlocks[1] + meanOf32x32SquaredValuesBlocks[2] + meanOf32x32SquaredValuesBlocks[3]) >> 2;
2247 :
2248 : // 8x8 means
2249 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_0] = (uint8_t)(mean_of8x8_blocks[0] >> MEAN_PRECISION);
2250 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_1] = (uint8_t)(mean_of8x8_blocks[1] >> MEAN_PRECISION);
2251 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_2] = (uint8_t)(mean_of8x8_blocks[2] >> MEAN_PRECISION);
2252 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_3] = (uint8_t)(mean_of8x8_blocks[3] >> MEAN_PRECISION);
2253 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_4] = (uint8_t)(mean_of8x8_blocks[4] >> MEAN_PRECISION);
2254 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_5] = (uint8_t)(mean_of8x8_blocks[5] >> MEAN_PRECISION);
2255 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_6] = (uint8_t)(mean_of8x8_blocks[6] >> MEAN_PRECISION);
2256 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_7] = (uint8_t)(mean_of8x8_blocks[7] >> MEAN_PRECISION);
2257 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_8] = (uint8_t)(mean_of8x8_blocks[8] >> MEAN_PRECISION);
2258 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_9] = (uint8_t)(mean_of8x8_blocks[9] >> MEAN_PRECISION);
2259 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_10] = (uint8_t)(mean_of8x8_blocks[10] >> MEAN_PRECISION);
2260 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_11] = (uint8_t)(mean_of8x8_blocks[11] >> MEAN_PRECISION);
2261 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_12] = (uint8_t)(mean_of8x8_blocks[12] >> MEAN_PRECISION);
2262 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_13] = (uint8_t)(mean_of8x8_blocks[13] >> MEAN_PRECISION);
2263 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_14] = (uint8_t)(mean_of8x8_blocks[14] >> MEAN_PRECISION);
2264 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_15] = (uint8_t)(mean_of8x8_blocks[15] >> MEAN_PRECISION);
2265 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_16] = (uint8_t)(mean_of8x8_blocks[16] >> MEAN_PRECISION);
2266 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_17] = (uint8_t)(mean_of8x8_blocks[17] >> MEAN_PRECISION);
2267 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_18] = (uint8_t)(mean_of8x8_blocks[18] >> MEAN_PRECISION);
2268 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_19] = (uint8_t)(mean_of8x8_blocks[19] >> MEAN_PRECISION);
2269 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_20] = (uint8_t)(mean_of8x8_blocks[20] >> MEAN_PRECISION);
2270 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_21] = (uint8_t)(mean_of8x8_blocks[21] >> MEAN_PRECISION);
2271 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_22] = (uint8_t)(mean_of8x8_blocks[22] >> MEAN_PRECISION);
2272 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_23] = (uint8_t)(mean_of8x8_blocks[23] >> MEAN_PRECISION);
2273 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_24] = (uint8_t)(mean_of8x8_blocks[24] >> MEAN_PRECISION);
2274 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_25] = (uint8_t)(mean_of8x8_blocks[25] >> MEAN_PRECISION);
2275 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_26] = (uint8_t)(mean_of8x8_blocks[26] >> MEAN_PRECISION);
2276 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_27] = (uint8_t)(mean_of8x8_blocks[27] >> MEAN_PRECISION);
2277 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_28] = (uint8_t)(mean_of8x8_blocks[28] >> MEAN_PRECISION);
2278 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_29] = (uint8_t)(mean_of8x8_blocks[29] >> MEAN_PRECISION);
2279 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_30] = (uint8_t)(mean_of8x8_blocks[30] >> MEAN_PRECISION);
2280 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_31] = (uint8_t)(mean_of8x8_blocks[31] >> MEAN_PRECISION);
2281 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_32] = (uint8_t)(mean_of8x8_blocks[32] >> MEAN_PRECISION);
2282 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_33] = (uint8_t)(mean_of8x8_blocks[33] >> MEAN_PRECISION);
2283 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_34] = (uint8_t)(mean_of8x8_blocks[34] >> MEAN_PRECISION);
2284 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_35] = (uint8_t)(mean_of8x8_blocks[35] >> MEAN_PRECISION);
2285 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_36] = (uint8_t)(mean_of8x8_blocks[36] >> MEAN_PRECISION);
2286 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_37] = (uint8_t)(mean_of8x8_blocks[37] >> MEAN_PRECISION);
2287 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_38] = (uint8_t)(mean_of8x8_blocks[38] >> MEAN_PRECISION);
2288 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_39] = (uint8_t)(mean_of8x8_blocks[39] >> MEAN_PRECISION);
2289 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_40] = (uint8_t)(mean_of8x8_blocks[40] >> MEAN_PRECISION);
2290 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_41] = (uint8_t)(mean_of8x8_blocks[41] >> MEAN_PRECISION);
2291 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_42] = (uint8_t)(mean_of8x8_blocks[42] >> MEAN_PRECISION);
2292 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_43] = (uint8_t)(mean_of8x8_blocks[43] >> MEAN_PRECISION);
2293 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_44] = (uint8_t)(mean_of8x8_blocks[44] >> MEAN_PRECISION);
2294 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_45] = (uint8_t)(mean_of8x8_blocks[45] >> MEAN_PRECISION);
2295 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_46] = (uint8_t)(mean_of8x8_blocks[46] >> MEAN_PRECISION);
2296 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_47] = (uint8_t)(mean_of8x8_blocks[47] >> MEAN_PRECISION);
2297 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_48] = (uint8_t)(mean_of8x8_blocks[48] >> MEAN_PRECISION);
2298 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_49] = (uint8_t)(mean_of8x8_blocks[49] >> MEAN_PRECISION);
2299 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_50] = (uint8_t)(mean_of8x8_blocks[50] >> MEAN_PRECISION);
2300 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_51] = (uint8_t)(mean_of8x8_blocks[51] >> MEAN_PRECISION);
2301 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_52] = (uint8_t)(mean_of8x8_blocks[52] >> MEAN_PRECISION);
2302 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_53] = (uint8_t)(mean_of8x8_blocks[53] >> MEAN_PRECISION);
2303 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_54] = (uint8_t)(mean_of8x8_blocks[54] >> MEAN_PRECISION);
2304 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_55] = (uint8_t)(mean_of8x8_blocks[55] >> MEAN_PRECISION);
2305 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_56] = (uint8_t)(mean_of8x8_blocks[56] >> MEAN_PRECISION);
2306 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_57] = (uint8_t)(mean_of8x8_blocks[57] >> MEAN_PRECISION);
2307 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_58] = (uint8_t)(mean_of8x8_blocks[58] >> MEAN_PRECISION);
2308 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_59] = (uint8_t)(mean_of8x8_blocks[59] >> MEAN_PRECISION);
2309 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_60] = (uint8_t)(mean_of8x8_blocks[60] >> MEAN_PRECISION);
2310 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_61] = (uint8_t)(mean_of8x8_blocks[61] >> MEAN_PRECISION);
2311 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_62] = (uint8_t)(mean_of8x8_blocks[62] >> MEAN_PRECISION);
2312 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_8x8_63] = (uint8_t)(mean_of8x8_blocks[63] >> MEAN_PRECISION);
2313 :
2314 : // 16x16 mean
2315 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_0] = (uint8_t)(meanOf16x16Blocks[0] >> MEAN_PRECISION);
2316 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_1] = (uint8_t)(meanOf16x16Blocks[1] >> MEAN_PRECISION);
2317 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_2] = (uint8_t)(meanOf16x16Blocks[2] >> MEAN_PRECISION);
2318 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_3] = (uint8_t)(meanOf16x16Blocks[3] >> MEAN_PRECISION);
2319 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_4] = (uint8_t)(meanOf16x16Blocks[4] >> MEAN_PRECISION);
2320 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_5] = (uint8_t)(meanOf16x16Blocks[5] >> MEAN_PRECISION);
2321 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_6] = (uint8_t)(meanOf16x16Blocks[6] >> MEAN_PRECISION);
2322 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_7] = (uint8_t)(meanOf16x16Blocks[7] >> MEAN_PRECISION);
2323 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_8] = (uint8_t)(meanOf16x16Blocks[8] >> MEAN_PRECISION);
2324 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_9] = (uint8_t)(meanOf16x16Blocks[9] >> MEAN_PRECISION);
2325 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_10] = (uint8_t)(meanOf16x16Blocks[10] >> MEAN_PRECISION);
2326 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_11] = (uint8_t)(meanOf16x16Blocks[11] >> MEAN_PRECISION);
2327 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_12] = (uint8_t)(meanOf16x16Blocks[12] >> MEAN_PRECISION);
2328 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_13] = (uint8_t)(meanOf16x16Blocks[13] >> MEAN_PRECISION);
2329 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_14] = (uint8_t)(meanOf16x16Blocks[14] >> MEAN_PRECISION);
2330 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_16x16_15] = (uint8_t)(meanOf16x16Blocks[15] >> MEAN_PRECISION);
2331 :
2332 : // 32x32 mean
2333 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_32x32_0] = (uint8_t)(meanOf32x32Blocks[0] >> MEAN_PRECISION);
2334 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_32x32_1] = (uint8_t)(meanOf32x32Blocks[1] >> MEAN_PRECISION);
2335 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_32x32_2] = (uint8_t)(meanOf32x32Blocks[2] >> MEAN_PRECISION);
2336 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_32x32_3] = (uint8_t)(meanOf32x32Blocks[3] >> MEAN_PRECISION);
2337 :
2338 : // 64x64 mean
2339 7190 : picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_64x64] = (uint8_t)(meanOf64x64Blocks >> MEAN_PRECISION);
2340 :
2341 : // 8x8 variances
2342 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_0] = (uint16_t)((meanOf8x8SquaredValuesBlocks[0] - (mean_of8x8_blocks[0] * mean_of8x8_blocks[0])) >> VARIANCE_PRECISION);
2343 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_1] = (uint16_t)((meanOf8x8SquaredValuesBlocks[1] - (mean_of8x8_blocks[1] * mean_of8x8_blocks[1])) >> VARIANCE_PRECISION);
2344 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_2] = (uint16_t)((meanOf8x8SquaredValuesBlocks[2] - (mean_of8x8_blocks[2] * mean_of8x8_blocks[2])) >> VARIANCE_PRECISION);
2345 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_3] = (uint16_t)((meanOf8x8SquaredValuesBlocks[3] - (mean_of8x8_blocks[3] * mean_of8x8_blocks[3])) >> VARIANCE_PRECISION);
2346 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_4] = (uint16_t)((meanOf8x8SquaredValuesBlocks[4] - (mean_of8x8_blocks[4] * mean_of8x8_blocks[4])) >> VARIANCE_PRECISION);
2347 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_5] = (uint16_t)((meanOf8x8SquaredValuesBlocks[5] - (mean_of8x8_blocks[5] * mean_of8x8_blocks[5])) >> VARIANCE_PRECISION);
2348 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_6] = (uint16_t)((meanOf8x8SquaredValuesBlocks[6] - (mean_of8x8_blocks[6] * mean_of8x8_blocks[6])) >> VARIANCE_PRECISION);
2349 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_7] = (uint16_t)((meanOf8x8SquaredValuesBlocks[7] - (mean_of8x8_blocks[7] * mean_of8x8_blocks[7])) >> VARIANCE_PRECISION);
2350 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_8] = (uint16_t)((meanOf8x8SquaredValuesBlocks[8] - (mean_of8x8_blocks[8] * mean_of8x8_blocks[8])) >> VARIANCE_PRECISION);
2351 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_9] = (uint16_t)((meanOf8x8SquaredValuesBlocks[9] - (mean_of8x8_blocks[9] * mean_of8x8_blocks[9])) >> VARIANCE_PRECISION);
2352 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_10] = (uint16_t)((meanOf8x8SquaredValuesBlocks[10] - (mean_of8x8_blocks[10] * mean_of8x8_blocks[10])) >> VARIANCE_PRECISION);
2353 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_11] = (uint16_t)((meanOf8x8SquaredValuesBlocks[11] - (mean_of8x8_blocks[11] * mean_of8x8_blocks[11])) >> VARIANCE_PRECISION);
2354 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_12] = (uint16_t)((meanOf8x8SquaredValuesBlocks[12] - (mean_of8x8_blocks[12] * mean_of8x8_blocks[12])) >> VARIANCE_PRECISION);
2355 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_13] = (uint16_t)((meanOf8x8SquaredValuesBlocks[13] - (mean_of8x8_blocks[13] * mean_of8x8_blocks[13])) >> VARIANCE_PRECISION);
2356 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_14] = (uint16_t)((meanOf8x8SquaredValuesBlocks[14] - (mean_of8x8_blocks[14] * mean_of8x8_blocks[14])) >> VARIANCE_PRECISION);
2357 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_15] = (uint16_t)((meanOf8x8SquaredValuesBlocks[15] - (mean_of8x8_blocks[15] * mean_of8x8_blocks[15])) >> VARIANCE_PRECISION);
2358 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_16] = (uint16_t)((meanOf8x8SquaredValuesBlocks[16] - (mean_of8x8_blocks[16] * mean_of8x8_blocks[16])) >> VARIANCE_PRECISION);
2359 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_17] = (uint16_t)((meanOf8x8SquaredValuesBlocks[17] - (mean_of8x8_blocks[17] * mean_of8x8_blocks[17])) >> VARIANCE_PRECISION);
2360 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_18] = (uint16_t)((meanOf8x8SquaredValuesBlocks[18] - (mean_of8x8_blocks[18] * mean_of8x8_blocks[18])) >> VARIANCE_PRECISION);
2361 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_19] = (uint16_t)((meanOf8x8SquaredValuesBlocks[19] - (mean_of8x8_blocks[19] * mean_of8x8_blocks[19])) >> VARIANCE_PRECISION);
2362 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_20] = (uint16_t)((meanOf8x8SquaredValuesBlocks[20] - (mean_of8x8_blocks[20] * mean_of8x8_blocks[20])) >> VARIANCE_PRECISION);
2363 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_21] = (uint16_t)((meanOf8x8SquaredValuesBlocks[21] - (mean_of8x8_blocks[21] * mean_of8x8_blocks[21])) >> VARIANCE_PRECISION);
2364 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_22] = (uint16_t)((meanOf8x8SquaredValuesBlocks[22] - (mean_of8x8_blocks[22] * mean_of8x8_blocks[22])) >> VARIANCE_PRECISION);
2365 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_23] = (uint16_t)((meanOf8x8SquaredValuesBlocks[23] - (mean_of8x8_blocks[23] * mean_of8x8_blocks[23])) >> VARIANCE_PRECISION);
2366 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_24] = (uint16_t)((meanOf8x8SquaredValuesBlocks[24] - (mean_of8x8_blocks[24] * mean_of8x8_blocks[24])) >> VARIANCE_PRECISION);
2367 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_25] = (uint16_t)((meanOf8x8SquaredValuesBlocks[25] - (mean_of8x8_blocks[25] * mean_of8x8_blocks[25])) >> VARIANCE_PRECISION);
2368 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_26] = (uint16_t)((meanOf8x8SquaredValuesBlocks[26] - (mean_of8x8_blocks[26] * mean_of8x8_blocks[26])) >> VARIANCE_PRECISION);
2369 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_27] = (uint16_t)((meanOf8x8SquaredValuesBlocks[27] - (mean_of8x8_blocks[27] * mean_of8x8_blocks[27])) >> VARIANCE_PRECISION);
2370 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_28] = (uint16_t)((meanOf8x8SquaredValuesBlocks[28] - (mean_of8x8_blocks[28] * mean_of8x8_blocks[28])) >> VARIANCE_PRECISION);
2371 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_29] = (uint16_t)((meanOf8x8SquaredValuesBlocks[29] - (mean_of8x8_blocks[29] * mean_of8x8_blocks[29])) >> VARIANCE_PRECISION);
2372 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_30] = (uint16_t)((meanOf8x8SquaredValuesBlocks[30] - (mean_of8x8_blocks[30] * mean_of8x8_blocks[30])) >> VARIANCE_PRECISION);
2373 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_31] = (uint16_t)((meanOf8x8SquaredValuesBlocks[31] - (mean_of8x8_blocks[31] * mean_of8x8_blocks[31])) >> VARIANCE_PRECISION);
2374 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_32] = (uint16_t)((meanOf8x8SquaredValuesBlocks[32] - (mean_of8x8_blocks[32] * mean_of8x8_blocks[32])) >> VARIANCE_PRECISION);
2375 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_33] = (uint16_t)((meanOf8x8SquaredValuesBlocks[33] - (mean_of8x8_blocks[33] * mean_of8x8_blocks[33])) >> VARIANCE_PRECISION);
2376 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_34] = (uint16_t)((meanOf8x8SquaredValuesBlocks[34] - (mean_of8x8_blocks[34] * mean_of8x8_blocks[34])) >> VARIANCE_PRECISION);
2377 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_35] = (uint16_t)((meanOf8x8SquaredValuesBlocks[35] - (mean_of8x8_blocks[35] * mean_of8x8_blocks[35])) >> VARIANCE_PRECISION);
2378 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_36] = (uint16_t)((meanOf8x8SquaredValuesBlocks[36] - (mean_of8x8_blocks[36] * mean_of8x8_blocks[36])) >> VARIANCE_PRECISION);
2379 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_37] = (uint16_t)((meanOf8x8SquaredValuesBlocks[37] - (mean_of8x8_blocks[37] * mean_of8x8_blocks[37])) >> VARIANCE_PRECISION);
2380 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_38] = (uint16_t)((meanOf8x8SquaredValuesBlocks[38] - (mean_of8x8_blocks[38] * mean_of8x8_blocks[38])) >> VARIANCE_PRECISION);
2381 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_39] = (uint16_t)((meanOf8x8SquaredValuesBlocks[39] - (mean_of8x8_blocks[39] * mean_of8x8_blocks[39])) >> VARIANCE_PRECISION);
2382 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_40] = (uint16_t)((meanOf8x8SquaredValuesBlocks[40] - (mean_of8x8_blocks[40] * mean_of8x8_blocks[40])) >> VARIANCE_PRECISION);
2383 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_41] = (uint16_t)((meanOf8x8SquaredValuesBlocks[41] - (mean_of8x8_blocks[41] * mean_of8x8_blocks[41])) >> VARIANCE_PRECISION);
2384 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_42] = (uint16_t)((meanOf8x8SquaredValuesBlocks[42] - (mean_of8x8_blocks[42] * mean_of8x8_blocks[42])) >> VARIANCE_PRECISION);
2385 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_43] = (uint16_t)((meanOf8x8SquaredValuesBlocks[43] - (mean_of8x8_blocks[43] * mean_of8x8_blocks[43])) >> VARIANCE_PRECISION);
2386 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_44] = (uint16_t)((meanOf8x8SquaredValuesBlocks[44] - (mean_of8x8_blocks[44] * mean_of8x8_blocks[44])) >> VARIANCE_PRECISION);
2387 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_45] = (uint16_t)((meanOf8x8SquaredValuesBlocks[45] - (mean_of8x8_blocks[45] * mean_of8x8_blocks[45])) >> VARIANCE_PRECISION);
2388 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_46] = (uint16_t)((meanOf8x8SquaredValuesBlocks[46] - (mean_of8x8_blocks[46] * mean_of8x8_blocks[46])) >> VARIANCE_PRECISION);
2389 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_47] = (uint16_t)((meanOf8x8SquaredValuesBlocks[47] - (mean_of8x8_blocks[47] * mean_of8x8_blocks[47])) >> VARIANCE_PRECISION);
2390 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_48] = (uint16_t)((meanOf8x8SquaredValuesBlocks[48] - (mean_of8x8_blocks[48] * mean_of8x8_blocks[48])) >> VARIANCE_PRECISION);
2391 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_49] = (uint16_t)((meanOf8x8SquaredValuesBlocks[49] - (mean_of8x8_blocks[49] * mean_of8x8_blocks[49])) >> VARIANCE_PRECISION);
2392 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_50] = (uint16_t)((meanOf8x8SquaredValuesBlocks[50] - (mean_of8x8_blocks[50] * mean_of8x8_blocks[50])) >> VARIANCE_PRECISION);
2393 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_51] = (uint16_t)((meanOf8x8SquaredValuesBlocks[51] - (mean_of8x8_blocks[51] * mean_of8x8_blocks[51])) >> VARIANCE_PRECISION);
2394 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_52] = (uint16_t)((meanOf8x8SquaredValuesBlocks[52] - (mean_of8x8_blocks[52] * mean_of8x8_blocks[52])) >> VARIANCE_PRECISION);
2395 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_53] = (uint16_t)((meanOf8x8SquaredValuesBlocks[53] - (mean_of8x8_blocks[53] * mean_of8x8_blocks[53])) >> VARIANCE_PRECISION);
2396 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_54] = (uint16_t)((meanOf8x8SquaredValuesBlocks[54] - (mean_of8x8_blocks[54] * mean_of8x8_blocks[54])) >> VARIANCE_PRECISION);
2397 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_55] = (uint16_t)((meanOf8x8SquaredValuesBlocks[55] - (mean_of8x8_blocks[55] * mean_of8x8_blocks[55])) >> VARIANCE_PRECISION);
2398 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_56] = (uint16_t)((meanOf8x8SquaredValuesBlocks[56] - (mean_of8x8_blocks[56] * mean_of8x8_blocks[56])) >> VARIANCE_PRECISION);
2399 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_57] = (uint16_t)((meanOf8x8SquaredValuesBlocks[57] - (mean_of8x8_blocks[57] * mean_of8x8_blocks[57])) >> VARIANCE_PRECISION);
2400 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_58] = (uint16_t)((meanOf8x8SquaredValuesBlocks[58] - (mean_of8x8_blocks[58] * mean_of8x8_blocks[58])) >> VARIANCE_PRECISION);
2401 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_59] = (uint16_t)((meanOf8x8SquaredValuesBlocks[59] - (mean_of8x8_blocks[59] * mean_of8x8_blocks[59])) >> VARIANCE_PRECISION);
2402 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_60] = (uint16_t)((meanOf8x8SquaredValuesBlocks[60] - (mean_of8x8_blocks[60] * mean_of8x8_blocks[60])) >> VARIANCE_PRECISION);
2403 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_61] = (uint16_t)((meanOf8x8SquaredValuesBlocks[61] - (mean_of8x8_blocks[61] * mean_of8x8_blocks[61])) >> VARIANCE_PRECISION);
2404 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_62] = (uint16_t)((meanOf8x8SquaredValuesBlocks[62] - (mean_of8x8_blocks[62] * mean_of8x8_blocks[62])) >> VARIANCE_PRECISION);
2405 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_8x8_63] = (uint16_t)((meanOf8x8SquaredValuesBlocks[63] - (mean_of8x8_blocks[63] * mean_of8x8_blocks[63])) >> VARIANCE_PRECISION);
2406 :
2407 : // 16x16 variances
2408 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_0] = (uint16_t)((meanOf16x16SquaredValuesBlocks[0] - (meanOf16x16Blocks[0] * meanOf16x16Blocks[0])) >> VARIANCE_PRECISION);
2409 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_1] = (uint16_t)((meanOf16x16SquaredValuesBlocks[1] - (meanOf16x16Blocks[1] * meanOf16x16Blocks[1])) >> VARIANCE_PRECISION);
2410 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_2] = (uint16_t)((meanOf16x16SquaredValuesBlocks[2] - (meanOf16x16Blocks[2] * meanOf16x16Blocks[2])) >> VARIANCE_PRECISION);
2411 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_3] = (uint16_t)((meanOf16x16SquaredValuesBlocks[3] - (meanOf16x16Blocks[3] * meanOf16x16Blocks[3])) >> VARIANCE_PRECISION);
2412 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_4] = (uint16_t)((meanOf16x16SquaredValuesBlocks[4] - (meanOf16x16Blocks[4] * meanOf16x16Blocks[4])) >> VARIANCE_PRECISION);
2413 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_5] = (uint16_t)((meanOf16x16SquaredValuesBlocks[5] - (meanOf16x16Blocks[5] * meanOf16x16Blocks[5])) >> VARIANCE_PRECISION);
2414 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_6] = (uint16_t)((meanOf16x16SquaredValuesBlocks[6] - (meanOf16x16Blocks[6] * meanOf16x16Blocks[6])) >> VARIANCE_PRECISION);
2415 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_7] = (uint16_t)((meanOf16x16SquaredValuesBlocks[7] - (meanOf16x16Blocks[7] * meanOf16x16Blocks[7])) >> VARIANCE_PRECISION);
2416 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_8] = (uint16_t)((meanOf16x16SquaredValuesBlocks[8] - (meanOf16x16Blocks[8] * meanOf16x16Blocks[8])) >> VARIANCE_PRECISION);
2417 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_9] = (uint16_t)((meanOf16x16SquaredValuesBlocks[9] - (meanOf16x16Blocks[9] * meanOf16x16Blocks[9])) >> VARIANCE_PRECISION);
2418 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_10] = (uint16_t)((meanOf16x16SquaredValuesBlocks[10] - (meanOf16x16Blocks[10] * meanOf16x16Blocks[10])) >> VARIANCE_PRECISION);
2419 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_11] = (uint16_t)((meanOf16x16SquaredValuesBlocks[11] - (meanOf16x16Blocks[11] * meanOf16x16Blocks[11])) >> VARIANCE_PRECISION);
2420 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_12] = (uint16_t)((meanOf16x16SquaredValuesBlocks[12] - (meanOf16x16Blocks[12] * meanOf16x16Blocks[12])) >> VARIANCE_PRECISION);
2421 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_13] = (uint16_t)((meanOf16x16SquaredValuesBlocks[13] - (meanOf16x16Blocks[13] * meanOf16x16Blocks[13])) >> VARIANCE_PRECISION);
2422 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_14] = (uint16_t)((meanOf16x16SquaredValuesBlocks[14] - (meanOf16x16Blocks[14] * meanOf16x16Blocks[14])) >> VARIANCE_PRECISION);
2423 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_16x16_15] = (uint16_t)((meanOf16x16SquaredValuesBlocks[15] - (meanOf16x16Blocks[15] * meanOf16x16Blocks[15])) >> VARIANCE_PRECISION);
2424 :
2425 : // 32x32 variances
2426 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_32x32_0] = (uint16_t)((meanOf32x32SquaredValuesBlocks[0] - (meanOf32x32Blocks[0] * meanOf32x32Blocks[0])) >> VARIANCE_PRECISION);
2427 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_32x32_1] = (uint16_t)((meanOf32x32SquaredValuesBlocks[1] - (meanOf32x32Blocks[1] * meanOf32x32Blocks[1])) >> VARIANCE_PRECISION);
2428 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_32x32_2] = (uint16_t)((meanOf32x32SquaredValuesBlocks[2] - (meanOf32x32Blocks[2] * meanOf32x32Blocks[2])) >> VARIANCE_PRECISION);
2429 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_32x32_3] = (uint16_t)((meanOf32x32SquaredValuesBlocks[3] - (meanOf32x32Blocks[3] * meanOf32x32Blocks[3])) >> VARIANCE_PRECISION);
2430 :
2431 : // 64x64 variance
2432 7190 : picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_64x64] = (uint16_t)((meanOf64x64SquaredValuesBlocks - (meanOf64x64Blocks * meanOf64x64Blocks)) >> VARIANCE_PRECISION);
2433 :
2434 7190 : return return_error;
2435 : }
2436 :
2437 0 : EbErrorType DenoiseInputPicture(
2438 : PictureAnalysisContext *context_ptr,
2439 : SequenceControlSet *sequence_control_set_ptr,
2440 : PictureParentControlSet *picture_control_set_ptr,
2441 : uint32_t sb_total_count,
2442 : EbPictureBufferDesc *input_picture_ptr,
2443 : EbPictureBufferDesc *denoised_picture_ptr,
2444 : uint32_t picture_width_in_sb)
2445 : {
2446 0 : EbErrorType return_error = EB_ErrorNone;
2447 :
2448 : uint32_t lcuCodingOrder;
2449 : uint32_t sb_origin_x;
2450 : uint32_t sb_origin_y;
2451 : uint16_t verticalIdx;
2452 :
2453 0 : uint32_t color_format = input_picture_ptr->color_format;
2454 0 : const uint16_t subsampling_x = (color_format == EB_YUV444 ? 1 : 2) - 1;
2455 0 : const uint16_t subsampling_y = (color_format >= EB_YUV422 ? 1 : 2) - 1;
2456 :
2457 : //use denoised input if the source is extremly noisy
2458 0 : if (picture_control_set_ptr->pic_noise_class >= PIC_NOISE_CLASS_4) {
2459 0 : uint32_t inLumaOffSet = input_picture_ptr->origin_x + input_picture_ptr->origin_y * input_picture_ptr->stride_y;
2460 0 : uint32_t inChromaOffSet = (input_picture_ptr->origin_x >> subsampling_x) + (input_picture_ptr->origin_y >> subsampling_y) * input_picture_ptr->stride_cb;
2461 0 : uint32_t denLumaOffSet = denoised_picture_ptr->origin_x + denoised_picture_ptr->origin_y * denoised_picture_ptr->stride_y;
2462 0 : uint32_t denChromaOffSet = (denoised_picture_ptr->origin_x >> subsampling_x) + (denoised_picture_ptr->origin_y >> subsampling_y) * denoised_picture_ptr->stride_cb;
2463 :
2464 : //filter Luma
2465 0 : for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
2466 0 : sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2467 0 : sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2468 :
2469 0 : if (sb_origin_x == 0)
2470 0 : noise_extract_luma_strong(
2471 : input_picture_ptr,
2472 : denoised_picture_ptr,
2473 : sb_origin_y,
2474 : sb_origin_x);
2475 :
2476 0 : if (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width)
2477 : {
2478 0 : noise_extract_luma_strong_c(
2479 : input_picture_ptr,
2480 : denoised_picture_ptr,
2481 : sb_origin_y,
2482 : sb_origin_x);
2483 : }
2484 : }
2485 :
2486 : //copy Luma
2487 0 : for (verticalIdx = 0; verticalIdx < input_picture_ptr->height; ++verticalIdx) {
2488 0 : EB_MEMCPY(input_picture_ptr->buffer_y + inLumaOffSet + verticalIdx * input_picture_ptr->stride_y,
2489 : denoised_picture_ptr->buffer_y + denLumaOffSet + verticalIdx * denoised_picture_ptr->stride_y,
2490 : sizeof(uint8_t) * input_picture_ptr->width);
2491 : }
2492 :
2493 : //copy chroma
2494 0 : for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
2495 0 : sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2496 0 : sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2497 :
2498 0 : if (sb_origin_x == 0)
2499 0 : noise_extract_chroma_strong(
2500 : input_picture_ptr,
2501 : denoised_picture_ptr,
2502 : sb_origin_y >> subsampling_y,
2503 : sb_origin_x >> subsampling_x);
2504 :
2505 0 : if (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width)
2506 : {
2507 0 : noise_extract_chroma_strong_c(
2508 : input_picture_ptr,
2509 : denoised_picture_ptr,
2510 : sb_origin_y >> subsampling_y,
2511 : sb_origin_x >> subsampling_x);
2512 : }
2513 : }
2514 :
2515 : //copy chroma
2516 0 : for (verticalIdx = 0; verticalIdx < input_picture_ptr->height >> subsampling_y; ++verticalIdx) {
2517 0 : EB_MEMCPY(input_picture_ptr->buffer_cb + inChromaOffSet + verticalIdx * input_picture_ptr->stride_cb,
2518 : denoised_picture_ptr->buffer_cb + denChromaOffSet + verticalIdx * denoised_picture_ptr->stride_cb,
2519 : sizeof(uint8_t) * input_picture_ptr->width >> subsampling_x);
2520 :
2521 0 : EB_MEMCPY(input_picture_ptr->buffer_cr + inChromaOffSet + verticalIdx * input_picture_ptr->stride_cr,
2522 : denoised_picture_ptr->buffer_cr + denChromaOffSet + verticalIdx * denoised_picture_ptr->stride_cr,
2523 : sizeof(uint8_t) * input_picture_ptr->width >> subsampling_x);
2524 : }
2525 : }
2526 0 : else if (picture_control_set_ptr->pic_noise_class >= PIC_NOISE_CLASS_3_1) {
2527 0 : uint32_t inLumaOffSet = input_picture_ptr->origin_x + input_picture_ptr->origin_y * input_picture_ptr->stride_y;
2528 0 : uint32_t inChromaOffSet = (input_picture_ptr->origin_x >> subsampling_x) + (input_picture_ptr->origin_y >> subsampling_y) * input_picture_ptr->stride_cb;
2529 0 : uint32_t denLumaOffSet = denoised_picture_ptr->origin_x + denoised_picture_ptr->origin_y * denoised_picture_ptr->stride_y;
2530 0 : uint32_t denChromaOffSet = (denoised_picture_ptr->origin_x >> subsampling_x) + (denoised_picture_ptr->origin_y >> subsampling_y) * denoised_picture_ptr->stride_cb;
2531 :
2532 0 : for (verticalIdx = 0; verticalIdx < input_picture_ptr->height; ++verticalIdx) {
2533 0 : EB_MEMCPY(input_picture_ptr->buffer_y + inLumaOffSet + verticalIdx * input_picture_ptr->stride_y,
2534 : denoised_picture_ptr->buffer_y + denLumaOffSet + verticalIdx * denoised_picture_ptr->stride_y,
2535 : sizeof(uint8_t) * input_picture_ptr->width);
2536 : }
2537 :
2538 : //copy chroma
2539 0 : for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
2540 0 : sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2541 0 : sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2542 :
2543 0 : if (sb_origin_x == 0)
2544 0 : noise_extract_chroma_weak(
2545 : input_picture_ptr,
2546 : denoised_picture_ptr,
2547 : sb_origin_y >> subsampling_y,
2548 : sb_origin_x >> subsampling_x);
2549 :
2550 0 : if (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width)
2551 : {
2552 0 : noise_extract_chroma_weak_c(
2553 : input_picture_ptr,
2554 : denoised_picture_ptr,
2555 : sb_origin_y >> subsampling_y,
2556 : sb_origin_x >> subsampling_x);
2557 : }
2558 : }
2559 :
2560 0 : for (verticalIdx = 0; verticalIdx < input_picture_ptr->height >> subsampling_y; ++verticalIdx) {
2561 0 : EB_MEMCPY(input_picture_ptr->buffer_cb + inChromaOffSet + verticalIdx * input_picture_ptr->stride_cb,
2562 : denoised_picture_ptr->buffer_cb + denChromaOffSet + verticalIdx * denoised_picture_ptr->stride_cb,
2563 : sizeof(uint8_t) * input_picture_ptr->width >> subsampling_x);
2564 :
2565 0 : EB_MEMCPY(input_picture_ptr->buffer_cr + inChromaOffSet + verticalIdx * input_picture_ptr->stride_cr,
2566 : denoised_picture_ptr->buffer_cr + denChromaOffSet + verticalIdx * denoised_picture_ptr->stride_cr,
2567 : sizeof(uint8_t) * input_picture_ptr->width >> subsampling_x);
2568 : }
2569 : }
2570 0 : else if (context_ptr->pic_noise_variance_float >= 1.0) {
2571 : //Luma : use filtered only for flatNoise LCUs
2572 0 : for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
2573 0 : sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2574 0 : sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2575 0 : uint32_t sb_height = MIN(BLOCK_SIZE_64, input_picture_ptr->height - sb_origin_y);
2576 0 : uint32_t sb_width = MIN(BLOCK_SIZE_64, input_picture_ptr->width - sb_origin_x);
2577 :
2578 0 : uint32_t inLumaOffSet = input_picture_ptr->origin_x + sb_origin_x + (input_picture_ptr->origin_y + sb_origin_y) * input_picture_ptr->stride_y;
2579 0 : uint32_t denLumaOffSet = denoised_picture_ptr->origin_x + sb_origin_x + (denoised_picture_ptr->origin_y + sb_origin_y) * denoised_picture_ptr->stride_y;
2580 :
2581 0 : if (picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] == 1) {
2582 0 : for (verticalIdx = 0; verticalIdx < sb_height; ++verticalIdx) {
2583 0 : EB_MEMCPY(input_picture_ptr->buffer_y + inLumaOffSet + verticalIdx * input_picture_ptr->stride_y,
2584 : denoised_picture_ptr->buffer_y + denLumaOffSet + verticalIdx * denoised_picture_ptr->stride_y,
2585 : sizeof(uint8_t) * sb_width);
2586 : }
2587 : }
2588 : }
2589 : }
2590 :
2591 0 : return return_error;
2592 : }
2593 :
2594 0 : EbErrorType DetectInputPictureNoise(
2595 : PictureAnalysisContext *context_ptr,
2596 : SequenceControlSet *sequence_control_set_ptr,
2597 : PictureParentControlSet *picture_control_set_ptr,
2598 : uint32_t sb_total_count,
2599 : EbPictureBufferDesc *input_picture_ptr,
2600 : EbPictureBufferDesc *noise_picture_ptr,
2601 : EbPictureBufferDesc *denoised_picture_ptr,
2602 : uint32_t picture_width_in_sb)
2603 : {
2604 0 : EbErrorType return_error = EB_ErrorNone;
2605 : uint32_t lcuCodingOrder;
2606 :
2607 : uint64_t picNoiseVariance;
2608 :
2609 : uint32_t totLcuCount, noiseTh;
2610 :
2611 : uint32_t sb_origin_x;
2612 : uint32_t sb_origin_y;
2613 : uint32_t inputLumaOriginIndex;
2614 :
2615 0 : picNoiseVariance = 0;
2616 0 : totLcuCount = 0;
2617 :
2618 : //Variance calc for noise picture
2619 0 : for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
2620 0 : sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2621 0 : sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2622 0 : inputLumaOriginIndex = (noise_picture_ptr->origin_y + sb_origin_y) * noise_picture_ptr->stride_y +
2623 0 : noise_picture_ptr->origin_x + sb_origin_x;
2624 :
2625 0 : uint32_t noiseOriginIndex = noise_picture_ptr->origin_x + sb_origin_x + noise_picture_ptr->origin_y * noise_picture_ptr->stride_y;
2626 :
2627 0 : if (sb_origin_x == 0)
2628 0 : noise_extract_luma_weak(
2629 : input_picture_ptr,
2630 : denoised_picture_ptr,
2631 : noise_picture_ptr,
2632 : sb_origin_y,
2633 : sb_origin_x);
2634 :
2635 0 : if (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width)
2636 : {
2637 0 : noise_extract_luma_weak_c(
2638 : input_picture_ptr,
2639 : denoised_picture_ptr,
2640 : noise_picture_ptr,
2641 : sb_origin_y,
2642 : sb_origin_x);
2643 : }
2644 :
2645 : //do it only for complete 64x64 blocks
2646 0 : if (sb_origin_x + 64 <= input_picture_ptr->width && sb_origin_y + 64 <= input_picture_ptr->height)
2647 : {
2648 : uint64_t noiseBlkVar32x32[4], denoiseBlkVar32x32[4];
2649 :
2650 0 : uint64_t noiseBlkVar = ComputeVariance64x64(
2651 : sequence_control_set_ptr,
2652 : noise_picture_ptr,
2653 : noiseOriginIndex,
2654 : noiseBlkVar32x32);
2655 :
2656 : uint64_t noiseBlkVarTh;
2657 0 : uint64_t denBlkVarTh = FLAT_MAX_VAR;
2658 0 : noiseBlkVarTh = NOISE_MIN_LEVEL_M6_M7;
2659 :
2660 0 : picNoiseVariance += (noiseBlkVar >> 16);
2661 :
2662 0 : uint64_t denBlkVar = ComputeVariance64x64(
2663 : sequence_control_set_ptr,
2664 : denoised_picture_ptr,
2665 : inputLumaOriginIndex,
2666 : denoiseBlkVar32x32) >> 16;
2667 :
2668 0 : if (denBlkVar < denBlkVarTh && noiseBlkVar > noiseBlkVarTh)
2669 0 : picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 1;
2670 0 : totLcuCount++;
2671 : }
2672 : }
2673 :
2674 0 : if (totLcuCount > 0) {
2675 0 : context_ptr->pic_noise_variance_float = (double)picNoiseVariance / (double)totLcuCount;
2676 0 : picNoiseVariance = picNoiseVariance / totLcuCount;
2677 : }
2678 :
2679 : //the variance of a 64x64 noise area tends to be bigger for small resolutions.
2680 0 : if (sequence_control_set_ptr->seq_header.max_frame_height <= 720)
2681 0 : noiseTh = 25;
2682 : else
2683 0 : noiseTh = 0;
2684 :
2685 0 : if (picNoiseVariance >= 80 + noiseTh)
2686 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_10;
2687 0 : else if (picNoiseVariance >= 70 + noiseTh)
2688 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_9;
2689 0 : else if (picNoiseVariance >= 60 + noiseTh)
2690 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_8;
2691 0 : else if (picNoiseVariance >= 50 + noiseTh)
2692 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_7;
2693 0 : else if (picNoiseVariance >= 40 + noiseTh)
2694 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_6;
2695 0 : else if (picNoiseVariance >= 30 + noiseTh)
2696 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_5;
2697 0 : else if (picNoiseVariance >= 20 + noiseTh)
2698 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_4;
2699 0 : else if (picNoiseVariance >= 17 + noiseTh)
2700 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_3_1;
2701 0 : else if (picNoiseVariance >= 10 + noiseTh)
2702 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_3;
2703 0 : else if (picNoiseVariance >= 5 + noiseTh)
2704 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_2;
2705 : else
2706 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_1;
2707 :
2708 0 : if (picture_control_set_ptr->pic_noise_class >= PIC_NOISE_CLASS_4)
2709 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_3_1;
2710 :
2711 0 : return return_error;
2712 : }
2713 :
2714 0 : static int32_t apply_denoise_2d(SequenceControlSet *scs_ptr,
2715 : PictureParentControlSet *pcs_ptr,
2716 : EbPictureBufferDesc *inputPicturePointer)
2717 : {
2718 0 : if (eb_aom_denoise_and_model_run(pcs_ptr->denoise_and_model, inputPicturePointer,
2719 : &pcs_ptr->frm_hdr.film_grain_params,
2720 0 : scs_ptr->static_config.encoder_bit_depth > EB_8BIT)){
2721 : }
2722 0 : return 0;
2723 : }
2724 :
2725 0 : EbErrorType denoise_estimate_film_grain(
2726 : SequenceControlSet *sequence_control_set_ptr,
2727 : PictureParentControlSet *picture_control_set_ptr)
2728 : {
2729 0 : EbErrorType return_error = EB_ErrorNone;
2730 :
2731 0 : FrameHeader *frm_hdr = &picture_control_set_ptr->frm_hdr;
2732 :
2733 0 : EbPictureBufferDesc *input_picture_ptr = picture_control_set_ptr->enhanced_picture_ptr;
2734 0 : frm_hdr->film_grain_params.apply_grain = 0;
2735 :
2736 0 : if (sequence_control_set_ptr->film_grain_denoise_strength) {
2737 0 : if (apply_denoise_2d(sequence_control_set_ptr, picture_control_set_ptr, input_picture_ptr) < 0)
2738 0 : return 1;
2739 : }
2740 :
2741 0 : sequence_control_set_ptr->seq_header.film_grain_params_present |= frm_hdr->film_grain_params.apply_grain;
2742 :
2743 0 : return return_error; //todo: add proper error handling
2744 : }
2745 :
2746 0 : EbErrorType FullSampleDenoise(
2747 : PictureAnalysisContext *context_ptr,
2748 : SequenceControlSet *sequence_control_set_ptr,
2749 : PictureParentControlSet *picture_control_set_ptr,
2750 : uint32_t sb_total_count,
2751 : EbBool denoise_flag,
2752 : uint32_t picture_width_in_sb)
2753 : {
2754 0 : EbErrorType return_error = EB_ErrorNone;
2755 :
2756 : uint32_t lcuCodingOrder;
2757 0 : EbPictureBufferDesc *input_picture_ptr = picture_control_set_ptr->enhanced_picture_ptr;
2758 0 : EbPictureBufferDesc *denoised_picture_ptr = context_ptr->denoised_picture_ptr;
2759 0 : EbPictureBufferDesc *noise_picture_ptr = context_ptr->noise_picture_ptr;
2760 :
2761 : //Reset the flat noise flag array to False for both RealTime/HighComplexity Modes
2762 0 : for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder)
2763 0 : picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 0;
2764 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_INV; //this init is for both REAL-TIME and BEST-QUALITY
2765 :
2766 0 : DetectInputPictureNoise(
2767 : context_ptr,
2768 : sequence_control_set_ptr,
2769 : picture_control_set_ptr,
2770 : sb_total_count,
2771 : input_picture_ptr,
2772 : noise_picture_ptr,
2773 : denoised_picture_ptr,
2774 : picture_width_in_sb);
2775 :
2776 0 : if (denoise_flag == EB_TRUE)
2777 : {
2778 0 : DenoiseInputPicture(
2779 : context_ptr,
2780 : sequence_control_set_ptr,
2781 : picture_control_set_ptr,
2782 : sb_total_count,
2783 : input_picture_ptr,
2784 : denoised_picture_ptr,
2785 : picture_width_in_sb);
2786 : }
2787 :
2788 0 : return return_error;
2789 : }
2790 :
2791 0 : EbErrorType SubSampleFilterNoise(
2792 : SequenceControlSet *sequence_control_set_ptr,
2793 : PictureParentControlSet *picture_control_set_ptr,
2794 : uint32_t sb_total_count,
2795 : EbPictureBufferDesc *input_picture_ptr,
2796 : EbPictureBufferDesc *noise_picture_ptr,
2797 : EbPictureBufferDesc *denoised_picture_ptr,
2798 : uint32_t picture_width_in_sb)
2799 : {
2800 0 : EbErrorType return_error = EB_ErrorNone;
2801 :
2802 : uint32_t lcuCodingOrder;
2803 : uint32_t sb_origin_x;
2804 : uint32_t sb_origin_y;
2805 : uint16_t verticalIdx;
2806 :
2807 0 : uint32_t color_format = input_picture_ptr->color_format;
2808 0 : const uint16_t subsampling_x = (color_format == EB_YUV444 ? 1 : 2) - 1;
2809 0 : const uint16_t subsampling_y = (color_format >= EB_YUV422 ? 1 : 2) - 1;
2810 :
2811 0 : if (picture_control_set_ptr->pic_noise_class == PIC_NOISE_CLASS_3_1) {
2812 0 : uint32_t inLumaOffSet = input_picture_ptr->origin_x + input_picture_ptr->origin_y * input_picture_ptr->stride_y;
2813 0 : uint32_t inChromaOffSet = (input_picture_ptr->origin_x >> subsampling_x) + (input_picture_ptr->origin_y >> subsampling_y) * input_picture_ptr->stride_cb;
2814 0 : uint32_t denLumaOffSet = denoised_picture_ptr->origin_x + denoised_picture_ptr->origin_y * denoised_picture_ptr->stride_y;
2815 0 : uint32_t denChromaOffSet = (denoised_picture_ptr->origin_x >> subsampling_x) + (denoised_picture_ptr->origin_y >> subsampling_y) * denoised_picture_ptr->stride_cb;
2816 :
2817 : //filter Luma
2818 0 : for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
2819 0 : sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2820 0 : sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2821 :
2822 0 : if (sb_origin_x == 0)
2823 0 : noise_extract_luma_weak(
2824 : input_picture_ptr,
2825 : denoised_picture_ptr,
2826 : noise_picture_ptr,
2827 : sb_origin_y,
2828 : sb_origin_x);
2829 :
2830 0 : if (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width)
2831 : {
2832 0 : noise_extract_luma_weak_c(
2833 : input_picture_ptr,
2834 : denoised_picture_ptr,
2835 : noise_picture_ptr,
2836 : sb_origin_y,
2837 : sb_origin_x);
2838 : }
2839 : }
2840 :
2841 : //copy luma
2842 0 : for (verticalIdx = 0; verticalIdx < input_picture_ptr->height; ++verticalIdx) {
2843 0 : EB_MEMCPY(input_picture_ptr->buffer_y + inLumaOffSet + verticalIdx * input_picture_ptr->stride_y,
2844 : denoised_picture_ptr->buffer_y + denLumaOffSet + verticalIdx * denoised_picture_ptr->stride_y,
2845 : sizeof(uint8_t) * input_picture_ptr->width);
2846 : }
2847 :
2848 : //filter chroma
2849 0 : for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
2850 0 : sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2851 0 : sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2852 :
2853 0 : if (sb_origin_x == 0)
2854 0 : noise_extract_chroma_weak(
2855 : input_picture_ptr,
2856 : denoised_picture_ptr,
2857 : sb_origin_y >> subsampling_y,
2858 : sb_origin_x >> subsampling_x);
2859 :
2860 0 : if (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width)
2861 : {
2862 0 : noise_extract_chroma_weak_c(
2863 : input_picture_ptr,
2864 : denoised_picture_ptr,
2865 : sb_origin_y >> subsampling_y,
2866 : sb_origin_x >> subsampling_x);
2867 : }
2868 : }
2869 :
2870 : //copy chroma
2871 0 : for (verticalIdx = 0; verticalIdx < input_picture_ptr->height >> subsampling_y; ++verticalIdx) {
2872 0 : EB_MEMCPY(input_picture_ptr->buffer_cb + inChromaOffSet + verticalIdx * input_picture_ptr->stride_cb,
2873 : denoised_picture_ptr->buffer_cb + denChromaOffSet + verticalIdx * denoised_picture_ptr->stride_cb,
2874 : sizeof(uint8_t) * input_picture_ptr->width >> subsampling_x);
2875 :
2876 0 : EB_MEMCPY(input_picture_ptr->buffer_cr + inChromaOffSet + verticalIdx * input_picture_ptr->stride_cr,
2877 : denoised_picture_ptr->buffer_cr + denChromaOffSet + verticalIdx * denoised_picture_ptr->stride_cr,
2878 : sizeof(uint8_t) * input_picture_ptr->width >> subsampling_x);
2879 : }
2880 : }
2881 0 : else if (picture_control_set_ptr->pic_noise_class == PIC_NOISE_CLASS_2) {
2882 0 : uint32_t newTotFN = 0;
2883 :
2884 : //for each SB ,re check the FN information for only the FNdecim ones
2885 0 : for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
2886 0 : sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2887 0 : sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2888 0 : uint32_t inputLumaOriginIndex = noise_picture_ptr->origin_x + sb_origin_x + (noise_picture_ptr->origin_y + sb_origin_y) * noise_picture_ptr->stride_y;
2889 0 : uint32_t noiseOriginIndex = noise_picture_ptr->origin_x + sb_origin_x + (noise_picture_ptr->origin_y * noise_picture_ptr->stride_y);
2890 :
2891 0 : if (sb_origin_x + 64 <= input_picture_ptr->width && sb_origin_y + 64 <= input_picture_ptr->height && picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] == 1)
2892 : {
2893 0 : noise_extract_luma_weak_lcu(
2894 : input_picture_ptr,
2895 : denoised_picture_ptr,
2896 : noise_picture_ptr,
2897 : sb_origin_y,
2898 : sb_origin_x);
2899 :
2900 0 : if (sb_origin_x + BLOCK_SIZE_64 > input_picture_ptr->width)
2901 : {
2902 0 : noise_extract_luma_weak_lcu_c(
2903 : input_picture_ptr,
2904 : denoised_picture_ptr,
2905 : noise_picture_ptr,
2906 : sb_origin_y,
2907 : sb_origin_x);
2908 : }
2909 :
2910 : uint64_t noiseBlkVar32x32[4], denoiseBlkVar32x32[4];
2911 0 : uint64_t noiseBlkVar = ComputeVariance64x64(
2912 : sequence_control_set_ptr,
2913 : noise_picture_ptr,
2914 : noiseOriginIndex,
2915 : noiseBlkVar32x32);
2916 0 : uint64_t denBlkVar = ComputeVariance64x64(
2917 : sequence_control_set_ptr,
2918 : denoised_picture_ptr,
2919 : inputLumaOriginIndex,
2920 : denoiseBlkVar32x32) >> 16;
2921 :
2922 : uint64_t noiseBlkVarTh;
2923 0 : uint64_t denBlkVarTh = FLAT_MAX_VAR;
2924 0 : noiseBlkVarTh = NOISE_MIN_LEVEL_M6_M7;
2925 :
2926 0 : if (denBlkVar<denBlkVarTh && noiseBlkVar> noiseBlkVarTh) {
2927 0 : picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 1;
2928 : //printf("POC %i (%i,%i) denBlkVar: %i noiseBlkVar :%i\n", picture_control_set_ptr->picture_number,sb_origin_x,sb_origin_y, denBlkVar, noiseBlkVar);
2929 0 : newTotFN++;
2930 : }
2931 : else
2932 0 : picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 0;
2933 : }
2934 : }
2935 :
2936 0 : for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder) {
2937 0 : sb_origin_x = (lcuCodingOrder % picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2938 0 : sb_origin_y = (lcuCodingOrder / picture_width_in_sb) * sequence_control_set_ptr->sb_sz;
2939 :
2940 0 : if (sb_origin_x + 64 <= input_picture_ptr->width && sb_origin_y + 64 <= input_picture_ptr->height)
2941 : {
2942 : //use the denoised for FN LCUs
2943 0 : if (picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] == 1) {
2944 0 : uint32_t sb_height = MIN(BLOCK_SIZE_64, input_picture_ptr->height - sb_origin_y);
2945 0 : uint32_t sb_width = MIN(BLOCK_SIZE_64, input_picture_ptr->width - sb_origin_x);
2946 :
2947 0 : uint32_t inLumaOffSet = input_picture_ptr->origin_x + sb_origin_x + (input_picture_ptr->origin_y + sb_origin_y) * input_picture_ptr->stride_y;
2948 0 : uint32_t denLumaOffSet = denoised_picture_ptr->origin_x + sb_origin_x + (denoised_picture_ptr->origin_y + sb_origin_y) * denoised_picture_ptr->stride_y;
2949 :
2950 0 : for (verticalIdx = 0; verticalIdx < sb_height; ++verticalIdx) {
2951 0 : EB_MEMCPY(input_picture_ptr->buffer_y + inLumaOffSet + verticalIdx * input_picture_ptr->stride_y,
2952 : denoised_picture_ptr->buffer_y + denLumaOffSet + verticalIdx * denoised_picture_ptr->stride_y,
2953 : sizeof(uint8_t) * sb_width);
2954 : }
2955 : }
2956 : }
2957 : }
2958 : }
2959 0 : return return_error;
2960 : }
2961 :
2962 0 : EbErrorType QuarterSampleDetectNoise(
2963 : PictureAnalysisContext *context_ptr,
2964 : PictureParentControlSet *picture_control_set_ptr,
2965 : EbPictureBufferDesc *quarter_decimated_picture_ptr,
2966 : EbPictureBufferDesc *noise_picture_ptr,
2967 : EbPictureBufferDesc *denoised_picture_ptr,
2968 : uint32_t picture_width_in_sb)
2969 : {
2970 0 : EbErrorType return_error = EB_ErrorNone;
2971 :
2972 : uint64_t picNoiseVariance;
2973 :
2974 : uint32_t totLcuCount, noiseTh;
2975 :
2976 : uint32_t blockIndex;
2977 :
2978 0 : picNoiseVariance = 0;
2979 0 : totLcuCount = 0;
2980 :
2981 : uint16_t vert64x64Index;
2982 : uint16_t horz64x64Index;
2983 : uint32_t block64x64X;
2984 : uint32_t block64x64Y;
2985 : uint32_t vert32x32Index;
2986 : uint32_t horz32x32Index;
2987 : uint32_t block32x32X;
2988 : uint32_t block32x32Y;
2989 : uint32_t noiseOriginIndex;
2990 : uint32_t lcuCodingOrder;
2991 :
2992 : // Loop over 64x64 blocks on the downsampled domain (each block would contain 16 LCUs on the full sampled domain)
2993 0 : for (vert64x64Index = 0; vert64x64Index < (quarter_decimated_picture_ptr->height / 64); vert64x64Index++) {
2994 0 : for (horz64x64Index = 0; horz64x64Index < (quarter_decimated_picture_ptr->width / 64); horz64x64Index++) {
2995 0 : block64x64X = horz64x64Index * 64;
2996 0 : block64x64Y = vert64x64Index * 64;
2997 :
2998 0 : if (block64x64X == 0)
2999 0 : noise_extract_luma_weak(
3000 : quarter_decimated_picture_ptr,
3001 : denoised_picture_ptr,
3002 : noise_picture_ptr,
3003 : block64x64Y,
3004 : block64x64X);
3005 :
3006 0 : if (block64x64Y + BLOCK_SIZE_64 > quarter_decimated_picture_ptr->width)
3007 : {
3008 0 : noise_extract_luma_weak_c(
3009 : quarter_decimated_picture_ptr,
3010 : denoised_picture_ptr,
3011 : noise_picture_ptr,
3012 : block64x64Y,
3013 : block64x64X);
3014 : }
3015 :
3016 : // Loop over 32x32 blocks (i.e, 64x64 blocks in full resolution)
3017 0 : for (vert32x32Index = 0; vert32x32Index < 2; vert32x32Index++) {
3018 0 : for (horz32x32Index = 0; horz32x32Index < 2; horz32x32Index++) {
3019 0 : block32x32X = block64x64X + horz32x32Index * 32;
3020 0 : block32x32Y = block64x64Y + vert32x32Index * 32;
3021 :
3022 : //do it only for complete 32x32 blocks (i.e, complete 64x64 blocks in full resolution)
3023 0 : if ((block32x32X + 32 <= quarter_decimated_picture_ptr->width) && (block32x32Y + 32 <= quarter_decimated_picture_ptr->height))
3024 : {
3025 0 : lcuCodingOrder = ((vert64x64Index * 2) + vert32x32Index) * picture_width_in_sb + ((horz64x64Index * 2) + horz32x32Index);
3026 :
3027 : uint64_t noiseBlkVar8x8[16], denoiseBlkVar8x8[16];
3028 :
3029 0 : noiseOriginIndex = noise_picture_ptr->origin_x + block32x32X + noise_picture_ptr->origin_y * noise_picture_ptr->stride_y;
3030 :
3031 0 : uint64_t noiseBlkVar = ComputeVariance32x32(
3032 : noise_picture_ptr,
3033 : noiseOriginIndex,
3034 : noiseBlkVar8x8);
3035 :
3036 0 : picNoiseVariance += (noiseBlkVar >> 16);
3037 :
3038 0 : blockIndex = (noise_picture_ptr->origin_y + block32x32Y) * noise_picture_ptr->stride_y + noise_picture_ptr->origin_x + block32x32X;
3039 :
3040 0 : uint64_t denBlkVar = ComputeVariance32x32(
3041 : denoised_picture_ptr,
3042 : blockIndex,
3043 : denoiseBlkVar8x8) >> 16;
3044 :
3045 : uint64_t denBlkVarDecTh;
3046 0 : denBlkVarDecTh = NOISE_MIN_LEVEL_DECIM_M6_M7;
3047 0 : if (denBlkVar < FLAT_MAX_VAR_DECIM && noiseBlkVar> denBlkVarDecTh)
3048 0 : picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 1;
3049 0 : totLcuCount++;
3050 : }
3051 : }
3052 : }
3053 : }
3054 : }
3055 :
3056 0 : if (totLcuCount > 0) {
3057 0 : context_ptr->pic_noise_variance_float = (double)picNoiseVariance / (double)totLcuCount;
3058 0 : picNoiseVariance = picNoiseVariance / totLcuCount;
3059 : }
3060 :
3061 : //the variance of a 64x64 noise area tends to be bigger for small resolutions.
3062 : //if (sequence_control_set_ptr->seq_header.max_frame_height <= 720)
3063 : // noiseTh = 25;
3064 : //else if (sequence_control_set_ptr->seq_header.max_frame_height <= 1080)
3065 : // noiseTh = 10;
3066 : //else
3067 0 : noiseTh = 0;
3068 :
3069 : //look for extreme noise or big enough flat noisy area to be denoised.
3070 0 : if (picNoiseVariance > 60)
3071 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_3_1; //Noise+Edge information is too big, so may be this is all noise (action: frame based denoising)
3072 0 : else if (picNoiseVariance >= 10 + noiseTh)
3073 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_3; //Noise+Edge information is big enough, so there is no big enough flat noisy area (action : no denoising)
3074 0 : else if (picNoiseVariance >= 5 + noiseTh)
3075 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_2; //Noise+Edge information is relatively small, so there might be a big enough flat noisy area(action : denoising only for FN blocks)
3076 : else
3077 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_1; //Noise+Edge information is very small, so no noise nor edge area (action : no denoising)
3078 :
3079 0 : return return_error;
3080 : }
3081 :
3082 0 : EbErrorType SubSampleDetectNoise(
3083 : PictureAnalysisContext *context_ptr,
3084 : SequenceControlSet *sequence_control_set_ptr,
3085 : PictureParentControlSet *picture_control_set_ptr,
3086 : EbPictureBufferDesc *sixteenth_decimated_picture_ptr,
3087 : EbPictureBufferDesc *noise_picture_ptr,
3088 : EbPictureBufferDesc *denoised_picture_ptr,
3089 : uint32_t picture_width_in_sb)
3090 : {
3091 0 : EbErrorType return_error = EB_ErrorNone;
3092 :
3093 : uint64_t picNoiseVariance;
3094 :
3095 : uint32_t totLcuCount, noiseTh;
3096 :
3097 : uint32_t blockIndex;
3098 :
3099 0 : picNoiseVariance = 0;
3100 0 : totLcuCount = 0;
3101 :
3102 : uint16_t vert64x64Index;
3103 : uint16_t horz64x64Index;
3104 : uint32_t block64x64X;
3105 : uint32_t block64x64Y;
3106 : uint32_t vert16x16Index;
3107 : uint32_t horz16x16Index;
3108 : uint32_t block16x16X;
3109 : uint32_t block16x16Y;
3110 : uint32_t noiseOriginIndex;
3111 : uint32_t lcuCodingOrder;
3112 :
3113 : // Loop over 64x64 blocks on the downsampled domain (each block would contain 16 LCUs on the full sampled domain)
3114 0 : for (vert64x64Index = 0; vert64x64Index < (sixteenth_decimated_picture_ptr->height / 64); vert64x64Index++) {
3115 0 : for (horz64x64Index = 0; horz64x64Index < (sixteenth_decimated_picture_ptr->width / 64); horz64x64Index++) {
3116 0 : block64x64X = horz64x64Index * 64;
3117 0 : block64x64Y = vert64x64Index * 64;
3118 :
3119 0 : if (block64x64X == 0)
3120 0 : noise_extract_luma_weak(
3121 : sixteenth_decimated_picture_ptr,
3122 : denoised_picture_ptr,
3123 : noise_picture_ptr,
3124 : block64x64Y,
3125 : block64x64X);
3126 :
3127 0 : if (block64x64Y + BLOCK_SIZE_64 > sixteenth_decimated_picture_ptr->width)
3128 : {
3129 0 : noise_extract_luma_weak_c(
3130 : sixteenth_decimated_picture_ptr,
3131 : denoised_picture_ptr,
3132 : noise_picture_ptr,
3133 : block64x64Y,
3134 : block64x64X);
3135 : }
3136 :
3137 : // Loop over 16x16 blocks (i.e, 64x64 blocks in full resolution)
3138 0 : for (vert16x16Index = 0; vert16x16Index < 4; vert16x16Index++) {
3139 0 : for (horz16x16Index = 0; horz16x16Index < 4; horz16x16Index++) {
3140 0 : block16x16X = block64x64X + horz16x16Index * 16;
3141 0 : block16x16Y = block64x64Y + vert16x16Index * 16;
3142 :
3143 : //do it only for complete 16x16 blocks (i.e, complete 64x64 blocks in full resolution)
3144 0 : if (block16x16X + 16 <= sixteenth_decimated_picture_ptr->width && block16x16Y + 16 <= sixteenth_decimated_picture_ptr->height)
3145 : {
3146 0 : lcuCodingOrder = ((vert64x64Index * 4) + vert16x16Index) * picture_width_in_sb + ((horz64x64Index * 4) + horz16x16Index);
3147 :
3148 : uint64_t noiseBlkVar8x8[4], denoiseBlkVar8x8[4];
3149 :
3150 0 : noiseOriginIndex = noise_picture_ptr->origin_x + block16x16X + noise_picture_ptr->origin_y * noise_picture_ptr->stride_y;
3151 :
3152 0 : uint64_t noiseBlkVar = ComputeVariance16x16(
3153 : noise_picture_ptr,
3154 : noiseOriginIndex,
3155 : noiseBlkVar8x8);
3156 :
3157 0 : picNoiseVariance += (noiseBlkVar >> 16);
3158 :
3159 0 : blockIndex = (noise_picture_ptr->origin_y + block16x16Y) * noise_picture_ptr->stride_y + noise_picture_ptr->origin_x + block16x16X;
3160 :
3161 0 : uint64_t denBlkVar = ComputeVariance16x16(
3162 : denoised_picture_ptr,
3163 : blockIndex,
3164 : denoiseBlkVar8x8) >> 16;
3165 :
3166 : uint64_t noiseBlkVarDecTh;
3167 0 : uint64_t denBlkVarDecTh = FLAT_MAX_VAR_DECIM;
3168 :
3169 0 : noiseBlkVarDecTh = NOISE_MIN_LEVEL_DECIM_M6_M7;
3170 0 : if (denBlkVar < denBlkVarDecTh && noiseBlkVar> noiseBlkVarDecTh)
3171 0 : picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 1;
3172 0 : totLcuCount++;
3173 : }
3174 : }
3175 : }
3176 : }
3177 : }
3178 :
3179 0 : if (totLcuCount > 0) {
3180 0 : context_ptr->pic_noise_variance_float = (double)picNoiseVariance / (double)totLcuCount;
3181 0 : picNoiseVariance = picNoiseVariance / totLcuCount;
3182 : }
3183 :
3184 : //the variance of a 64x64 noise area tends to be bigger for small resolutions.
3185 0 : if (sequence_control_set_ptr->seq_header.max_frame_height <= 720)
3186 0 : noiseTh = 25;
3187 0 : else if (sequence_control_set_ptr->seq_header.max_frame_height <= 1080)
3188 0 : noiseTh = 10;
3189 : else
3190 0 : noiseTh = 0;
3191 :
3192 : //look for extreme noise or big enough flat noisy area to be denoised.
3193 0 : if (picNoiseVariance >= 55 + noiseTh)
3194 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_3_1; //Noise+Edge information is too big, so may be this is all noise (action: frame based denoising)
3195 0 : else if (picNoiseVariance >= 10 + noiseTh)
3196 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_3; //Noise+Edge information is big enough, so there is no big enough flat noisy area (action : no denoising)
3197 0 : else if (picNoiseVariance >= 5 + noiseTh)
3198 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_2; //Noise+Edge information is relatively small, so there might be a big enough flat noisy area(action : denoising only for FN blocks)
3199 : else
3200 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_1; //Noise+Edge information is very small, so no noise nor edge area (action : no denoising)
3201 :
3202 0 : return return_error;
3203 : }
3204 :
3205 0 : EbErrorType QuarterSampleDenoise(
3206 : PictureAnalysisContext *context_ptr,
3207 : SequenceControlSet *sequence_control_set_ptr,
3208 : PictureParentControlSet *picture_control_set_ptr,
3209 : EbPictureBufferDesc *quarter_decimated_picture_ptr,
3210 : uint32_t sb_total_count,
3211 : EbBool denoise_flag,
3212 : uint32_t picture_width_in_sb)
3213 : {
3214 0 : EbErrorType return_error = EB_ErrorNone;
3215 :
3216 : uint32_t lcuCodingOrder;
3217 0 : EbPictureBufferDesc *input_picture_ptr = picture_control_set_ptr->enhanced_picture_ptr;
3218 0 : EbPictureBufferDesc *denoised_picture_ptr = context_ptr->denoised_picture_ptr;
3219 0 : EbPictureBufferDesc *noise_picture_ptr = context_ptr->noise_picture_ptr;
3220 :
3221 : //Reset the flat noise flag array to False for both RealTime/HighComplexity Modes
3222 0 : for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder)
3223 0 : picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 0;
3224 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_INV; //this init is for both REAL-TIME and BEST-QUALITY
3225 :
3226 0 : decimation_2d(
3227 0 : &input_picture_ptr->buffer_y[input_picture_ptr->origin_x + input_picture_ptr->origin_y * input_picture_ptr->stride_y],
3228 0 : input_picture_ptr->stride_y,
3229 0 : input_picture_ptr->width,
3230 0 : input_picture_ptr->height,
3231 0 : &quarter_decimated_picture_ptr->buffer_y[quarter_decimated_picture_ptr->origin_x + (quarter_decimated_picture_ptr->origin_y * quarter_decimated_picture_ptr->stride_y)],
3232 0 : quarter_decimated_picture_ptr->stride_y,
3233 : 2);
3234 :
3235 0 : QuarterSampleDetectNoise(
3236 : context_ptr,
3237 : picture_control_set_ptr,
3238 : quarter_decimated_picture_ptr,
3239 : noise_picture_ptr,
3240 : denoised_picture_ptr,
3241 : picture_width_in_sb);
3242 :
3243 0 : if (denoise_flag == EB_TRUE) {
3244 : // Turn OFF the de-noiser for Class 2 at QP=29 and lower (for Fixed_QP) and at the target rate of 14Mbps and higher (for RC=ON)
3245 0 : if ((picture_control_set_ptr->pic_noise_class == PIC_NOISE_CLASS_3_1) ||
3246 0 : ((picture_control_set_ptr->pic_noise_class == PIC_NOISE_CLASS_2) && ((sequence_control_set_ptr->static_config.rate_control_mode == 0 && sequence_control_set_ptr->qp > DENOISER_QP_TH) || (sequence_control_set_ptr->static_config.rate_control_mode != 0 && sequence_control_set_ptr->static_config.target_bit_rate < DENOISER_BITRATE_TH)))) {
3247 0 : SubSampleFilterNoise(
3248 : sequence_control_set_ptr,
3249 : picture_control_set_ptr,
3250 : sb_total_count,
3251 : input_picture_ptr,
3252 : noise_picture_ptr,
3253 : denoised_picture_ptr,
3254 : picture_width_in_sb);
3255 : }
3256 : }
3257 :
3258 0 : return return_error;
3259 : }
3260 :
3261 0 : EbErrorType SubSampleDenoise(
3262 : PictureAnalysisContext *context_ptr,
3263 : SequenceControlSet *sequence_control_set_ptr,
3264 : PictureParentControlSet *picture_control_set_ptr,
3265 : EbPictureBufferDesc *sixteenth_decimated_picture_ptr,
3266 : uint32_t sb_total_count,
3267 : EbBool denoise_flag,
3268 : uint32_t picture_width_in_sb)
3269 : {
3270 0 : EbErrorType return_error = EB_ErrorNone;
3271 :
3272 : uint32_t lcuCodingOrder;
3273 0 : EbPictureBufferDesc *input_picture_ptr = picture_control_set_ptr->enhanced_picture_ptr;
3274 0 : EbPictureBufferDesc *denoised_picture_ptr = context_ptr->denoised_picture_ptr;
3275 0 : EbPictureBufferDesc *noise_picture_ptr = context_ptr->noise_picture_ptr;
3276 :
3277 : //Reset the flat noise flag array to False for both RealTime/HighComplexity Modes
3278 0 : for (lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder)
3279 0 : picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 0;
3280 0 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_INV; //this init is for both REAL-TIME and BEST-QUALITY
3281 :
3282 0 : decimation_2d(
3283 0 : &input_picture_ptr->buffer_y[input_picture_ptr->origin_x + input_picture_ptr->origin_y * input_picture_ptr->stride_y],
3284 0 : input_picture_ptr->stride_y,
3285 0 : input_picture_ptr->width,
3286 0 : input_picture_ptr->height,
3287 0 : &sixteenth_decimated_picture_ptr->buffer_y[sixteenth_decimated_picture_ptr->origin_x + (sixteenth_decimated_picture_ptr->origin_y * sixteenth_decimated_picture_ptr->stride_y)],
3288 0 : sixteenth_decimated_picture_ptr->stride_y,
3289 : 4);
3290 :
3291 0 : SubSampleDetectNoise(
3292 : context_ptr,
3293 : sequence_control_set_ptr,
3294 : picture_control_set_ptr,
3295 : sixteenth_decimated_picture_ptr,
3296 : noise_picture_ptr,
3297 : denoised_picture_ptr,
3298 : picture_width_in_sb);
3299 :
3300 0 : if (denoise_flag == EB_TRUE) {
3301 : // Turn OFF the de-noiser for Class 2 at QP=29 and lower (for Fixed_QP) and at the target rate of 14Mbps and higher (for RC=ON)
3302 0 : if ((picture_control_set_ptr->pic_noise_class == PIC_NOISE_CLASS_3_1) ||
3303 0 : ((picture_control_set_ptr->pic_noise_class == PIC_NOISE_CLASS_2) && ((sequence_control_set_ptr->static_config.rate_control_mode == 0 && sequence_control_set_ptr->qp > DENOISER_QP_TH) || (sequence_control_set_ptr->static_config.rate_control_mode != 0 && sequence_control_set_ptr->static_config.target_bit_rate < DENOISER_BITRATE_TH)))) {
3304 0 : SubSampleFilterNoise(
3305 : sequence_control_set_ptr,
3306 : picture_control_set_ptr,
3307 : sb_total_count,
3308 : input_picture_ptr,
3309 : noise_picture_ptr,
3310 : denoised_picture_ptr,
3311 : picture_width_in_sb);
3312 : }
3313 : }
3314 :
3315 0 : return return_error;
3316 : }
3317 :
3318 : /************************************************
3319 : * Set Picture Parameters based on input configuration
3320 : ** Setting Number of regions per resolution
3321 : ** Setting width and height for subpicture and when picture scan type is 1
3322 : ************************************************/
3323 117 : void SetPictureParametersForStatisticsGathering(
3324 : SequenceControlSet *sequence_control_set_ptr
3325 : )
3326 : {
3327 117 : sequence_control_set_ptr->picture_analysis_number_of_regions_per_width = HIGHER_THAN_CLASS_1_REGION_SPLIT_PER_WIDTH;
3328 117 : sequence_control_set_ptr->picture_analysis_number_of_regions_per_height = HIGHER_THAN_CLASS_1_REGION_SPLIT_PER_HEIGHT;
3329 :
3330 117 : return;
3331 : }
3332 : /************************************************
3333 : * Picture Pre Processing Operations *
3334 : *** A function that groups all of the Pre proceesing
3335 : * operations performed on the input picture
3336 : *** Operations included at this point:
3337 : ***** Borders preprocessing
3338 : ***** Denoising
3339 : ************************************************/
3340 118 : void PicturePreProcessingOperations(
3341 : PictureParentControlSet *picture_control_set_ptr,
3342 : SequenceControlSet *sequence_control_set_ptr,
3343 : uint32_t sb_total_count)
3344 : {
3345 118 : if (sequence_control_set_ptr->film_grain_denoise_strength) {
3346 0 : denoise_estimate_film_grain(
3347 : sequence_control_set_ptr,
3348 : picture_control_set_ptr);
3349 : }
3350 : else {
3351 : //Reset the flat noise flag array to False for both RealTime/HighComplexity Modes
3352 7160 : for (uint32_t lcuCodingOrder = 0; lcuCodingOrder < sb_total_count; ++lcuCodingOrder)
3353 7042 : picture_control_set_ptr->sb_flat_noise_array[lcuCodingOrder] = 0;
3354 118 : picture_control_set_ptr->pic_noise_class = PIC_NOISE_CLASS_INV; //this init is for both REAL-TIME and BEST-QUALITY
3355 : }
3356 119 : return;
3357 : }
3358 :
3359 : /**************************************************************
3360 : * Generate picture histogram bins for YUV pixel intensity *
3361 : * Calculation is done on a region based (Set previously, resolution dependent)
3362 : **************************************************************/
3363 120 : void SubSampleLumaGeneratePixelIntensityHistogramBins(
3364 : SequenceControlSet *sequence_control_set_ptr,
3365 : PictureParentControlSet *picture_control_set_ptr,
3366 : EbPictureBufferDesc *input_picture_ptr,
3367 : uint64_t *sumAverageIntensityTotalRegionsLuma) {
3368 : uint32_t regionWidth;
3369 : uint32_t regionHeight;
3370 : uint32_t regionWidthOffset;
3371 : uint32_t regionHeightOffset;
3372 : uint32_t regionInPictureWidthIndex;
3373 : uint32_t regionInPictureHeightIndex;
3374 : uint32_t histogramBin;
3375 : uint64_t sum;
3376 :
3377 120 : regionWidth = input_picture_ptr->width / sequence_control_set_ptr->picture_analysis_number_of_regions_per_width;
3378 120 : regionHeight = input_picture_ptr->height / sequence_control_set_ptr->picture_analysis_number_of_regions_per_height;
3379 :
3380 : // Loop over regions inside the picture
3381 599 : for (regionInPictureWidthIndex = 0; regionInPictureWidthIndex < sequence_control_set_ptr->picture_analysis_number_of_regions_per_width; regionInPictureWidthIndex++) { // loop over horizontal regions
3382 2398 : for (regionInPictureHeightIndex = 0; regionInPictureHeightIndex < sequence_control_set_ptr->picture_analysis_number_of_regions_per_height; regionInPictureHeightIndex++) { // loop over vertical regions
3383 :
3384 : // Initialize bins to 1
3385 1919 : initialize_buffer_32bits(picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][0], 64, 0, 1);
3386 :
3387 3840 : regionWidthOffset = (regionInPictureWidthIndex == sequence_control_set_ptr->picture_analysis_number_of_regions_per_width - 1) ?
3388 1920 : input_picture_ptr->width - (sequence_control_set_ptr->picture_analysis_number_of_regions_per_width * regionWidth) :
3389 : 0;
3390 :
3391 3840 : regionHeightOffset = (regionInPictureHeightIndex == sequence_control_set_ptr->picture_analysis_number_of_regions_per_height - 1) ?
3392 1920 : input_picture_ptr->height - (sequence_control_set_ptr->picture_analysis_number_of_regions_per_height * regionHeight) :
3393 : 0;
3394 :
3395 : // Y Histogram
3396 1920 : CalculateHistogram(
3397 1920 : &input_picture_ptr->buffer_y[(input_picture_ptr->origin_x + regionInPictureWidthIndex * regionWidth) + ((input_picture_ptr->origin_y + regionInPictureHeightIndex * regionHeight) * input_picture_ptr->stride_y)],
3398 : regionWidth + regionWidthOffset,
3399 : regionHeight + regionHeightOffset,
3400 1920 : input_picture_ptr->stride_y,
3401 : 1,
3402 1920 : picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][0],
3403 : &sum);
3404 :
3405 1918 : picture_control_set_ptr->average_intensity_per_region[regionInPictureWidthIndex][regionInPictureHeightIndex][0] = (uint8_t)((sum + (((regionWidth + regionWidthOffset)*(regionHeight + regionHeightOffset)) >> 1)) / ((regionWidth + regionWidthOffset)*(regionHeight + regionHeightOffset)));
3406 1918 : (*sumAverageIntensityTotalRegionsLuma) += (sum << 4);
3407 487256 : for (histogramBin = 0; histogramBin < HISTOGRAM_NUMBER_OF_BINS; histogramBin++) { // Loop over the histogram bins
3408 485338 : picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][0][histogramBin] =
3409 485338 : picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][0][histogramBin] << 4;
3410 : }
3411 : }
3412 : }
3413 :
3414 119 : return;
3415 : }
3416 :
3417 120 : void SubSampleChromaGeneratePixelIntensityHistogramBins(
3418 : SequenceControlSet *sequence_control_set_ptr,
3419 : PictureParentControlSet *picture_control_set_ptr,
3420 : EbPictureBufferDesc *input_picture_ptr,
3421 : uint64_t *sumAverageIntensityTotalRegionsCb,
3422 : uint64_t *sumAverageIntensityTotalRegionsCr) {
3423 : uint64_t sum;
3424 : uint32_t regionWidth;
3425 : uint32_t regionHeight;
3426 : uint32_t regionWidthOffset;
3427 : uint32_t regionHeightOffset;
3428 : uint32_t regionInPictureWidthIndex;
3429 : uint32_t regionInPictureHeightIndex;
3430 :
3431 : uint16_t histogramBin;
3432 120 : uint8_t decim_step = 4;
3433 :
3434 120 : regionWidth = input_picture_ptr->width / sequence_control_set_ptr->picture_analysis_number_of_regions_per_width;
3435 120 : regionHeight = input_picture_ptr->height / sequence_control_set_ptr->picture_analysis_number_of_regions_per_height;
3436 :
3437 : // Loop over regions inside the picture
3438 599 : for (regionInPictureWidthIndex = 0; regionInPictureWidthIndex < sequence_control_set_ptr->picture_analysis_number_of_regions_per_width; regionInPictureWidthIndex++) { // loop over horizontal regions
3439 2399 : for (regionInPictureHeightIndex = 0; regionInPictureHeightIndex < sequence_control_set_ptr->picture_analysis_number_of_regions_per_height; regionInPictureHeightIndex++) { // loop over vertical regions
3440 :
3441 : // Initialize bins to 1
3442 1920 : initialize_buffer_32bits(picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][1], 64, 0, 1);
3443 1920 : initialize_buffer_32bits(picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][2], 64, 0, 1);
3444 :
3445 3838 : regionWidthOffset = (regionInPictureWidthIndex == sequence_control_set_ptr->picture_analysis_number_of_regions_per_width - 1) ?
3446 1919 : input_picture_ptr->width - (sequence_control_set_ptr->picture_analysis_number_of_regions_per_width * regionWidth) :
3447 : 0;
3448 :
3449 3838 : regionHeightOffset = (regionInPictureHeightIndex == sequence_control_set_ptr->picture_analysis_number_of_regions_per_height - 1) ?
3450 1919 : input_picture_ptr->height - (sequence_control_set_ptr->picture_analysis_number_of_regions_per_height * regionHeight) :
3451 : 0;
3452 :
3453 : // U Histogram
3454 1919 : CalculateHistogram(
3455 1919 : &input_picture_ptr->buffer_cb[((input_picture_ptr->origin_x + regionInPictureWidthIndex * regionWidth) >> 1) + (((input_picture_ptr->origin_y + regionInPictureHeightIndex * regionHeight) >> 1) * input_picture_ptr->stride_cb)],
3456 1919 : (regionWidth + regionWidthOffset) >> 1,
3457 1919 : (regionHeight + regionHeightOffset) >> 1,
3458 1919 : input_picture_ptr->stride_cb,
3459 : decim_step,
3460 1919 : picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][1],
3461 : &sum);
3462 :
3463 1919 : sum = (sum << decim_step);
3464 1919 : *sumAverageIntensityTotalRegionsCb += sum;
3465 1919 : picture_control_set_ptr->average_intensity_per_region[regionInPictureWidthIndex][regionInPictureHeightIndex][1] = (uint8_t)((sum + (((regionWidth + regionWidthOffset)*(regionHeight + regionHeightOffset)) >> 3)) / (((regionWidth + regionWidthOffset)*(regionHeight + regionHeightOffset)) >> 2));
3466 :
3467 485917 : for (histogramBin = 0; histogramBin < HISTOGRAM_NUMBER_OF_BINS; histogramBin++) { // Loop over the histogram bins
3468 483998 : picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][1][histogramBin] =
3469 483998 : picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][1][histogramBin] << decim_step;
3470 : }
3471 :
3472 : // V Histogram
3473 1919 : CalculateHistogram(
3474 1919 : &input_picture_ptr->buffer_cr[((input_picture_ptr->origin_x + regionInPictureWidthIndex * regionWidth) >> 1) + (((input_picture_ptr->origin_y + regionInPictureHeightIndex * regionHeight) >> 1) * input_picture_ptr->stride_cr)],
3475 1919 : (regionWidth + regionWidthOffset) >> 1,
3476 1919 : (regionHeight + regionHeightOffset) >> 1,
3477 1919 : input_picture_ptr->stride_cr,
3478 : decim_step,
3479 1919 : picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][2],
3480 : &sum);
3481 :
3482 1919 : sum = (sum << decim_step);
3483 1919 : *sumAverageIntensityTotalRegionsCr += sum;
3484 1919 : picture_control_set_ptr->average_intensity_per_region[regionInPictureWidthIndex][regionInPictureHeightIndex][2] = (uint8_t)((sum + (((regionWidth + regionWidthOffset)*(regionHeight + regionHeightOffset)) >> 3)) / (((regionWidth + regionWidthOffset)*(regionHeight + regionHeightOffset)) >> 2));
3485 :
3486 486504 : for (histogramBin = 0; histogramBin < HISTOGRAM_NUMBER_OF_BINS; histogramBin++) { // Loop over the histogram bins
3487 484585 : picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][2][histogramBin] =
3488 484585 : picture_control_set_ptr->picture_histogram[regionInPictureWidthIndex][regionInPictureHeightIndex][2][histogramBin] << decim_step;
3489 : }
3490 : }
3491 : }
3492 119 : return;
3493 : }
3494 :
3495 120 : void EdgeDetectionMeanLumaChroma16x16(
3496 : SequenceControlSet *sequence_control_set_ptr,
3497 : PictureParentControlSet *picture_control_set_ptr,
3498 : uint32_t totalLcuCount)
3499 : {
3500 : uint32_t sb_index;
3501 :
3502 120 : uint32_t maxGrad = 1;
3503 :
3504 : // The values are calculated for every 4th frame
3505 120 : if ((picture_control_set_ptr->picture_number & 3) == 0) {
3506 1830 : for (sb_index = 0; sb_index < totalLcuCount; sb_index++) {
3507 1800 : SbStat *sb_stat_ptr = &picture_control_set_ptr->sb_stat_array[sb_index];
3508 :
3509 1800 : EB_MEMSET(sb_stat_ptr, 0, sizeof(SbStat));
3510 1800 : SbParams *sb_params = &sequence_control_set_ptr->sb_params_array[sb_index];
3511 1800 : if (sb_params->potential_logo_sb &&sb_params->is_complete_sb)
3512 :
3513 : {
3514 660 : uint8_t *y_mean_ptr = picture_control_set_ptr->y_mean[sb_index];
3515 660 : uint8_t *cr_mean_ptr = picture_control_set_ptr->crMean[sb_index];
3516 660 : uint8_t *cb_mean_ptr = picture_control_set_ptr->cbMean[sb_index];
3517 :
3518 : uint8_t rasterScanCuIndex;
3519 :
3520 11220 : for (rasterScanCuIndex = RASTER_SCAN_CU_INDEX_16x16_0; rasterScanCuIndex <= RASTER_SCAN_CU_INDEX_16x16_15; rasterScanCuIndex++) {
3521 10560 : uint8_t cu_index = rasterScanCuIndex - 5;
3522 10560 : uint8_t x = cu_index & 3;
3523 10560 : uint8_t y = (cu_index >> 2);
3524 10560 : int32_t gradx = 0;
3525 10560 : int32_t grady = 0;
3526 10560 : int32_t nbcompx = 0;
3527 10560 : int32_t nbcompy = 0;
3528 10560 : if (x != 0)
3529 : {
3530 7920 : gradx += ABS((int32_t)(y_mean_ptr[rasterScanCuIndex]) - (int32_t)(y_mean_ptr[rasterScanCuIndex - 1]));
3531 7920 : gradx += ABS((int32_t)(cr_mean_ptr[rasterScanCuIndex]) - (int32_t)(cr_mean_ptr[rasterScanCuIndex - 1]));
3532 7920 : gradx += ABS((int32_t)(cb_mean_ptr[rasterScanCuIndex]) - (int32_t)(cb_mean_ptr[rasterScanCuIndex - 1]));
3533 7920 : nbcompx++;
3534 : }
3535 10560 : if (x != 3)
3536 : {
3537 7920 : gradx += ABS((int32_t)(y_mean_ptr[rasterScanCuIndex + 1]) - (int32_t)(y_mean_ptr[rasterScanCuIndex]));
3538 7920 : gradx += ABS((int32_t)(cr_mean_ptr[rasterScanCuIndex + 1]) - (int32_t)(cr_mean_ptr[rasterScanCuIndex]));
3539 7920 : gradx += ABS((int32_t)(cb_mean_ptr[rasterScanCuIndex + 1]) - (int32_t)(cb_mean_ptr[rasterScanCuIndex]));
3540 7920 : nbcompx++;
3541 : }
3542 10560 : gradx = gradx / nbcompx;
3543 :
3544 10560 : if (y != 0)
3545 : {
3546 7920 : grady += ABS((int32_t)(y_mean_ptr[rasterScanCuIndex]) - (int32_t)(y_mean_ptr[rasterScanCuIndex - 4]));
3547 7920 : grady += ABS((int32_t)(cr_mean_ptr[rasterScanCuIndex]) - (int32_t)(cr_mean_ptr[rasterScanCuIndex - 4]));
3548 7920 : grady += ABS((int32_t)(cb_mean_ptr[rasterScanCuIndex]) - (int32_t)(cb_mean_ptr[rasterScanCuIndex - 4]));
3549 7920 : nbcompy++;
3550 : }
3551 10560 : if (y != 3)
3552 : {
3553 7920 : grady += ABS((int32_t)(y_mean_ptr[rasterScanCuIndex + 4]) - (int32_t)(y_mean_ptr[rasterScanCuIndex]));
3554 7920 : grady += ABS((int32_t)(cr_mean_ptr[rasterScanCuIndex + 4]) - (int32_t)(cr_mean_ptr[rasterScanCuIndex]));
3555 7920 : grady += ABS((int32_t)(cb_mean_ptr[rasterScanCuIndex + 4]) - (int32_t)(cb_mean_ptr[rasterScanCuIndex]));
3556 :
3557 7920 : nbcompy++;
3558 : }
3559 :
3560 10560 : grady = grady / nbcompy;
3561 10560 : sb_stat_ptr->cu_stat_array[rasterScanCuIndex].grad = (uint32_t)ABS(gradx) + ABS(grady);
3562 10560 : if (sb_stat_ptr->cu_stat_array[rasterScanCuIndex].grad > maxGrad)
3563 222 : maxGrad = sb_stat_ptr->cu_stat_array[rasterScanCuIndex].grad;
3564 : }
3565 : }
3566 : }
3567 :
3568 1830 : for (sb_index = 0; sb_index < totalLcuCount; sb_index++) {
3569 1800 : SbParams *sb_params = &sequence_control_set_ptr->sb_params_array[sb_index];
3570 1800 : if (sb_params->potential_logo_sb &&sb_params->is_complete_sb) {
3571 660 : SbStat *sb_stat_ptr = &picture_control_set_ptr->sb_stat_array[sb_index];
3572 :
3573 : uint32_t rasterScanCuIndex;
3574 11220 : for (rasterScanCuIndex = RASTER_SCAN_CU_INDEX_16x16_0; rasterScanCuIndex <= RASTER_SCAN_CU_INDEX_16x16_15; rasterScanCuIndex++)
3575 10560 : sb_stat_ptr->cu_stat_array[rasterScanCuIndex].edge_cu = (uint16_t)MIN(((sb_stat_ptr->cu_stat_array[rasterScanCuIndex].grad * (255 * 3)) / maxGrad), 255) < 30 ? 0 : 1;
3576 : }
3577 : }
3578 : }
3579 : else {
3580 5487 : for (sb_index = 0; sb_index < totalLcuCount; sb_index++) {
3581 5397 : SbStat *sb_stat_ptr = &picture_control_set_ptr->sb_stat_array[sb_index];
3582 :
3583 5397 : EB_MEMSET(sb_stat_ptr, 0, sizeof(SbStat));
3584 : }
3585 : }
3586 120 : }
3587 :
3588 : /******************************************************
3589 : * Edge map derivation
3590 : ******************************************************/
3591 120 : void EdgeDetection(
3592 : SequenceControlSet *sequence_control_set_ptr,
3593 : PictureParentControlSet *picture_control_set_ptr)
3594 : {
3595 : uint16_t *variancePtr;
3596 120 : uint32_t sb_total_count = picture_control_set_ptr->sb_total_count;
3597 120 : uint64_t thrsldLevel0 = (picture_control_set_ptr->pic_avg_variance * 70) / 100;
3598 : uint8_t *meanPtr;
3599 120 : uint32_t picture_width_in_sb = (sequence_control_set_ptr->seq_header.max_frame_width + sequence_control_set_ptr->sb_sz - 1) / sequence_control_set_ptr->sb_sz;
3600 120 : uint32_t picture_height_in_sb = (sequence_control_set_ptr->seq_header.max_frame_height + sequence_control_set_ptr->sb_sz - 1) / sequence_control_set_ptr->sb_sz;
3601 120 : uint32_t neighbourLcuIndex = 0;
3602 120 : uint64_t similarityCount = 0;
3603 120 : uint64_t similarityCount0 = 0;
3604 120 : uint64_t similarityCount1 = 0;
3605 120 : uint64_t similarityCount2 = 0;
3606 120 : uint64_t similarityCount3 = 0;
3607 120 : uint32_t sb_x = 0;
3608 120 : uint32_t sb_y = 0;
3609 : uint32_t sb_index;
3610 : EbBool highVarianceLucFlag;
3611 :
3612 120 : uint32_t rasterScanCuIndex = 0;
3613 120 : uint32_t numberOfEdgeLcu = 0;
3614 : EbBool highIntensityLcuFlag;
3615 :
3616 : uint64_t neighbourLcuMean;
3617 : int32_t i, j;
3618 :
3619 120 : uint8_t highIntensityTh = 180;
3620 120 : uint8_t lowIntensityTh = 120;
3621 120 : uint8_t highIntensityTh1 = 200;
3622 120 : uint8_t veryLowIntensityTh = 20;
3623 :
3624 7308 : for (sb_index = 0; sb_index < sb_total_count; ++sb_index) {
3625 7188 : sb_x = sb_index % picture_width_in_sb;
3626 7188 : sb_y = sb_index / picture_width_in_sb;
3627 :
3628 7188 : EdgeLcuResults *edge_results_ptr = picture_control_set_ptr->edge_results_ptr;
3629 7188 : picture_control_set_ptr->edge_results_ptr[sb_index].edge_block_num = 0;
3630 7188 : picture_control_set_ptr->edge_results_ptr[sb_index].isolated_high_intensity_sb = 0;
3631 7188 : picture_control_set_ptr->sharp_edge_sb_flag[sb_index] = 0;
3632 :
3633 7188 : if (sb_x > 0 && sb_x < (uint32_t)(picture_width_in_sb - 1) && sb_y > 0 && sb_y < (uint32_t)(picture_height_in_sb - 1)) {
3634 3838 : variancePtr = picture_control_set_ptr->variance[sb_index];
3635 3838 : meanPtr = picture_control_set_ptr->y_mean[sb_index];
3636 :
3637 3838 : similarityCount = 0;
3638 :
3639 3838 : highVarianceLucFlag =
3640 3838 : (variancePtr[RASTER_SCAN_CU_INDEX_64x64] > thrsldLevel0) ? EB_TRUE : EB_FALSE;
3641 3838 : edge_results_ptr[sb_index].edge_block_num = highVarianceLucFlag;
3642 3838 : if (variancePtr[0] > highIntensityTh1) {
3643 1945 : uint8_t sharpEdge = 0;
3644 33014 : for (rasterScanCuIndex = RASTER_SCAN_CU_INDEX_16x16_0; rasterScanCuIndex <= RASTER_SCAN_CU_INDEX_16x16_15; rasterScanCuIndex++)
3645 31069 : sharpEdge = (variancePtr[rasterScanCuIndex] < veryLowIntensityTh) ? sharpEdge + 1 : sharpEdge;
3646 1945 : if (sharpEdge > 4)
3647 424 : picture_control_set_ptr->sharp_edge_sb_flag[sb_index] = 1;
3648 : }
3649 :
3650 3838 : if (sb_x > 3 && sb_x < (uint32_t)(picture_width_in_sb - 4) && sb_y > 3 && sb_y < (uint32_t)(picture_height_in_sb - 4)) {
3651 0 : highIntensityLcuFlag =
3652 0 : (meanPtr[RASTER_SCAN_CU_INDEX_64x64] > highIntensityTh) ? EB_TRUE : EB_FALSE;
3653 :
3654 0 : if (highIntensityLcuFlag) {
3655 0 : neighbourLcuIndex = sb_index - 1;
3656 0 : neighbourLcuMean = picture_control_set_ptr->y_mean[neighbourLcuIndex][RASTER_SCAN_CU_INDEX_64x64];
3657 :
3658 0 : similarityCount0 = (neighbourLcuMean < lowIntensityTh) ? 1 : 0;
3659 :
3660 0 : neighbourLcuIndex = sb_index + 1;
3661 :
3662 0 : neighbourLcuMean = picture_control_set_ptr->y_mean[neighbourLcuIndex][RASTER_SCAN_CU_INDEX_64x64];
3663 0 : similarityCount1 = (neighbourLcuMean < lowIntensityTh) ? 1 : 0;
3664 :
3665 0 : neighbourLcuIndex = sb_index - picture_width_in_sb;
3666 0 : neighbourLcuMean = picture_control_set_ptr->y_mean[neighbourLcuIndex][RASTER_SCAN_CU_INDEX_64x64];
3667 0 : similarityCount2 = (neighbourLcuMean < lowIntensityTh) ? 1 : 0;
3668 :
3669 0 : neighbourLcuIndex = sb_index + picture_width_in_sb;
3670 0 : neighbourLcuMean = picture_control_set_ptr->y_mean[neighbourLcuIndex][RASTER_SCAN_CU_INDEX_64x64];
3671 0 : similarityCount3 = (neighbourLcuMean < lowIntensityTh) ? 1 : 0;
3672 :
3673 0 : similarityCount = similarityCount0 + similarityCount1 + similarityCount2 + similarityCount3;
3674 :
3675 0 : if (similarityCount > 0) {
3676 0 : for (i = -4; i < 5; i++) {
3677 0 : for (j = -4; j < 5; j++) {
3678 0 : neighbourLcuIndex = sb_index + (i * picture_width_in_sb) + j;
3679 0 : picture_control_set_ptr->edge_results_ptr[neighbourLcuIndex].isolated_high_intensity_sb = 1;
3680 : }
3681 : }
3682 : }
3683 : }
3684 : }
3685 :
3686 3838 : if (highVarianceLucFlag)
3687 1567 : numberOfEdgeLcu += edge_results_ptr[sb_index].edge_block_num;
3688 : }
3689 : }
3690 120 : return;
3691 : }
3692 :
3693 : /******************************************************
3694 : * Calculate the variance of variance to determine Homogeneous regions. Note: Variance calculation should be on.
3695 : ******************************************************/
3696 120 : void DetermineHomogeneousRegionInPicture(
3697 : SequenceControlSet *sequence_control_set_ptr,
3698 : PictureParentControlSet *picture_control_set_ptr)
3699 : {
3700 : uint16_t *variancePtr;
3701 : uint32_t sb_index;
3702 120 : uint64_t nullVarCnt = 0;
3703 120 : uint64_t veryLowVarCnt = 0;
3704 120 : uint64_t varLcuCnt = 0;
3705 120 : uint32_t sb_total_count = picture_control_set_ptr->sb_total_count;
3706 :
3707 7315 : for (sb_index = 0; sb_index < sb_total_count; ++sb_index) {
3708 7195 : SbParams sb_params = sequence_control_set_ptr->sb_params_array[sb_index];
3709 7195 : variancePtr = picture_control_set_ptr->variance[sb_index];
3710 :
3711 7195 : if (sb_params.is_complete_sb) {
3712 5999 : nullVarCnt += (variancePtr[ME_TIER_ZERO_PU_64x64] == 0) ? 1 : 0;
3713 :
3714 5999 : varLcuCnt++;
3715 :
3716 5999 : veryLowVarCnt += ((variancePtr[ME_TIER_ZERO_PU_64x64]) < LCU_LOW_VAR_TH) ? 1 : 0;
3717 : }
3718 : }
3719 120 : picture_control_set_ptr->very_low_var_pic_flag = EB_FALSE;
3720 120 : if ((varLcuCnt > 0) && (((veryLowVarCnt * 100) / varLcuCnt) > PIC_LOW_VAR_PERCENTAGE_TH))
3721 0 : picture_control_set_ptr->very_low_var_pic_flag = EB_TRUE;
3722 120 : picture_control_set_ptr->logo_pic_flag = EB_FALSE;
3723 120 : if ((varLcuCnt > 0) && (((veryLowVarCnt * 100) / varLcuCnt) > 80))
3724 0 : picture_control_set_ptr->logo_pic_flag = EB_TRUE;
3725 120 : return;
3726 : }
3727 : /************************************************
3728 : * ComputePictureSpatialStatistics
3729 : ** Compute Block Variance
3730 : ** Compute Picture Variance
3731 : ** Compute Block Mean for all blocks in the picture
3732 : ************************************************/
3733 120 : void ComputePictureSpatialStatistics(
3734 : SequenceControlSet *sequence_control_set_ptr,
3735 : PictureParentControlSet *picture_control_set_ptr,
3736 : EbPictureBufferDesc *input_picture_ptr,
3737 : EbPictureBufferDesc *input_padded_picture_ptr,
3738 : uint32_t sb_total_count)
3739 : {
3740 : uint32_t sb_index;
3741 : uint32_t sb_origin_x; // to avoid using child PCS
3742 : uint32_t sb_origin_y;
3743 : uint32_t inputLumaOriginIndex;
3744 : uint32_t inputCbOriginIndex;
3745 : uint32_t inputCrOriginIndex;
3746 : uint64_t picTotVariance;
3747 :
3748 : // Variance
3749 120 : picTotVariance = 0;
3750 :
3751 7311 : for (sb_index = 0; sb_index < picture_control_set_ptr->sb_total_count; ++sb_index) {
3752 7192 : SbParams *sb_params = &sequence_control_set_ptr->sb_params_array[sb_index];
3753 :
3754 7192 : sb_origin_x = sb_params->origin_x;
3755 7192 : sb_origin_y = sb_params->origin_y;
3756 7192 : inputLumaOriginIndex = (input_padded_picture_ptr->origin_y + sb_origin_y) * input_padded_picture_ptr->stride_y +
3757 7192 : input_padded_picture_ptr->origin_x + sb_origin_x;
3758 :
3759 7192 : inputCbOriginIndex = ((input_picture_ptr->origin_y + sb_origin_y) >> 1) * input_picture_ptr->stride_cb + ((input_picture_ptr->origin_x + sb_origin_x) >> 1);
3760 7192 : inputCrOriginIndex = ((input_picture_ptr->origin_y + sb_origin_y) >> 1) * input_picture_ptr->stride_cr + ((input_picture_ptr->origin_x + sb_origin_x) >> 1);
3761 :
3762 7192 : ComputeBlockMeanComputeVariance(
3763 : sequence_control_set_ptr,
3764 : picture_control_set_ptr,
3765 : input_padded_picture_ptr,
3766 : sb_index,
3767 : inputLumaOriginIndex);
3768 :
3769 7186 : if (sb_params->is_complete_sb) {
3770 5991 : ComputeChromaBlockMean(
3771 : sequence_control_set_ptr,
3772 : picture_control_set_ptr,
3773 : input_picture_ptr,
3774 : sb_index,
3775 : inputCbOriginIndex,
3776 : inputCrOriginIndex);
3777 : }
3778 : else {
3779 1195 : ZeroOutChromaBlockMean(
3780 : picture_control_set_ptr,
3781 : sb_index);
3782 : }
3783 :
3784 7191 : picTotVariance += (picture_control_set_ptr->variance[sb_index][RASTER_SCAN_CU_INDEX_64x64]);
3785 : }
3786 :
3787 119 : picture_control_set_ptr->pic_avg_variance = (uint16_t)(picTotVariance / sb_total_count);
3788 :
3789 : // Calculate the variance of variance to determine Homogeneous regions. Note: Variance calculation should be on.
3790 119 : DetermineHomogeneousRegionInPicture(
3791 : sequence_control_set_ptr,
3792 : picture_control_set_ptr);
3793 :
3794 120 : EdgeDetectionMeanLumaChroma16x16(
3795 : sequence_control_set_ptr,
3796 : picture_control_set_ptr,
3797 120 : sequence_control_set_ptr->sb_total_count);
3798 :
3799 120 : EdgeDetection(
3800 : sequence_control_set_ptr,
3801 : picture_control_set_ptr);
3802 :
3803 120 : return;
3804 : }
3805 :
3806 120 : void CalculateInputAverageIntensity(
3807 : SequenceControlSet *sequence_control_set_ptr,
3808 : PictureParentControlSet *picture_control_set_ptr,
3809 : EbPictureBufferDesc *input_picture_ptr,
3810 : uint64_t sumAverageIntensityTotalRegionsLuma,
3811 : uint64_t sumAverageIntensityTotalRegionsCb,
3812 : uint64_t sumAverageIntensityTotalRegionsCr)
3813 : {
3814 120 : if (sequence_control_set_ptr->scd_mode == SCD_MODE_0) {
3815 : uint16_t blockIndexInWidth;
3816 : uint16_t blockIndexInHeight;
3817 120 : uint64_t mean = 0;
3818 :
3819 120 : const uint16_t stride_y = input_picture_ptr->stride_y;
3820 : // Loop over 8x8 blocks and calculates the mean value
3821 120 : if (sequence_control_set_ptr->block_mean_calc_prec == BLOCK_MEAN_PREC_FULL) {
3822 0 : for (blockIndexInHeight = 0; blockIndexInHeight < input_picture_ptr->height >> 3; ++blockIndexInHeight) {
3823 0 : for (blockIndexInWidth = 0; blockIndexInWidth < input_picture_ptr->width >> 3; ++blockIndexInWidth)
3824 0 : mean += compute_mean_8x8(&(input_picture_ptr->buffer_y[(blockIndexInWidth << 3) + (blockIndexInHeight << 3) * input_picture_ptr->stride_y]), input_picture_ptr->stride_y, 8, 8);
3825 : }
3826 : }
3827 : else {
3828 4659 : for (blockIndexInHeight = 0; blockIndexInHeight < input_picture_ptr->height >> 3; ++blockIndexInHeight) {
3829 423373 : for (blockIndexInWidth = 0; blockIndexInWidth < input_picture_ptr->width >> 3; ++blockIndexInWidth)
3830 418834 : mean += compute_sub_mean8x8_sse2_intrin(&(input_picture_ptr->buffer_y[(blockIndexInWidth << 3) + (blockIndexInHeight << 3) * stride_y]), stride_y);
3831 : }
3832 : }
3833 0 : mean = ((mean + ((input_picture_ptr->height* input_picture_ptr->width) >> 7)) / ((input_picture_ptr->height* input_picture_ptr->width) >> 6));
3834 0 : mean = (mean + (1 << (MEAN_PRECISION - 1))) >> MEAN_PRECISION;
3835 0 : picture_control_set_ptr->average_intensity[0] = (uint8_t)mean;
3836 : }
3837 :
3838 : else {
3839 0 : picture_control_set_ptr->average_intensity[0] = (uint8_t)((sumAverageIntensityTotalRegionsLuma + ((input_picture_ptr->width*input_picture_ptr->height) >> 1)) / (input_picture_ptr->width*input_picture_ptr->height));
3840 0 : picture_control_set_ptr->average_intensity[1] = (uint8_t)((sumAverageIntensityTotalRegionsCb + ((input_picture_ptr->width*input_picture_ptr->height) >> 3)) / ((input_picture_ptr->width*input_picture_ptr->height) >> 2));
3841 0 : picture_control_set_ptr->average_intensity[2] = (uint8_t)((sumAverageIntensityTotalRegionsCr + ((input_picture_ptr->width*input_picture_ptr->height) >> 3)) / ((input_picture_ptr->width*input_picture_ptr->height) >> 2));
3842 : }
3843 :
3844 0 : return;
3845 : }
3846 :
3847 : /************************************************
3848 : * Gathering statistics per picture
3849 : ** Calculating the pixel intensity histogram bins per picture needed for SCD
3850 : ** Computing Picture Variance
3851 : ************************************************/
3852 120 : void GatheringPictureStatistics(
3853 : SequenceControlSet *sequence_control_set_ptr,
3854 : PictureParentControlSet *picture_control_set_ptr,
3855 : EbPictureBufferDesc *input_picture_ptr,
3856 : EbPictureBufferDesc *input_padded_picture_ptr,
3857 : EbPictureBufferDesc *sixteenth_decimated_picture_ptr,
3858 : uint32_t sb_total_count)
3859 : {
3860 120 : uint64_t sumAverageIntensityTotalRegionsLuma = 0;
3861 120 : uint64_t sumAverageIntensityTotalRegionsCb = 0;
3862 120 : uint64_t sumAverageIntensityTotalRegionsCr = 0;
3863 :
3864 : // Histogram bins
3865 : // Use 1/16 Luma for Histogram generation
3866 : // 1/16 input ready
3867 120 : SubSampleLumaGeneratePixelIntensityHistogramBins(
3868 : sequence_control_set_ptr,
3869 : picture_control_set_ptr,
3870 : sixteenth_decimated_picture_ptr,
3871 : &sumAverageIntensityTotalRegionsLuma);
3872 :
3873 : // Use 1/4 Chroma for Histogram generation
3874 : // 1/4 input not ready => perform operation on the fly
3875 120 : SubSampleChromaGeneratePixelIntensityHistogramBins(
3876 : sequence_control_set_ptr,
3877 : picture_control_set_ptr,
3878 : input_picture_ptr,
3879 : &sumAverageIntensityTotalRegionsCb,
3880 : &sumAverageIntensityTotalRegionsCr);
3881 : //
3882 : // Calculate the LUMA average intensity
3883 120 : CalculateInputAverageIntensity(
3884 : sequence_control_set_ptr,
3885 : picture_control_set_ptr,
3886 : input_picture_ptr,
3887 : sumAverageIntensityTotalRegionsLuma,
3888 : sumAverageIntensityTotalRegionsCb,
3889 : sumAverageIntensityTotalRegionsCr);
3890 :
3891 120 : ComputePictureSpatialStatistics(
3892 : sequence_control_set_ptr,
3893 : picture_control_set_ptr,
3894 : input_picture_ptr,
3895 : input_padded_picture_ptr,
3896 : sb_total_count);
3897 :
3898 120 : return;
3899 : }
3900 : /************************************************
3901 : * Pad Picture at the right and bottom sides
3902 : ** To match a multiple of min CU size in width and height
3903 : ************************************************/
3904 118 : void PadPictureToMultipleOfMinCuSizeDimensions(
3905 : SequenceControlSet *sequence_control_set_ptr,
3906 : EbPictureBufferDesc *input_picture_ptr)
3907 : {
3908 118 : EbBool is16BitInput = (EbBool)(sequence_control_set_ptr->static_config.encoder_bit_depth > EB_8BIT);
3909 :
3910 118 : uint32_t color_format = input_picture_ptr->color_format;
3911 118 : const uint16_t subsampling_x = (color_format == EB_YUV444 ? 1 : 2) - 1;
3912 118 : const uint16_t subsampling_y = (color_format >= EB_YUV422 ? 1 : 2) - 1;
3913 :
3914 : // Input Picture Padding
3915 118 : pad_input_picture(
3916 118 : &input_picture_ptr->buffer_y[input_picture_ptr->origin_x + (input_picture_ptr->origin_y * input_picture_ptr->stride_y)],
3917 118 : input_picture_ptr->stride_y,
3918 118 : (input_picture_ptr->width - sequence_control_set_ptr->pad_right),
3919 118 : (input_picture_ptr->height - sequence_control_set_ptr->pad_bottom),
3920 : sequence_control_set_ptr->pad_right,
3921 : sequence_control_set_ptr->pad_bottom);
3922 :
3923 120 : pad_input_picture(
3924 120 : &input_picture_ptr->buffer_cb[(input_picture_ptr->origin_x >> subsampling_x) + ((input_picture_ptr->origin_y >> subsampling_y) * input_picture_ptr->stride_cb)],
3925 120 : input_picture_ptr->stride_cb,
3926 120 : (input_picture_ptr->width - sequence_control_set_ptr->pad_right) >> subsampling_x,
3927 120 : (input_picture_ptr->height - sequence_control_set_ptr->pad_bottom) >> subsampling_y,
3928 120 : sequence_control_set_ptr->pad_right >> subsampling_x,
3929 120 : sequence_control_set_ptr->pad_bottom >> subsampling_y);
3930 :
3931 120 : pad_input_picture(
3932 120 : &input_picture_ptr->buffer_cr[(input_picture_ptr->origin_x >> subsampling_x) + ((input_picture_ptr->origin_y >> subsampling_y) * input_picture_ptr->stride_cb)],
3933 120 : input_picture_ptr->stride_cr,
3934 120 : (input_picture_ptr->width - sequence_control_set_ptr->pad_right) >> subsampling_x,
3935 120 : (input_picture_ptr->height - sequence_control_set_ptr->pad_bottom) >> subsampling_y,
3936 120 : sequence_control_set_ptr->pad_right >> subsampling_x,
3937 120 : sequence_control_set_ptr->pad_bottom >> subsampling_y);
3938 :
3939 120 : if (is16BitInput)
3940 : {
3941 0 : pad_input_picture(
3942 0 : &input_picture_ptr->buffer_bit_inc_y[input_picture_ptr->origin_x + (input_picture_ptr->origin_y * input_picture_ptr->stride_bit_inc_y)],
3943 0 : input_picture_ptr->stride_bit_inc_y,
3944 0 : (input_picture_ptr->width - sequence_control_set_ptr->pad_right),
3945 0 : (input_picture_ptr->height - sequence_control_set_ptr->pad_bottom),
3946 : sequence_control_set_ptr->pad_right,
3947 : sequence_control_set_ptr->pad_bottom);
3948 :
3949 0 : pad_input_picture(
3950 0 : &input_picture_ptr->buffer_bit_inc_cb[(input_picture_ptr->origin_x >> subsampling_x) + ((input_picture_ptr->origin_y >> subsampling_y) * input_picture_ptr->stride_bit_inc_cb)],
3951 0 : input_picture_ptr->stride_bit_inc_cb,
3952 0 : (input_picture_ptr->width - sequence_control_set_ptr->pad_right) >> subsampling_x,
3953 0 : (input_picture_ptr->height - sequence_control_set_ptr->pad_bottom) >> subsampling_y,
3954 0 : sequence_control_set_ptr->pad_right >> subsampling_x,
3955 0 : sequence_control_set_ptr->pad_bottom >> subsampling_y);
3956 :
3957 0 : pad_input_picture(
3958 0 : &input_picture_ptr->buffer_bit_inc_cr[(input_picture_ptr->origin_x >> subsampling_x) + ((input_picture_ptr->origin_y >> subsampling_y) * input_picture_ptr->stride_bit_inc_cb)],
3959 0 : input_picture_ptr->stride_bit_inc_cr,
3960 0 : (input_picture_ptr->width - sequence_control_set_ptr->pad_right) >> subsampling_x,
3961 0 : (input_picture_ptr->height - sequence_control_set_ptr->pad_bottom) >> subsampling_y,
3962 0 : sequence_control_set_ptr->pad_right >> subsampling_x,
3963 0 : sequence_control_set_ptr->pad_bottom >> subsampling_y);
3964 : }
3965 :
3966 120 : return;
3967 : }
3968 :
3969 : /************************************************
3970 : * Pad Picture at the right and bottom sides
3971 : ** To complete border SB smaller than SB size
3972 : ************************************************/
3973 117 : void PadPictureToMultipleOfLcuDimensions(
3974 : EbPictureBufferDesc *input_padded_picture_ptr
3975 : )
3976 : {
3977 : // Generate Padding
3978 117 : generate_padding(
3979 : &input_padded_picture_ptr->buffer_y[0],
3980 117 : input_padded_picture_ptr->stride_y,
3981 117 : input_padded_picture_ptr->width,
3982 117 : input_padded_picture_ptr->height,
3983 117 : input_padded_picture_ptr->origin_x,
3984 117 : input_padded_picture_ptr->origin_y);
3985 :
3986 120 : return;
3987 : }
3988 :
3989 : /************************************************
3990 : * 1/4 & 1/16 input picture decimation
3991 : ************************************************/
3992 128 : void DownsampleDecimationInputPicture(
3993 : PictureParentControlSet *picture_control_set_ptr,
3994 : EbPictureBufferDesc *input_padded_picture_ptr,
3995 : EbPictureBufferDesc *quarter_decimated_picture_ptr,
3996 : EbPictureBufferDesc *sixteenth_decimated_picture_ptr) {
3997 : // Decimate input picture for HME L0 and L1
3998 128 : if (picture_control_set_ptr->enable_hme_flag || picture_control_set_ptr->tf_enable_hme_flag) {
3999 128 : if (picture_control_set_ptr->enable_hme_level1_flag || picture_control_set_ptr->tf_enable_hme_level1_flag) {
4000 128 : decimation_2d(
4001 128 : &input_padded_picture_ptr->buffer_y[input_padded_picture_ptr->origin_x + input_padded_picture_ptr->origin_y * input_padded_picture_ptr->stride_y],
4002 128 : input_padded_picture_ptr->stride_y,
4003 128 : input_padded_picture_ptr->width,
4004 128 : input_padded_picture_ptr->height,
4005 128 : &quarter_decimated_picture_ptr->buffer_y[quarter_decimated_picture_ptr->origin_x + quarter_decimated_picture_ptr->origin_x*quarter_decimated_picture_ptr->stride_y],
4006 128 : quarter_decimated_picture_ptr->stride_y,
4007 : 2);
4008 128 : generate_padding(
4009 : &quarter_decimated_picture_ptr->buffer_y[0],
4010 128 : quarter_decimated_picture_ptr->stride_y,
4011 128 : quarter_decimated_picture_ptr->width,
4012 128 : quarter_decimated_picture_ptr->height,
4013 128 : quarter_decimated_picture_ptr->origin_x,
4014 128 : quarter_decimated_picture_ptr->origin_y);
4015 : }
4016 : }
4017 :
4018 : // Always perform 1/16th decimation as
4019 : // Sixteenth Input Picture Decimation
4020 128 : decimation_2d(
4021 128 : &input_padded_picture_ptr->buffer_y[input_padded_picture_ptr->origin_x + input_padded_picture_ptr->origin_y * input_padded_picture_ptr->stride_y],
4022 128 : input_padded_picture_ptr->stride_y,
4023 128 : input_padded_picture_ptr->width,
4024 128 : input_padded_picture_ptr->height,
4025 128 : &sixteenth_decimated_picture_ptr->buffer_y[sixteenth_decimated_picture_ptr->origin_x + sixteenth_decimated_picture_ptr->origin_x*sixteenth_decimated_picture_ptr->stride_y],
4026 128 : sixteenth_decimated_picture_ptr->stride_y,
4027 : 4);
4028 :
4029 128 : generate_padding(
4030 : &sixteenth_decimated_picture_ptr->buffer_y[0],
4031 128 : sixteenth_decimated_picture_ptr->stride_y,
4032 128 : sixteenth_decimated_picture_ptr->width,
4033 128 : sixteenth_decimated_picture_ptr->height,
4034 128 : sixteenth_decimated_picture_ptr->origin_x,
4035 128 : sixteenth_decimated_picture_ptr->origin_y);
4036 :
4037 128 : }
4038 : #if PAL_SUP
// Count the distinct sample values in a rows x cols block of 16-bit samples.
// val_count must hold at least (1 << bit_depth) entries; on return it contains
// the number of occurrences of each value. Returns the number of distinct
// values, or 0 as soon as a sample is not below (1 << bit_depth).
int av1_count_colors_highbd(uint16_t *src, int stride, int rows, int cols,
                            int bit_depth, int *val_count) {
    assert(bit_depth <= 12);
    const int num_levels = 1 << bit_depth;
    int       distinct   = 0;

    memset(val_count, 0, num_levels * sizeof(val_count[0]));

    for (int y = 0; y < rows; ++y) {
        for (int x = 0; x < cols; ++x) {
            const int sample = src[y * stride + x];
            assert(sample < num_levels);
            if (sample >= num_levels) return 0;
            // A value is new exactly when its bin was still empty.
            if (val_count[sample] == 0) ++distinct;
            ++val_count[sample];
        }
    }
    return distinct;
}
4059 : #endif
// Count the distinct sample values in a rows x cols block of 8-bit samples.
// val_count must hold at least 256 entries; on return it contains the number
// of occurrences of each value. Returns the number of distinct values.
int eb_av1_count_colors(const uint8_t *src, int stride, int rows, int cols,
                        int *val_count) {
    const int num_levels = 1 << 8;
    int       distinct   = 0;

    memset(val_count, 0, num_levels * sizeof(val_count[0]));

    for (int y = 0; y < rows; ++y) {
        for (int x = 0; x < cols; ++x) {
            const int sample = src[y * stride + x];
            assert(sample < num_levels);
            // A value is new exactly when its bin was still empty.
            if (val_count[sample] == 0) ++distinct;
            ++val_count[sample];
        }
    }
    return distinct;
}
4076 : extern aom_variance_fn_ptr_t mefn_ptr[BlockSizeS_ALL];
4077 :
4078 : // This is used as a reference when computing the source variance for the
4079 : // purposes of activity masking.
4080 : // Eventually this should be replaced by custom no-reference routines,
4081 : // which will be faster.
4082 : const uint8_t eb_AV1_VAR_OFFS[MAX_SB_SIZE] = {
4083 : 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
4084 : 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
4085 : 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
4086 : 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
4087 : 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
4088 : 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
4089 : 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
4090 : 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
4091 : 128, 128, 128, 128, 128, 128, 128, 128
4092 : };
4093 :
4094 814145 : unsigned int eb_av1_get_sby_perpixel_variance(const aom_variance_fn_ptr_t *fn_ptr, //const AV1_COMP *cpi,
4095 : const uint8_t *src,int stride,//const struct buf_2d *ref,
4096 : BlockSize bs) {
4097 : unsigned int sse;
4098 : const unsigned int var =
4099 : //cpi->fn_ptr[bs].vf(ref->buf, ref->stride, eb_AV1_VAR_OFFS, 0, &sse);
4100 814145 : fn_ptr->vf(src, stride, eb_AV1_VAR_OFFS, 0, &sse);
4101 814222 : return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
4102 : }
4103 :
4104 : // Estimate if the source frame is screen content, based on the portion of
4105 : // blocks that have no more than 4 (experimentally selected) luma colors.
4106 120 : static void is_screen_content(
4107 : PictureParentControlSet *picture_control_set_ptr,
4108 : const uint8_t *src,
4109 : int use_hbd,
4110 : int stride,
4111 : int width,
4112 : int height) {
4113 120 : assert(src != NULL);
4114 120 : const int blk_w = 16;
4115 120 : const int blk_h = 16;
4116 : // These threshold values are selected experimentally.
4117 120 : const int color_thresh = 4;
4118 120 : const unsigned int var_thresh = 0;
4119 : // Counts of blocks with no more than color_thresh colors.
4120 120 : int counts_1 = 0;
4121 : // Counts of blocks with no more than color_thresh colors and variance larger
4122 : // than var_thresh.
4123 120 : int counts_2 = 0;
4124 :
4125 2651 : for (int r = 0; r + blk_h <= height; r += blk_h) {
4126 104706 : for (int c = 0; c + blk_w <= width; c += blk_w) {
4127 : int count_buf[1 << 12]; // Maximum (1 << 12) color levels.
4128 102061 : const int n_colors =
4129 : use_hbd ? 0 /*av1_count_colors_highbd(src + r * stride + c, stride, blk_w,
4130 : blk_h, bd, count_buf)*/
4131 102175 : : eb_av1_count_colors(src + r * stride + c, stride, blk_w, blk_h,
4132 : count_buf);
4133 102061 : if (n_colors > 1 && n_colors <= color_thresh) {
4134 2787 : ++counts_1;
4135 : //struct buf_2d buf;
4136 : //buf.stride = stride;
4137 : //buf.buf = (uint8_t *)src;
4138 2787 : const aom_variance_fn_ptr_t *fn_ptr = &mefn_ptr[BLOCK_16X16];
4139 :
4140 2787 : const unsigned int var = eb_av1_get_sby_perpixel_variance(fn_ptr, src + r * stride + c,stride, BLOCK_16X16);
4141 : /* use_hbd
4142 : ? av1_high_get_sby_perpixel_variance(cpi, &buf, BLOCK_16X16, bd)
4143 : : */
4144 2792 : if (var > var_thresh) ++counts_2;
4145 : }
4146 : }
4147 : }
4148 :
4149 11 : picture_control_set_ptr->sc_content_detected =
4150 11 : (counts_1 * blk_h * blk_w * 10 > width * height) &&
4151 0 : ( counts_2 * blk_h * blk_w * 15 > width * height) ;
4152 11 : }
4153 :
4154 :
4155 : /************************************************
4156 : * 1/4 & 1/16 input picture downsampling (filtering)
4157 : ************************************************/
4158 64 : void DownsampleFilteringInputPicture(
4159 : PictureParentControlSet *picture_control_set_ptr,
4160 : EbPictureBufferDesc *input_padded_picture_ptr,
4161 : EbPictureBufferDesc *quarter_picture_ptr,
4162 : EbPictureBufferDesc *sixteenth_picture_ptr) {
4163 :
4164 : // Downsample input picture for HME L0 and L1
4165 64 : if (picture_control_set_ptr->enable_hme_flag || picture_control_set_ptr->tf_enable_hme_flag) {
4166 64 : if (picture_control_set_ptr->enable_hme_level1_flag || picture_control_set_ptr->tf_enable_hme_level1_flag) {
4167 :
4168 64 : downsample_2d(
4169 64 : &input_padded_picture_ptr->buffer_y[input_padded_picture_ptr->origin_x + input_padded_picture_ptr->origin_y * input_padded_picture_ptr->stride_y],
4170 64 : input_padded_picture_ptr->stride_y,
4171 64 : input_padded_picture_ptr->width,
4172 64 : input_padded_picture_ptr->height,
4173 64 : &quarter_picture_ptr->buffer_y[quarter_picture_ptr->origin_x + quarter_picture_ptr->origin_x * quarter_picture_ptr->stride_y],
4174 64 : quarter_picture_ptr->stride_y,
4175 : 2);
4176 64 : generate_padding(
4177 : &quarter_picture_ptr->buffer_y[0],
4178 64 : quarter_picture_ptr->stride_y,
4179 64 : quarter_picture_ptr->width,
4180 64 : quarter_picture_ptr->height,
4181 64 : quarter_picture_ptr->origin_x,
4182 64 : quarter_picture_ptr->origin_y);
4183 :
4184 : }
4185 :
4186 64 : if (picture_control_set_ptr->enable_hme_level0_flag || picture_control_set_ptr->tf_enable_hme_level0_flag) {
4187 : // Sixteenth Input Picture Downsampling
4188 64 : if (picture_control_set_ptr->enable_hme_level1_flag || picture_control_set_ptr->tf_enable_hme_level1_flag)
4189 64 : downsample_2d(
4190 64 : &quarter_picture_ptr->buffer_y[quarter_picture_ptr->origin_x + quarter_picture_ptr->origin_y * quarter_picture_ptr->stride_y],
4191 64 : quarter_picture_ptr->stride_y,
4192 64 : quarter_picture_ptr->width,
4193 64 : quarter_picture_ptr->height,
4194 64 : &sixteenth_picture_ptr->buffer_y[sixteenth_picture_ptr->origin_x + sixteenth_picture_ptr->origin_x*sixteenth_picture_ptr->stride_y],
4195 64 : sixteenth_picture_ptr->stride_y,
4196 : 2);
4197 : else
4198 0 : downsample_2d(
4199 0 : &input_padded_picture_ptr->buffer_y[input_padded_picture_ptr->origin_x + input_padded_picture_ptr->origin_y * input_padded_picture_ptr->stride_y],
4200 0 : input_padded_picture_ptr->stride_y,
4201 0 : input_padded_picture_ptr->width,
4202 0 : input_padded_picture_ptr->height,
4203 0 : &sixteenth_picture_ptr->buffer_y[sixteenth_picture_ptr->origin_x + sixteenth_picture_ptr->origin_x*sixteenth_picture_ptr->stride_y],
4204 0 : sixteenth_picture_ptr->stride_y,
4205 : 4);
4206 :
4207 64 : generate_padding(
4208 : &sixteenth_picture_ptr->buffer_y[0],
4209 64 : sixteenth_picture_ptr->stride_y,
4210 64 : sixteenth_picture_ptr->width,
4211 64 : sixteenth_picture_ptr->height,
4212 64 : sixteenth_picture_ptr->origin_x,
4213 64 : sixteenth_picture_ptr->origin_y);
4214 :
4215 : }
4216 : }
4217 64 : }
4218 :
4219 : /************************************************
4220 : * Picture Analysis Kernel
4221 : * Thread entry point; input_ptr is cast to PictureAnalysisContext*. Loops forever: each iteration takes one
4222 : * resource coordination result, analyses its picture (skipped for overlay pictures) and posts a result.
4223 : * The Picture Analysis Process pads & decimates the input pictures. It also includes creating an n-bin
4224 : * Histogram and gathering picture 1st and 2nd moment statistics for each 8x8 block, which are used to
4225 : * compute variance; the 64x64 variance and mean of each SB are copied to the PA reference object.
4226 : * The process is multithreaded, so pictures can be processed out of order as long as all inputs are available.
4227 : ************************************************/
4228 8 : void* picture_analysis_kernel(void *input_ptr)
4229 : {
4230 8 : PictureAnalysisContext *context_ptr = (PictureAnalysisContext*)input_ptr;
4231 : PictureParentControlSet *picture_control_set_ptr;
4232 : SequenceControlSet *sequence_control_set_ptr;
4233 : // Input/output result wrappers with their payloads, and the PA reference object of the current picture
4234 : EbObjectWrapper *inputResultsWrapperPtr;
4235 : ResourceCoordinationResults *inputResultsPtr;
4236 : EbObjectWrapper *outputResultsWrapperPtr;
4237 : PictureAnalysisResults *outputResultsPtr;
4238 : EbPaReferenceObject *paReferenceObject;
4239 : // input_picture_ptr is the PCS enhanced picture; input_padded_picture_ptr is taken from the PA reference object
4240 : EbPictureBufferDesc *input_padded_picture_ptr;
4241 : EbPictureBufferDesc *input_picture_ptr;
4242 :
4243 : // Picture size in SBs, used for the variance/statistics stages
4244 : uint32_t picture_width_in_sb;
4245 : uint32_t pictureHeighInLcu;
4246 : uint32_t sb_total_count;
4247 :
4248 : for (;;) { // endless loop (no break), one input result per iteration
4249 : // Get Input Full Object (presumably waits until one is available -- confirm in eb_get_full_object)
4250 128 : eb_get_full_object(
4251 : context_ptr->resource_coordination_results_input_fifo_ptr,
4252 : &inputResultsWrapperPtr);
4253 :
4254 118 : inputResultsPtr = (ResourceCoordinationResults*)inputResultsWrapperPtr->object_ptr;
4255 118 : picture_control_set_ptr = (PictureParentControlSet*)inputResultsPtr->picture_control_set_wrapper_ptr->object_ptr;
4256 :
4257 : // There is no need to do processing for overlay picture. Overlay and AltRef share the same results.
4258 118 : if (!picture_control_set_ptr->is_overlay)
4259 : {
4260 120 : sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
4261 120 : input_picture_ptr = picture_control_set_ptr->enhanced_picture_ptr;
4262 :
4263 120 : paReferenceObject = (EbPaReferenceObject*)picture_control_set_ptr->pa_reference_picture_wrapper_ptr->object_ptr;
4264 120 : input_padded_picture_ptr = (EbPictureBufferDesc*)paReferenceObject->input_padded_picture_ptr;
4265 : // SB counts per row and per column (max frame dimensions divided by sb_sz, rounded up) and their product
4266 120 : picture_width_in_sb = (sequence_control_set_ptr->seq_header.max_frame_width + sequence_control_set_ptr->sb_sz - 1) / sequence_control_set_ptr->sb_sz;
4267 120 : pictureHeighInLcu = (sequence_control_set_ptr->seq_header.max_frame_height + sequence_control_set_ptr->sb_sz - 1) / sequence_control_set_ptr->sb_sz;
4268 120 : sb_total_count = picture_width_in_sb * pictureHeighInLcu;
4269 :
4270 : // Set picture parameters to account for subpicture, picture scantype, and set regions by resolutions
4271 120 : SetPictureParametersForStatisticsGathering(
4272 : sequence_control_set_ptr);
4273 :
4274 : // Pad pictures to multiple min cu size
4275 119 : PadPictureToMultipleOfMinCuSizeDimensions(
4276 : sequence_control_set_ptr,
4277 : input_picture_ptr);
4278 :
4279 : // Pre processing operations performed on the input picture
4280 119 : PicturePreProcessingOperations(
4281 : picture_control_set_ptr,
4282 : sequence_control_set_ptr,
4283 : sb_total_count);
4284 118 : if (input_picture_ptr->color_format >= EB_YUV422) {
4285 : // Convert 422/444 => 420 here, since this kernel is multi-threaded
4286 : // Reuse the Y plane (pointer copy below); only cb/cr are added to the chroma-downsampled buffer desc
4287 : // NOTE: denoising may change the source, so this part must stay after PicturePreProcessingOperations()
4288 0 : picture_control_set_ptr->chroma_downsampled_picture_ptr->buffer_y = input_picture_ptr->buffer_y;
4289 0 : DownSampleChroma(input_picture_ptr, picture_control_set_ptr->chroma_downsampled_picture_ptr);
4290 : }
4291 : else // color_format below EB_YUV422: no conversion, point directly at the input picture
4292 118 : picture_control_set_ptr->chroma_downsampled_picture_ptr = input_picture_ptr;
4293 : // Pad input picture to complete border LCUs
4294 118 : PadPictureToMultipleOfLcuDimensions(
4295 : input_padded_picture_ptr);
4296 : // 1/4 & 1/16 input picture decimation
4297 120 : DownsampleDecimationInputPicture(
4298 : picture_control_set_ptr,
4299 : input_padded_picture_ptr,
4300 : (EbPictureBufferDesc*)paReferenceObject->quarter_decimated_picture_ptr,
4301 : (EbPictureBufferDesc*)paReferenceObject->sixteenth_decimated_picture_ptr);
4302 :
4303 : // 1/4 & 1/16 input picture downsampling through filtering (only when the ME search uses filtered pictures)
4304 120 : if (sequence_control_set_ptr->down_sampling_method_me_search == ME_FILTERED_DOWNSAMPLED) {
4305 60 : DownsampleFilteringInputPicture(
4306 : picture_control_set_ptr,
4307 : input_padded_picture_ptr,
4308 : (EbPictureBufferDesc*)paReferenceObject->quarter_filtered_picture_ptr,
4309 : (EbPictureBufferDesc*)paReferenceObject->sixteenth_filtered_picture_ptr);
4310 : }
4311 : // Gathering statistics of input picture, including Variance Calculation, Histogram Bins
4312 120 : GatheringPictureStatistics(
4313 : sequence_control_set_ptr,
4314 : picture_control_set_ptr,
4315 : picture_control_set_ptr->chroma_downsampled_picture_ptr, //420 input_picture_ptr
4316 : input_padded_picture_ptr,
4317 : (EbPictureBufferDesc*)paReferenceObject->sixteenth_decimated_picture_ptr, // Hsan: always use decimated until studying the trade offs
4318 : sb_total_count);
4319 :
4320 120 : if (sequence_control_set_ptr->static_config.screen_content_mode == 2){ // auto detect
4321 120 : is_screen_content(
4322 : picture_control_set_ptr,
4323 120 : input_picture_ptr->buffer_y + input_picture_ptr->origin_x + input_picture_ptr->origin_y*input_picture_ptr->stride_y,
4324 : 0,
4325 120 : input_picture_ptr->stride_y,
4326 120 : sequence_control_set_ptr->seq_header.max_frame_width, sequence_control_set_ptr->seq_header.max_frame_height);
4327 : }
4328 : else // off / on: the configured mode is used directly as the detection result
4329 0 : picture_control_set_ptr->sc_content_detected = sequence_control_set_ptr->static_config.screen_content_mode;
4330 :
4331 : // Hold the 64x64 variance and mean in the reference frame. NOTE(review): the bound is picture_control_set_ptr->sb_total_count, not the local sb_total_count -- confirm they agree
4332 : uint32_t sb_index;
4333 7308 : for (sb_index = 0; sb_index < picture_control_set_ptr->sb_total_count; ++sb_index) {
4334 7188 : paReferenceObject->variance[sb_index] = picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_64x64];
4335 7188 : paReferenceObject->y_mean[sb_index] = picture_control_set_ptr->y_mean[sb_index][ME_TIER_ZERO_PU_64x64];
4336 : }
4337 : }
4338 : // Get Empty Results Object (reached for overlay pictures too, so every input produces one output)
4339 118 : eb_get_empty_object(
4340 : context_ptr->picture_analysis_results_output_fifo_ptr,
4341 : &outputResultsWrapperPtr);
4342 :
4343 120 : outputResultsPtr = (PictureAnalysisResults*)outputResultsWrapperPtr->object_ptr;
4344 120 : outputResultsPtr->picture_control_set_wrapper_ptr = inputResultsPtr->picture_control_set_wrapper_ptr;
4345 :
4346 : // Release the Input Results (the PCS wrapper pointer was already copied to the output above)
4347 120 : eb_release_object(inputResultsWrapperPtr);
4348 :
4349 : // Post the Full Results Object
4350 120 : eb_post_full_object(outputResultsWrapperPtr);
4351 : }
4352 : return EB_NULL; // not reached: the loop above has no break
4353 : }
|