Line data Source code
1 : /*
2 : * Copyright(c) 2019 Intel Corporation
3 : * SPDX - License - Identifier: BSD - 2 - Clause - Patent
4 : */
5 :
6 : #include "EbModeDecisionConfiguration.h"
7 : #include "EbRateDistortionCost.h"
8 : #include "EbUtility.h"
9 : #include "EbModeDecisionProcess.h"
10 : #include "EbDefinitions.h"
11 : #if ADD_MDC_FULL_COST
12 : #include "aom_dsp_rtcd.h"
13 : #include "EbAdaptiveMotionVectorPrediction.h"
14 : #endif
15 : /********************************************
16 : * Constants
17 : ********************************************/
/*
 * 85-entry lookup table indexed by CU index (the same index used to walk
 * local_cu_array). ForwardCuToModeDecision reads pa_to_ep_block_index[cu_index]
 * to fill the mds_idx field of each leaf it emits.
 * NOTE(review): the "pa"/"ep" naming presumably refers to two different block
 * numbering schemes - confirm against the headers.
 */
18 : int pa_to_ep_block_index[85] = {
19 : 0 ,
20 : 25 ,
21 : 50 ,
22 : 75 , 84 , 93 , 102 ,
23 : 111 ,
24 : 136 , 145 , 154 , 163 ,
25 : 172 ,
26 : 197 , 206 , 215 , 224 ,
27 : 233 ,
28 : 258 , 267 , 276 , 285 ,
29 : 294 ,
30 : 319 ,
31 : 344 , 353 , 362 , 371 ,
32 : 380 ,
33 : 405 , 414 , 423 , 432 ,
34 : 441 ,
35 : 466 , 475 , 484 , 493 ,
36 : 502 ,
37 : 527 , 536 , 545 , 554 ,
38 : 563 ,
39 : 588 ,
40 : 613 , 622 , 631 , 640 ,
41 : 649 ,
42 : 674 , 683 , 692 , 701 ,
43 : 710 ,
44 : 735 , 744 , 753 , 762 ,
45 : 771 ,
46 : 796 , 805 , 814 , 823 ,
47 : 832 ,
48 : 857 ,
49 : 882 , 891 , 900 , 909 ,
50 : 918 ,
51 : 943 , 952 , 961 , 970 ,
52 : 979 ,
53 : 1004 , 1013 , 1022 , 1031 ,
54 : 1040 ,
55 : 1065 , 1074 , 1083 , 1092
56 : };
57 : #if ADD_MDC_FULL_COST
/*
 * Maps a block-geometry shape value to a PartitionType. It is indexed with
 * blk_geom->shape in mdc_d1_non_square_block_decision, so the entry order
 * must follow the numeric order of the shape enumeration.
 * NOTE(review): the shape enumeration is declared elsewhere - confirm its
 * order matches this table before adding or reordering entries.
 */
58 : static PartitionType from_shape_to_part[] = {
59 : PARTITION_NONE,
60 : PARTITION_HORZ,
61 : PARTITION_VERT,
62 : PARTITION_HORZ_A,
63 : PARTITION_HORZ_B,
64 : PARTITION_VERT_A,
65 : PARTITION_VERT_B,
66 : PARTITION_HORZ_4,
67 : PARTITION_VERT_4,
68 : PARTITION_SPLIT
69 : };
70 : #endif
/* CU classes used by ForwardCuToModeDecision when walking the CU tree. */
71 : #define ADD_CU_STOP_SPLIT 0 // Emit the CU as a leaf and do not visit its children
72 : #define ADD_CU_CONTINUE_SPLIT 1 // Emit the CU as a leaf and keep visiting its children
73 : #define DO_NOT_ADD_CU_CONTINUE_SPLIT 2 // Do not emit the CU; keep visiting its children
74 :
/* Depth values named after the CU size they correspond to. */
75 : #define DEPTH_64 0 // Depth of a 64x64 CU
76 : #define DEPTH_32 1 // Depth of a 32x32 CU
77 : #define DEPTH_16 2 // Depth of a 16x16 CU
78 : #define DEPTH_8 3 // Depth of an 8x8 CU
79 :
/*
 * Distance table used to locate the parent of a CU:
 *
 *     parent_index = cu_index - 1 - parentCuIndex[cu_index]
 *
 * Entry 0 is the 64x64 root. Each following row of 21 entries describes one
 * 32x32 quadrant (the 32x32 CU itself, then its four 16x16 CUs each followed
 * by their four 8x8 CUs). The first entry of each row must therefore be
 * (cu_index - 1) so that every 32x32 CU (indices 1, 22, 43, 64) resolves to
 * the root at index 0.
 *
 * The fourth row previously started with 36 instead of 63, which made the
 * parent of CU 64 resolve to CU 27 rather than to the root.
 */
static const uint8_t parentCuIndex[85] =
{
    0,
    0, 0, 0, 1, 2, 3, 5, 0, 1, 2, 3, 10, 0, 1, 2, 3, 15, 0, 1, 2, 3,
    21, 0, 0, 1, 2, 3, 5, 0, 1, 2, 3, 10, 0, 1, 2, 3, 15, 0, 1, 2, 3,
    42, 0, 0, 1, 2, 3, 5, 0, 1, 2, 3, 10, 0, 1, 2, 3, 15, 0, 1, 2, 3,
    63, 0, 0, 1, 2, 3, 5, 0, 1, 2, 3, 10, 0, 1, 2, 3, 15, 0, 1, 2, 3,
};
88 :
/*
 * 85-entry per-CU table grouped by block size, as marked below: 1 entry for
 * 64x64, 4 for 32x32, 16 for 16x16 and 64 for 8x8.
 * NOTE(review): no use of this table is visible in this part of the file -
 * confirm its consumers and the meaning of the value 4 before editing.
 */
89 : const uint8_t incrementalCount[85] = {
90 : //64x64
91 : 0,
92 : //32x32
93 : 4, 4,
94 : 4, 4,
95 : //16x16
96 : 0, 0, 0, 0,
97 : 0, 4, 0, 4,
98 : 0, 0, 0, 0,
99 : 0, 4, 0, 4,
100 : //8x8
101 : 0, 0, 0, 0, 0, 0, 0, 0,
102 : 0, 0, 0, 0, 0, 0, 0, 0,
103 : 0, 0, 0, 0, 0, 0, 0, 0,
104 : 0, 0, 0, 4, 0, 0, 0, 4,
105 : 0, 0, 0, 0, 0, 0, 0, 0,
106 : 0, 0, 0, 0, 0, 0, 0, 0,
107 : 0, 0, 0, 0, 0, 0, 0, 0,
108 : 0, 0, 0, 4, 0, 0, 0, 4
109 : };
110 :
111 : #if PREDICT_NSQ_SHAPE
/*
 * Declared here, defined in another translation unit. Only compiled when
 * PREDICT_NSQ_SHAPE is enabled.
 * NOTE(review): presumably returns the index of the motion-estimation result
 * matching blk_geom at the given offsets - confirm against the definition.
 */
112 : extern uint32_t get_me_info_index(
113 : uint32_t max_me_block,
114 : const BlockGeom *blk_geom,
115 : uint32_t geom_offset_x,
116 : uint32_t geom_offset_y);
117 : #endif
118 :
119 : /*******************************************
120 : MdcRefinement : select the depths to be tested around the predicted depth
121 : *******************************************/
/*
 * Bit flags combined into the refinementLevel mask handled by MdcRefinement.
 * "Pp<n>" flags address CUs n depths below the predicted CU (its children,
 * grandchildren, ...); "Pm<n>" flags address CUs n depths above it.
 */
122 : #define REFINEMENT_P 0x01
123 : #define REFINEMENT_Pp1 0x02
124 : #define REFINEMENT_Pp2 0x04
125 : #define REFINEMENT_Pp3 0x08
126 : #define REFINEMENT_Pm1 0x10
127 : #define REFINEMENT_Pm2 0x20
128 : #define REFINEMENT_Pm3 0x40
129 :
130 8772 : EbErrorType MdcRefinement(
131 : MdcpLocalCodingUnit *local_cu_array,
132 : uint32_t cu_index,
133 : uint32_t depth,
134 : uint8_t refinementLevel,
135 : uint8_t lowestLevel)
136 : {
137 8772 : EbErrorType return_error = EB_ErrorNone;
138 :
139 8772 : if (refinementLevel & REFINEMENT_P) {
140 8772 : if (lowestLevel == REFINEMENT_P)
141 6126 : local_cu_array[cu_index].stop_split = EB_TRUE;
142 : }
143 : else
144 0 : local_cu_array[cu_index].selected_cu = EB_FALSE;
145 8772 : if (refinementLevel & REFINEMENT_Pp1) {
146 2646 : if (depth < 3 && cu_index < 81) {
147 2646 : local_cu_array[cu_index + 1].selected_cu = EB_TRUE;
148 2646 : local_cu_array[cu_index + 1 + depth_offset[depth + 1]].selected_cu = EB_TRUE;
149 2646 : local_cu_array[cu_index + 1 + 2 * depth_offset[depth + 1]].selected_cu = EB_TRUE;
150 2646 : local_cu_array[cu_index + 1 + 3 * depth_offset[depth + 1]].selected_cu = EB_TRUE;
151 : }
152 2646 : if (lowestLevel == REFINEMENT_Pp1) {
153 2131 : if (depth < 3 && cu_index < 81) {
154 2131 : local_cu_array[cu_index + 1].stop_split = EB_TRUE;
155 2131 : local_cu_array[cu_index + 1 + depth_offset[depth + 1]].stop_split = EB_TRUE;
156 2131 : local_cu_array[cu_index + 1 + 2 * depth_offset[depth + 1]].stop_split = EB_TRUE;
157 2131 : local_cu_array[cu_index + 1 + 3 * depth_offset[depth + 1]].stop_split = EB_TRUE;
158 : }
159 : }
160 : }
161 :
162 8772 : if (refinementLevel & REFINEMENT_Pp2) {
163 515 : if (depth < 2 && cu_index < 65) {
164 515 : local_cu_array[cu_index + 1 + 1].selected_cu = EB_TRUE;
165 515 : local_cu_array[cu_index + 1 + 1 + depth_offset[depth + 2]].selected_cu = EB_TRUE;
166 515 : local_cu_array[cu_index + 1 + 1 + 2 * depth_offset[depth + 2]].selected_cu = EB_TRUE;
167 515 : local_cu_array[cu_index + 1 + 1 + 3 * depth_offset[depth + 2]].selected_cu = EB_TRUE;
168 :
169 515 : local_cu_array[cu_index + 1 + depth_offset[depth + 1] + 1].selected_cu = EB_TRUE;
170 515 : local_cu_array[cu_index + 1 + depth_offset[depth + 1] + 1 + depth_offset[depth + 2]].selected_cu = EB_TRUE;
171 515 : local_cu_array[cu_index + 1 + depth_offset[depth + 1] + 1 + 2 * depth_offset[depth + 2]].selected_cu = EB_TRUE;
172 515 : local_cu_array[cu_index + 1 + depth_offset[depth + 1] + 1 + 3 * depth_offset[depth + 2]].selected_cu = EB_TRUE;
173 :
174 515 : local_cu_array[cu_index + 1 + 2 * depth_offset[depth + 1] + 1].selected_cu = EB_TRUE;
175 515 : local_cu_array[cu_index + 1 + 2 * depth_offset[depth + 1] + 1 + depth_offset[depth + 2]].selected_cu = EB_TRUE;
176 515 : local_cu_array[cu_index + 1 + 2 * depth_offset[depth + 1] + 1 + 2 * depth_offset[depth + 2]].selected_cu = EB_TRUE;
177 515 : local_cu_array[cu_index + 1 + 2 * depth_offset[depth + 1] + 1 + 3 * depth_offset[depth + 2]].selected_cu = EB_TRUE;
178 :
179 515 : local_cu_array[cu_index + 1 + 3 * depth_offset[depth + 1] + 1].selected_cu = EB_TRUE;
180 515 : local_cu_array[cu_index + 1 + 3 * depth_offset[depth + 1] + 1 + depth_offset[depth + 2]].selected_cu = EB_TRUE;
181 515 : local_cu_array[cu_index + 1 + 3 * depth_offset[depth + 1] + 1 + 2 * depth_offset[depth + 2]].selected_cu = EB_TRUE;
182 515 : local_cu_array[cu_index + 1 + 3 * depth_offset[depth + 1] + 1 + 3 * depth_offset[depth + 2]].selected_cu = EB_TRUE;
183 : }
184 515 : if (lowestLevel == REFINEMENT_Pp2) {
185 515 : if (depth < 2 && cu_index < 65) {
186 515 : local_cu_array[cu_index + 1 + 1].stop_split = EB_TRUE;
187 515 : local_cu_array[cu_index + 1 + 1 + depth_offset[depth + 2]].stop_split = EB_TRUE;
188 515 : local_cu_array[cu_index + 1 + 1 + 2 * depth_offset[depth + 2]].stop_split = EB_TRUE;
189 515 : local_cu_array[cu_index + 1 + 1 + 3 * depth_offset[depth + 2]].stop_split = EB_TRUE;
190 :
191 515 : local_cu_array[cu_index + 1 + depth_offset[depth + 1] + 1].stop_split = EB_TRUE;
192 515 : local_cu_array[cu_index + 1 + depth_offset[depth + 1] + 1 + depth_offset[depth + 2]].stop_split = EB_TRUE;
193 515 : local_cu_array[cu_index + 1 + depth_offset[depth + 1] + 1 + 2 * depth_offset[depth + 2]].stop_split = EB_TRUE;
194 515 : local_cu_array[cu_index + 1 + depth_offset[depth + 1] + 1 + 3 * depth_offset[depth + 2]].stop_split = EB_TRUE;
195 :
196 515 : local_cu_array[cu_index + 1 + 2 * depth_offset[depth + 1] + 1].stop_split = EB_TRUE;
197 515 : local_cu_array[cu_index + 1 + 2 * depth_offset[depth + 1] + 1 + depth_offset[depth + 2]].stop_split = EB_TRUE;
198 515 : local_cu_array[cu_index + 1 + 2 * depth_offset[depth + 1] + 1 + 2 * depth_offset[depth + 2]].stop_split = EB_TRUE;
199 515 : local_cu_array[cu_index + 1 + 2 * depth_offset[depth + 1] + 1 + 3 * depth_offset[depth + 2]].stop_split = EB_TRUE;
200 :
201 515 : local_cu_array[cu_index + 1 + 3 * depth_offset[depth + 1] + 1].stop_split = EB_TRUE;
202 515 : local_cu_array[cu_index + 1 + 3 * depth_offset[depth + 1] + 1 + depth_offset[depth + 2]].stop_split = EB_TRUE;
203 515 : local_cu_array[cu_index + 1 + 3 * depth_offset[depth + 1] + 1 + 2 * depth_offset[depth + 2]].stop_split = EB_TRUE;
204 515 : local_cu_array[cu_index + 1 + 3 * depth_offset[depth + 1] + 1 + 3 * depth_offset[depth + 2]].stop_split = EB_TRUE;
205 : }
206 : }
207 : }
208 :
209 8772 : if (refinementLevel & REFINEMENT_Pp3) {
210 : uint8_t inLoop;
211 : uint8_t outLoop;
212 0 : uint8_t cu_index = 2;
213 0 : if (depth == 0) {
214 0 : for (outLoop = 0; outLoop < 16; ++outLoop) {
215 0 : for (inLoop = 0; inLoop < 4; ++inLoop)
216 0 : local_cu_array[++cu_index].selected_cu = EB_TRUE;
217 0 : cu_index += cu_index == 21 ? 2 : cu_index == 42 ? 2 : cu_index == 63 ? 2 : 1;
218 : }
219 0 : if (lowestLevel == REFINEMENT_Pp3) {
220 0 : cu_index = 2;
221 0 : for (outLoop = 0; outLoop < 16; ++outLoop) {
222 0 : for (inLoop = 0; inLoop < 4; ++inLoop)
223 0 : local_cu_array[++cu_index].stop_split = EB_TRUE;
224 0 : cu_index += cu_index == 21 ? 2 : cu_index == 42 ? 2 : cu_index == 63 ? 2 : 1;
225 : }
226 : }
227 : }
228 : }
229 :
230 8772 : if (refinementLevel & REFINEMENT_Pm1) {
231 2432 : if (depth > 0)
232 2432 : local_cu_array[cu_index - 1 - parentCuIndex[cu_index]].selected_cu = EB_TRUE;
233 2432 : if (lowestLevel == REFINEMENT_Pm1) {
234 0 : if (depth > 0)
235 0 : local_cu_array[cu_index - 1 - parentCuIndex[cu_index]].stop_split = EB_TRUE;
236 : }
237 : }
238 :
239 8772 : if (refinementLevel & REFINEMENT_Pm2) {
240 0 : if (depth == 2)
241 0 : local_cu_array[0].selected_cu = EB_TRUE;
242 0 : if (depth == 3) {
243 0 : local_cu_array[1].selected_cu = EB_TRUE;
244 0 : local_cu_array[22].selected_cu = EB_TRUE;
245 0 : local_cu_array[43].selected_cu = EB_TRUE;
246 0 : local_cu_array[64].selected_cu = EB_TRUE;
247 : }
248 0 : if (lowestLevel == REFINEMENT_Pm2) {
249 0 : if (depth == 2)
250 0 : local_cu_array[0].stop_split = EB_TRUE;
251 0 : if (depth == 3) {
252 0 : local_cu_array[1].stop_split = EB_TRUE;
253 0 : local_cu_array[22].stop_split = EB_TRUE;
254 0 : local_cu_array[43].stop_split = EB_TRUE;
255 0 : local_cu_array[64].stop_split = EB_TRUE;
256 : }
257 : }
258 : }
259 :
260 8772 : if (refinementLevel & REFINEMENT_Pm3) {
261 0 : if (depth == 3)
262 0 : local_cu_array[0].selected_cu = EB_TRUE;
263 0 : if (lowestLevel == REFINEMENT_Pm2) {
264 0 : if (depth == 3)
265 0 : local_cu_array[0].stop_split = EB_TRUE;
266 : }
267 : }
268 :
269 8772 : return return_error;
270 : }
271 :
272 2625 : void RefinementPredictionLoop(
273 : SequenceControlSet *sequence_control_set_ptr,
274 : PictureControlSet *picture_control_set_ptr,
275 : uint32_t sb_index,
276 : ModeDecisionConfigurationContext *context_ptr)
277 : {
278 2625 : MdcpLocalCodingUnit *local_cu_array = context_ptr->local_cu_array;
279 2625 : SbParams *sb_params = &sequence_control_set_ptr->sb_params_array[sb_index];
280 2625 : uint32_t cu_index = 0;
281 31462 : while (cu_index < CU_MAX_COUNT)
282 : {
283 28837 : if (sb_params->raster_scan_cu_validity[md_scan_to_raster_scan[cu_index]] && (local_cu_array[cu_index].early_split_flag == EB_FALSE))
284 8772 : {
285 8772 : local_cu_array[cu_index].selected_cu = EB_TRUE;
286 8772 : uint32_t depth = get_coded_unit_stats(cu_index)->depth;
287 : uint8_t refinementLevel;
288 : {
289 8772 : if (picture_control_set_ptr->parent_pcs_ptr->pic_depth_mode == PIC_SB_SWITCH_DEPTH_MODE && picture_control_set_ptr->parent_pcs_ptr->sb_depth_mode_array[sb_index] == SB_PRED_OPEN_LOOP_DEPTH_MODE)
290 3694 : refinementLevel = Pred;
291 : else
292 :
293 5078 : if (picture_control_set_ptr->parent_pcs_ptr->pic_depth_mode == PIC_SB_SWITCH_DEPTH_MODE && picture_control_set_ptr->parent_pcs_ptr->sb_depth_mode_array[sb_index] == SB_FAST_OPEN_LOOP_DEPTH_MODE)
294 2323 : refinementLevel = ndp_level_1[depth];
295 : else { // SB_OPEN_LOOP_DEPTH_MODE
296 2755 : refinementLevel = ndp_level_0[depth];
297 : }
298 :
299 8772 : if (picture_control_set_ptr->parent_pcs_ptr->cu8x8_mode == CU_8x8_MODE_1) {
300 0 : refinementLevel = ((refinementLevel & REFINEMENT_Pp1) && depth == 2) ? refinementLevel - REFINEMENT_Pp1 :
301 0 : ((refinementLevel & REFINEMENT_Pp2) && depth == 1) ? refinementLevel - REFINEMENT_Pp2 :
302 0 : ((refinementLevel & REFINEMENT_Pp3) && depth == 0) ? refinementLevel - REFINEMENT_Pp3 : refinementLevel;
303 : }
304 :
305 8772 : uint8_t lowestLevel = 0x00;
306 :
307 14898 : lowestLevel = (refinementLevel & REFINEMENT_Pp3) ? REFINEMENT_Pp3 : (refinementLevel & REFINEMENT_Pp2) ? REFINEMENT_Pp2 : (refinementLevel & REFINEMENT_Pp1) ? REFINEMENT_Pp1 :
308 6126 : (refinementLevel & REFINEMENT_P) ? REFINEMENT_P :
309 0 : (refinementLevel & REFINEMENT_Pm1) ? REFINEMENT_Pm1 : (refinementLevel & REFINEMENT_Pm2) ? REFINEMENT_Pm2 : (refinementLevel & REFINEMENT_Pm3) ? REFINEMENT_Pm3 : 0x00;
310 :
311 8772 : MdcRefinement(
312 8772 : &(*context_ptr->local_cu_array),
313 : cu_index,
314 : depth,
315 : refinementLevel,
316 : lowestLevel);
317 : }
318 :
319 8772 : cu_index += depth_offset[depth];
320 : }
321 : else
322 20065 : cu_index++;
323 : } // End while 1 CU Loop
324 2625 : }
325 :
326 2625 : void ForwardCuToModeDecision(
327 : SequenceControlSet *sequence_control_set_ptr,
328 : PictureControlSet *picture_control_set_ptr,
329 : uint32_t sb_index,
330 : ModeDecisionConfigurationContext *context_ptr
331 : )
332 : {
333 2625 : uint8_t cu_index = 0;
334 2625 : uint32_t cuClass = DO_NOT_ADD_CU_CONTINUE_SPLIT;
335 2625 : EbBool split_flag = EB_TRUE;
336 2625 : MdcLcuData *resultsPtr = &picture_control_set_ptr->mdc_sb_array[sb_index];
337 2625 : SbParams *sb_params = &sequence_control_set_ptr->sb_params_array[sb_index];
338 2625 : MdcpLocalCodingUnit *local_cu_array = context_ptr->local_cu_array;
339 2625 : EB_SLICE slice_type = picture_control_set_ptr->slice_type;
340 :
341 : // CU Loop
342 2625 : const CodedUnitStats *cuStatsPtr = get_coded_unit_stats(0);
343 :
344 2625 : resultsPtr->leaf_count = 0;
345 2625 : uint8_t enable_blk_4x4 = 0;
346 2625 : cu_index = 0;
347 :
348 50234 : while (cu_index < CU_MAX_COUNT)
349 : {
350 47609 : split_flag = EB_TRUE;
351 47609 : if (sb_params->raster_scan_cu_validity[md_scan_to_raster_scan[cu_index]])
352 : {
353 27917 : cuStatsPtr = get_coded_unit_stats(cu_index);
354 :
355 27917 : switch (cuStatsPtr->depth) {
356 21806 : case 0:
357 : case 1:
358 : case 2:
359 :
360 21806 : cuClass = DO_NOT_ADD_CU_CONTINUE_SPLIT;
361 :
362 21806 : if (slice_type == I_SLICE) {
363 0 : cuClass = local_cu_array[cu_index].selected_cu == EB_TRUE ? ADD_CU_CONTINUE_SPLIT : cuClass;
364 0 : cuClass = local_cu_array[cu_index].stop_split == EB_TRUE ? ADD_CU_STOP_SPLIT : cuClass;
365 : }
366 : else {
367 21806 : cuClass = local_cu_array[cu_index].selected_cu == EB_TRUE ? ADD_CU_CONTINUE_SPLIT : cuClass;
368 21806 : cuClass = local_cu_array[cu_index].stop_split == EB_TRUE ? ADD_CU_STOP_SPLIT : cuClass;
369 : }
370 :
371 : // Take into account MAX CU size & MAX intra size (from the API)
372 21806 : cuClass = (cuStatsPtr->size > sequence_control_set_ptr->max_cu_size || (slice_type == I_SLICE && cuStatsPtr->size > sequence_control_set_ptr->max_intra_size)) ?
373 43612 : DO_NOT_ADD_CU_CONTINUE_SPLIT :
374 : cuClass;
375 :
376 : // Take into account MIN CU size & Min intra size(from the API)
377 43614 : cuClass = (cuStatsPtr->size == sequence_control_set_ptr->min_cu_size || (slice_type == I_SLICE && cuStatsPtr->size == sequence_control_set_ptr->min_intra_size)) ?
378 43614 : ADD_CU_STOP_SPLIT :
379 : cuClass;
380 :
381 : switch (cuClass) {
382 16750 : case ADD_CU_STOP_SPLIT:
383 : // Stop
384 16750 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].leaf_index = cu_index;
385 16750 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].mds_idx = pa_to_ep_block_index[cu_index];
386 16750 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].tot_d1_blocks = 1;
387 16750 : resultsPtr->leaf_data_array[resultsPtr->leaf_count++].split_flag = split_flag = EB_FALSE;
388 :
389 16750 : break;
390 :
391 4788 : case ADD_CU_CONTINUE_SPLIT:
392 : // Go Down + consider the current CU as candidate
393 4788 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].leaf_index = cu_index;
394 4788 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].mds_idx = pa_to_ep_block_index[cu_index];
395 4788 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].tot_d1_blocks = 1;
396 4788 : resultsPtr->leaf_data_array[resultsPtr->leaf_count++].split_flag = split_flag = EB_TRUE;
397 :
398 4788 : break;
399 :
400 278 : case DO_NOT_ADD_CU_CONTINUE_SPLIT:
401 : // Go Down + do not consider the current CU as candidate
402 278 : split_flag = EB_TRUE;
403 :
404 278 : break;
405 :
406 0 : default:
407 0 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].leaf_index = cu_index;
408 0 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].mds_idx = pa_to_ep_block_index[cu_index];
409 0 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].tot_d1_blocks = 1;
410 0 : resultsPtr->leaf_data_array[resultsPtr->leaf_count++].split_flag = split_flag = EB_TRUE;
411 :
412 0 : break;
413 : }
414 :
415 21806 : break;
416 6119 : case 3:
417 :
418 6119 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].leaf_index = cu_index;
419 6119 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].mds_idx = pa_to_ep_block_index[cu_index];
420 6119 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].tot_d1_blocks = 1;
421 :
422 6119 : if (enable_blk_4x4) {
423 0 : resultsPtr->leaf_data_array[resultsPtr->leaf_count++].split_flag = split_flag = EB_TRUE;
424 :
425 0 : int first_4_index = pa_to_ep_block_index[cu_index] + d1_depth_offset[sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128][cuStatsPtr->depth];
426 0 : for (int i = 0; i < 4; ++i) {
427 0 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].leaf_index = cu_index;
428 :
429 0 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].mds_idx = first_4_index + i;
430 0 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].tot_d1_blocks = 1;
431 :
432 0 : resultsPtr->leaf_data_array[resultsPtr->leaf_count++].split_flag = split_flag = EB_FALSE;
433 : }
434 : }else
435 6119 : resultsPtr->leaf_data_array[resultsPtr->leaf_count++].split_flag = split_flag = EB_FALSE;
436 :
437 6119 : break;
438 :
439 0 : default:
440 0 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].leaf_index = cu_index;
441 0 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].mds_idx = pa_to_ep_block_index[cu_index];
442 0 : resultsPtr->leaf_data_array[resultsPtr->leaf_count].tot_d1_blocks = 1;
443 0 : resultsPtr->leaf_data_array[resultsPtr->leaf_count++].split_flag = split_flag = EB_TRUE;
444 0 : break;
445 : }
446 19692 : }
447 :
448 47609 : cu_index += (split_flag == EB_TRUE) ? 1 : depth_offset[cuStatsPtr->depth];
449 : } // End CU Loop
450 2625 : }
451 :
/*
 * Bottom-up inter-depth decision driven by the early_cost values stored in
 * context_ptr->local_cu_array.
 *
 * After the CU at cu_index (located at origin_x/origin_y) has been costed,
 * up to three stages run in sequence. Each stage fires only when the block
 * just handled closes a group of four (GROUP_OF_4_* macro on its origin and,
 * for the upper stages, the matching group counter reaching 4):
 *   - depth 2 vs depth 3, then depth 1 vs depth 2, then depth 0 vs depth 1.
 * In every stage the parent cost (depth N) is compared with the sum of its
 * four children's costs (depth N+1). If the parent is not worse, its
 * early_split_flag is cleared; otherwise the children's sum is stored as the
 * parent's early_cost (except at depth 0, where no cost is written back).
 * endDepth caps the comparison: when it is below the child depth, the parent
 * is kept unconditionally.
 *
 * depthNRate and depthNPlusOneRate are both 0, so only early_cost values
 * contribute to the comparison.
 *
 * The group counters are read from context_ptr on entry and written back on
 * exit.
 */
452 202891 : void MdcInterDepthDecision(
453 : ModeDecisionConfigurationContext *context_ptr,
454 : uint32_t origin_x,
455 : uint32_t origin_y,
456 : uint32_t endDepth,
457 : uint32_t cu_index)
458 : {
// NOTE: despite their names, left/top/topLeft hold the indices of the three
// sibling CUs that precede the current one, found by stepping back in scan order.
459 : uint32_t leftCuIndex;
460 : uint32_t topCuIndex;
461 : uint32_t topLeftCuIndex;
462 : uint32_t depthZeroCandidateCuIndex;
463 202891 : uint32_t depthOneCandidateCuIndex = cu_index;
464 202891 : uint32_t depthTwoCandidateCuIndex = cu_index;
465 202891 : uint64_t depthNRate = 0;
466 202891 : uint64_t depthNPlusOneRate = 0;
467 202891 : uint64_t depthNCost = 0;
468 202891 : uint64_t depthNPlusOneCost = 0;
469 202891 : MdcpLocalCodingUnit *local_cu_array = context_ptr->local_cu_array;
470 : /*** Stage 0: Inter depth decision: depth 2 vs depth 3 ***/
471 : // Walks to the last coded 8x8 block for merging
472 202891 : uint8_t group_of8x8_blocks_count = context_ptr->group_of8x8_blocks_count;
473 202891 : uint8_t group_of16x16_blocks_count = context_ptr->group_of16x16_blocks_count;
474 202891 : if ((GROUP_OF_4_8x8_BLOCKS(origin_x, origin_y))) {
475 37475 : group_of8x8_blocks_count++;
476 :
477 : // From the last coded cu index, get the indices of the three preceding sibling cus
478 37475 : leftCuIndex = cu_index - DEPTH_THREE_STEP;
479 37475 : topCuIndex = leftCuIndex - DEPTH_THREE_STEP;
480 37475 : topLeftCuIndex = topCuIndex - DEPTH_THREE_STEP;
481 :
482 : // The parent (merge candidate) is stored right before its first child
483 37475 : depthTwoCandidateCuIndex = topLeftCuIndex - 1;
484 :
485 : // Compute depth N cost
486 37475 : local_cu_array[depthTwoCandidateCuIndex].split_context = 0;
487 37475 : depthNCost = (local_cu_array[depthTwoCandidateCuIndex]).early_cost + depthNRate;
488 :
// endDepth < 3: depth 3 is not allowed, so the depth-2 parent is kept as is
489 37475 : if (endDepth < 3) {
490 0 : (local_cu_array[depthTwoCandidateCuIndex]).early_split_flag = EB_FALSE;
491 0 : (local_cu_array[depthTwoCandidateCuIndex]).early_cost = depthNCost;
492 : }
493 : else {
494 37475 : depthNPlusOneCost = (local_cu_array[cu_index]).early_cost + (local_cu_array[leftCuIndex]).early_cost + (local_cu_array[topCuIndex]).early_cost + (local_cu_array[topLeftCuIndex]).early_cost + depthNPlusOneRate;
495 :
496 37475 : if (depthNCost <= depthNPlusOneCost) {
497 : // If the cost is low enough to warrant not splitting further:
498 : // 1. set the split flag of the candidate cu for merging to false
499 : // 2. keep the candidate's own cost
500 37399 : (local_cu_array[depthTwoCandidateCuIndex]).early_split_flag = EB_FALSE;
501 37399 : (local_cu_array[depthTwoCandidateCuIndex]).early_cost = depthNCost;
502 : }
503 : else {
504 : // If the cost is not low enough:
505 : // replace the candidate's cost with the sum of its children;
506 : // this update is required for the next inter depth decision
507 76 : (&local_cu_array[depthTwoCandidateCuIndex])->early_cost = depthNPlusOneCost;
508 : }
509 : }
510 : }
511 :
/*** Stage 1: Inter depth decision: depth 1 vs depth 2 ***/
512 : // Walks to the last coded 16x16 block for merging
513 202891 : if (GROUP_OF_4_16x16_BLOCKS(get_coded_unit_stats(depthTwoCandidateCuIndex)->origin_x, get_coded_unit_stats(depthTwoCandidateCuIndex)->origin_y) &&
514 : (group_of8x8_blocks_count == 4)) {
515 9373 : group_of8x8_blocks_count = 0;
516 9373 : group_of16x16_blocks_count++;
517 :
518 : // From the last coded cu index, get the indices of the three preceding sibling cus
519 9373 : leftCuIndex = depthTwoCandidateCuIndex - DEPTH_TWO_STEP;
520 9373 : topCuIndex = leftCuIndex - DEPTH_TWO_STEP;
521 9373 : topLeftCuIndex = topCuIndex - DEPTH_TWO_STEP;
522 :
523 : // The parent (merge candidate) is stored right before its first child
524 9373 : depthOneCandidateCuIndex = topLeftCuIndex - 1;
525 :
// Only compare when the computed index really designates a depth-1 CU
526 9373 : if (get_coded_unit_stats(depthOneCandidateCuIndex)->depth == 1) {
527 9371 : depthNCost = local_cu_array[depthOneCandidateCuIndex].early_cost + depthNRate;
528 9371 : if (endDepth < 2) {
529 0 : local_cu_array[depthOneCandidateCuIndex].early_split_flag = EB_FALSE;
530 0 : local_cu_array[depthOneCandidateCuIndex].early_cost = depthNCost;
531 : }
532 : else {
533 : // Compute depth N+1 cost
534 9371 : depthNPlusOneCost = local_cu_array[depthTwoCandidateCuIndex].early_cost +
535 9371 : local_cu_array[leftCuIndex].early_cost +
536 9371 : local_cu_array[topCuIndex].early_cost +
537 9371 : local_cu_array[topLeftCuIndex].early_cost +
538 : depthNPlusOneRate;
539 :
540 : // Inter depth comparison: depth 1 vs depth 2
541 9371 : if (depthNCost <= depthNPlusOneCost) {
542 : // If the cost is low enough to warrant not splitting further:
543 : // 1. set the split flag of the candidate cu for merging to false
544 : // 2. keep the candidate's own cost
545 9260 : local_cu_array[depthOneCandidateCuIndex].early_split_flag = EB_FALSE;
546 9260 : local_cu_array[depthOneCandidateCuIndex].early_cost = depthNCost;
547 : }
548 : else {
549 : // If the cost is not low enough:
550 : // replace the candidate's cost with the sum of its children;
551 : // this update is required for the next inter depth decision
552 111 : local_cu_array[depthOneCandidateCuIndex].early_cost = depthNPlusOneCost;
553 : }
554 : }
555 : }
556 : }
557 :
558 : // Stage 2: Inter depth decision: depth 0 vs depth 1
559 :
560 : // Walks to the last coded 32x32 block for merging
561 : // Stage 2 isn't performed in I slices because of the absence of 64x64 candidates
562 202852 : if (GROUP_OF_4_32x32_BLOCKS(get_coded_unit_stats(depthOneCandidateCuIndex)->origin_x, get_coded_unit_stats(depthOneCandidateCuIndex)->origin_y) &&
563 : (group_of16x16_blocks_count == 4)) {
564 2062 : group_of16x16_blocks_count = 0;
565 :
566 : // From the last coded cu index, get the indices of the three preceding sibling cus
567 2062 : leftCuIndex = depthOneCandidateCuIndex - DEPTH_ONE_STEP;
568 2062 : topCuIndex = leftCuIndex - DEPTH_ONE_STEP;
569 2062 : topLeftCuIndex = topCuIndex - DEPTH_ONE_STEP;
570 :
571 : // The parent (merge candidate) is stored right before its first child
572 2062 : depthZeroCandidateCuIndex = topLeftCuIndex - 1;
573 :
574 2062 : if (get_coded_unit_stats(depthZeroCandidateCuIndex)->depth == 0) {
575 : // Compute depth N cost
576 2062 : depthNCost = (&local_cu_array[depthZeroCandidateCuIndex])->early_cost + depthNRate;
577 2062 : if (endDepth < 1)
578 0 : (&local_cu_array[depthZeroCandidateCuIndex])->early_split_flag = EB_FALSE;
579 : else {
580 : // Compute depth N+1 cost
581 2062 : depthNPlusOneCost = local_cu_array[depthOneCandidateCuIndex].early_cost +
582 2062 : local_cu_array[leftCuIndex].early_cost +
583 2062 : local_cu_array[topCuIndex].early_cost +
584 2062 : local_cu_array[topLeftCuIndex].early_cost +
585 : depthNPlusOneRate;
586 :
587 : // Inter depth comparison: depth 0 vs depth 1
588 2062 : if (depthNCost <= depthNPlusOneCost) {
589 : // If the cost is low enough to warrant not splitting further,
590 : // set the split flag of the candidate cu for merging to false.
591 : // No cost is written back at this depth: there is no further stage.
592 1886 : (&local_cu_array[depthZeroCandidateCuIndex])->early_split_flag = EB_FALSE;
593 : }
594 : }
595 : }
596 : }
597 :
// Persist the group counters for the next call
598 202670 : context_ptr->group_of8x8_blocks_count = group_of8x8_blocks_count;
599 202670 : context_ptr->group_of16x16_blocks_count = group_of16x16_blocks_count;
600 202670 : }
601 :
602 : #if PREDICT_NSQ_SHAPE
603 : /// compute the cost of curr depth, and the depth above
604 275491 : void mdc_compute_depth_costs(
605 : ModeDecisionConfigurationContext *context_ptr,
606 : uint32_t curr_depth_mds,
607 : uint32_t above_depth_mds,
608 : uint32_t step,
609 : uint64_t *above_depth_cost,
610 : uint64_t *curr_depth_cost)
611 : {
612 275491 : uint64_t above_non_split_rate = 0;
613 275491 : uint64_t above_split_rate = 0;
614 :
615 : // Rate of not spliting the current depth (Depth != 4) in case the children were omitted by MDC
616 275491 : uint64_t curr_non_split_rate_blk0 = 0;
617 275491 : uint64_t curr_non_split_rate_blk1 = 0;
618 275491 : uint64_t curr_non_split_rate_blk2 = 0;
619 275491 : uint64_t curr_non_split_rate_blk3 = 0;
620 :
621 : // Compute above depth cost
622 275491 : *above_depth_cost = context_ptr->local_cu_array[above_depth_mds].early_cost + above_non_split_rate;
623 :
624 : // Compute current depth cost
625 275491 : *curr_depth_cost =
626 275491 : context_ptr->local_cu_array[curr_depth_mds].early_cost + curr_non_split_rate_blk3 +
627 275491 : context_ptr->local_cu_array[curr_depth_mds - 1 * step].early_cost + curr_non_split_rate_blk2 +
628 275491 : context_ptr->local_cu_array[curr_depth_mds - 2 * step].early_cost + curr_non_split_rate_blk1 +
629 275491 : context_ptr->local_cu_array[curr_depth_mds - 3 * step].early_cost + curr_non_split_rate_blk0 +
630 : above_split_rate;
631 275491 : }
632 1110530 : uint32_t mdc_d2_inter_depth_block_decision(
633 : PictureControlSet *picture_control_set_ptr,
634 : ModeDecisionConfigurationContext *context_ptr,
635 : EbMdcLeafData *results_ptr,
636 : uint32_t blk_mds,
637 : uint32_t sb_index) {
638 :
639 :
640 : uint32_t last_cu_index;
641 1110530 : uint64_t parent_depth_cost = 0, current_depth_cost = 0;
642 1110530 : SequenceControlSet *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
643 : EbBool last_depth_flag;
644 : const BlockGeom *blk_geom;
645 :
646 1110530 : last_depth_flag = context_ptr->local_cu_array[blk_mds].early_split_flag == EB_FALSE ? EB_TRUE : EB_FALSE;
647 :
648 1110530 : last_cu_index = blk_mds;
649 1110530 : blk_geom = get_blk_geom_mds(blk_mds);
650 1110560 : uint32_t parent_depth_idx_mds = blk_mds;
651 1110560 : uint32_t current_depth_idx_mds = blk_mds;
652 :
653 1110560 : if (last_depth_flag) {
654 1110600 : while (blk_geom->is_last_quadrant) {
655 : //get parent idx
656 275489 : parent_depth_idx_mds = current_depth_idx_mds - parent_depth_offset[sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128][blk_geom->depth];
657 275489 : if (picture_control_set_ptr->slice_type == I_SLICE && parent_depth_idx_mds == 0 && sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128)
658 0 : parent_depth_cost = MAX_MODE_COST;
659 : else
660 275489 : mdc_compute_depth_costs(context_ptr, current_depth_idx_mds, parent_depth_idx_mds, ns_depth_offset[sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128][blk_geom->depth], &parent_depth_cost, ¤t_depth_cost);
661 275493 : if (!sequence_control_set_ptr->sb_geom[sb_index].block_is_allowed[parent_depth_idx_mds])
662 0 : parent_depth_cost = MAX_MODE_COST;
663 275493 : if (parent_depth_cost <= current_depth_cost) {
664 273027 : context_ptr->local_cu_array[parent_depth_idx_mds].early_split_flag = EB_FALSE;
665 273027 : context_ptr->local_cu_array[parent_depth_idx_mds].early_cost = parent_depth_cost;
666 273027 : results_ptr[parent_depth_idx_mds].early_split_flag = context_ptr->local_cu_array[parent_depth_idx_mds].early_split_flag;
667 273027 : last_cu_index = parent_depth_idx_mds;
668 : }
669 : else
670 2466 : context_ptr->local_cu_array[parent_depth_idx_mds].early_cost = current_depth_cost;
671 :
672 : //setup next parent inter depth
673 275493 : blk_geom = get_blk_geom_mds(parent_depth_idx_mds);
674 275488 : current_depth_idx_mds = parent_depth_idx_mds;
675 : }
676 : }
677 :
678 1110560 : return last_cu_index;
679 : }
680 :
681 2079740 : uint64_t mdc_d1_non_square_block_decision(ModeDecisionConfigurationContext *context_ptr){
682 : //compute total cost for the whole block partition
683 2079740 : uint64_t tot_cost = 0;
684 2079740 : uint32_t first_blk_idx = context_ptr->mds_idx - (context_ptr->blk_geom->totns - 1);//index of first block in this partition
685 : uint32_t blk_it;
686 :
687 5672040 : for (blk_it = 0; blk_it < context_ptr->blk_geom->totns; blk_it++)
688 3592300 : tot_cost += context_ptr->local_cu_array[first_blk_idx + blk_it].early_cost;
689 :
690 2079740 : if (context_ptr->blk_geom->shape == PART_N || tot_cost < context_ptr->local_cu_array[context_ptr->blk_geom->sqi_mds].early_cost)
691 : {
692 : //store best partition cost in parent square
693 1154100 : context_ptr->local_cu_array[context_ptr->blk_geom->sqi_mds].early_cost = tot_cost;
694 : #if ADD_MDC_FULL_COST
695 1154100 : context_ptr->local_cu_array[context_ptr->blk_geom->sqi_mds].part = from_shape_to_part[context_ptr->blk_geom->shape];
696 : #endif
697 1154100 : context_ptr->local_cu_array[context_ptr->blk_geom->sqi_mds].best_d1_blk = first_blk_idx;
698 : }
699 2079740 : return tot_cost;
700 : }
701 :
702 0 : uint8_t find_shape_index(PART shape, PART nsq_shape_table[10]) {
703 : uint8_t i;
704 0 : for (i = 0; i < 10; i++)
705 0 : if (shape == nsq_shape_table[i]) return i;
706 :
707 0 : return 0;
708 : }
709 20880 : uint8_t find_depth_index(uint8_t shape, uint8_t depth_table[NUMBER_OF_DEPTH]) {
710 : uint8_t i;
711 73080 : for (i = 0; i < NUMBER_OF_DEPTH; i++)
712 73080 : if (shape == depth_table[i]) return i;
713 :
714 0 : return 0;
715 : }
716 :
/*
 * Maps a square block size to its depth: 128 -> 0, 64 -> 1, 32 -> 2,
 * 16 -> 3, 8 -> 4. Any other size maps to 5.
 */
uint8_t get_depth(
    uint8_t sq_size) {
    switch (sq_size) {
    case 128: return 0;
    case 64:  return 1;
    case 32:  return 2;
    case 16:  return 3;
    case 8:   return 4;
    default:  return 5;
    }
}
727 : #if ADD_MDC_FULL_COST
728 3617750 : static INLINE void set_dc_sign(int32_t *cul_level, int32_t dc_val) {
729 3617750 : if (dc_val < 0)
730 152953 : *cul_level |= 1 << COEFF_CONTEXT_BITS;
731 3464800 : else if (dc_val > 0)
732 300237 : *cul_level += 2 << COEFF_CONTEXT_BITS;
733 3617750 : }
734 :
735 : extern void av1_quantize_b_facade_II(
736 : const TranLow *coeff_ptr,
737 : int32_t stride,
738 : int32_t width,
739 : int32_t height,
740 : intptr_t n_coeffs,
741 : const MacroblockPlane *p,
742 : TranLow *qcoeff_ptr,
743 : TranLow *dqcoeff_ptr,
744 : uint16_t *eob_ptr,
745 : const ScanOrder *sc,
746 : const QuantParam *qparam);
747 :
748 3617520 : int32_t mdc_av1_quantize_inv_quantize(
749 : PictureControlSet *picture_control_set_ptr,
750 : int32_t *coeff,
751 : const uint32_t coeff_stride,
752 : int32_t *quant_coeff,
753 : int32_t *recon_coeff,
754 : uint32_t qp,
755 : uint32_t width,
756 : uint32_t height,
757 : TxSize txsize,
758 : uint16_t *eob,
759 : uint32_t *count_non_zero_coeffs,
760 : uint32_t component_type,
761 : TxType tx_type)
762 : {
763 : MacroblockPlane candidate_plane;
764 3617520 : const QmVal *qMatrix = picture_control_set_ptr->parent_pcs_ptr->gqmatrix[NUM_QM_LEVELS - 1][0][txsize];
765 3617520 : const QmVal *iqMatrix = picture_control_set_ptr->parent_pcs_ptr->giqmatrix[NUM_QM_LEVELS - 1][0][txsize];
766 : #if ADD_DELTA_QP_SUPPORT && MDC_ADAPTIVE_LEVEL
767 : //NM - Assuming 1 segment.
768 3617520 : uint32_t qIndex = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.delta_q_params.delta_q_present ? quantizer_to_qindex[qp] : picture_control_set_ptr->parent_pcs_ptr->frm_hdr.quantization_params.base_q_idx;
769 : #else
770 : uint32_t qIndex = picture_control_set_ptr->parent_pcs_ptr->delta_q_present_flag ? quantizer_to_qindex[qp] : picture_control_set_ptr->parent_pcs_ptr->base_qindex;
771 : #endif
772 3617520 : if (component_type == COMPONENT_LUMA) {
773 3617560 : candidate_plane.quant_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_quant[qIndex];
774 3617560 : candidate_plane.quant_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_quant_fp[qIndex];
775 3617560 : candidate_plane.round_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_round_fp[qIndex];
776 3617560 : candidate_plane.quant_shift_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_quant_shift[qIndex];
777 3617560 : candidate_plane.zbin_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_zbin[qIndex];
778 3617560 : candidate_plane.round_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_round[qIndex];
779 3617560 : candidate_plane.dequant_QTX = picture_control_set_ptr->parent_pcs_ptr->deqMd.y_dequant_QTX[qIndex];
780 : }
781 3617520 : if (component_type == COMPONENT_CHROMA_CB) {
782 0 : candidate_plane.quant_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_quant[qIndex];
783 0 : candidate_plane.quant_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_quant_fp[qIndex];
784 0 : candidate_plane.round_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_round_fp[qIndex];
785 0 : candidate_plane.quant_shift_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_quant_shift[qIndex];
786 0 : candidate_plane.zbin_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_zbin[qIndex];
787 0 : candidate_plane.round_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_round[qIndex];
788 0 : candidate_plane.dequant_QTX = picture_control_set_ptr->parent_pcs_ptr->deqMd.u_dequant_QTX[qIndex];
789 : }
790 3617520 : if (component_type == COMPONENT_CHROMA_CR) {
791 0 : candidate_plane.quant_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_quant[qIndex];
792 0 : candidate_plane.quant_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_quant_fp[qIndex];
793 0 : candidate_plane.round_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_round_fp[qIndex];
794 0 : candidate_plane.quant_shift_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_quant_shift[qIndex];
795 0 : candidate_plane.zbin_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_zbin[qIndex];
796 0 : candidate_plane.round_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_round[qIndex];
797 0 : candidate_plane.dequant_QTX = picture_control_set_ptr->parent_pcs_ptr->deqMd.v_dequant_QTX[qIndex];
798 : }
799 3617520 : const ScanOrder *const scan_order = &av1_scan_orders[txsize][tx_type];
800 :
801 3617520 : const int32_t n_coeffs = av1_get_max_eob(txsize);
802 :
803 : QuantParam qparam;
804 :
805 3617600 : qparam.log_scale = av1_get_tx_scale(txsize);
806 3617620 : qparam.tx_size = txsize;
807 3617620 : qparam.qmatrix = qMatrix;
808 3617620 : qparam.iqmatrix = iqMatrix;
809 :
810 3617620 : av1_quantize_b_facade_II(
811 : (TranLow*)coeff,
812 : coeff_stride,
813 : width,
814 : height,
815 : n_coeffs,
816 : &candidate_plane,
817 : quant_coeff,
818 : (TranLow*)recon_coeff,
819 : eob,
820 : scan_order,
821 : &qparam);
822 :
823 3617750 : *count_non_zero_coeffs = *eob;
824 : // Derive cul_level
825 3617750 : int32_t cul_level = 0;
826 3617750 : const int16_t *const scan = scan_order->scan;
827 19253300 : for (int32_t c = 0; c < *eob; ++c) {
828 15635500 : const int16_t pos = scan[c];
829 15635500 : const int32_t v = quant_coeff[pos];
830 15635500 : int32_t level = ABS(v);
831 15635500 : cul_level += level;
832 : }
833 3617750 : cul_level = AOMMIN(COEFF_CONTEXT_MASK, cul_level);
834 : // DC value
835 3617750 : set_dc_sign(&cul_level, quant_coeff[0]);
836 3617750 : return cul_level;
837 : }
838 :
839 3617900 : EbErrorType mdc_av1_tu_estimate_coeff_bits(
840 : uint8_t allow_update_cdf,
841 : FRAME_CONTEXT *ec_ctx,
842 : PictureControlSet *picture_control_set_ptr,
843 : struct ModeDecisionCandidateBuffer *candidate_buffer_ptr,
844 : uint32_t tu_origin_index,
845 : EbPictureBufferDesc *coeff_buffer_sb,
846 : uint32_t y_eob,
847 : uint64_t *y_tu_coeff_bits,
848 : TxSize txsize,
849 : TxType tx_type,
850 : COMPONENT_TYPE component_type)
851 : {
852 3617900 : EbErrorType return_error = EB_ErrorNone;
853 : int32_t *coeff_buffer;
854 3617900 : int16_t luma_txb_skip_context = 0;
855 3617900 : int16_t luma_dc_sign_context = 0;
856 3617900 : EbBool reducedTransformSetFlag = picture_control_set_ptr->parent_pcs_ptr->reduced_tx_set_used ? EB_TRUE : EB_FALSE;
857 : //Estimate the rate of the transform type and coefficient for Luma
858 3617900 : if (component_type == COMPONENT_LUMA || component_type == COMPONENT_ALL) {
859 3617990 : if (y_eob) {
860 517434 : coeff_buffer = (int32_t*)&coeff_buffer_sb->buffer_y[tu_origin_index * sizeof(int32_t)];
861 517406 : *y_tu_coeff_bits = eb_av1_cost_coeffs_txb(
862 : allow_update_cdf,
863 : ec_ctx,
864 : candidate_buffer_ptr,
865 : coeff_buffer,
866 517434 : (uint16_t)y_eob,
867 : PLANE_TYPE_Y,
868 : txsize,
869 : tx_type,
870 : luma_txb_skip_context,
871 : luma_dc_sign_context,
872 : reducedTransformSetFlag);
873 : }
874 : else {
875 3100560 : *y_tu_coeff_bits = av1_cost_skip_txb(
876 : allow_update_cdf,
877 : ec_ctx,
878 : candidate_buffer_ptr,
879 : txsize,
880 : PLANE_TYPE_Y,
881 : luma_txb_skip_context);
882 : }
883 : }
884 3617820 : return return_error;
885 : }
886 3617410 : void mdc_full_loop(
887 : ModeDecisionCandidateBuffer *candidate_buffer,
888 : ModeDecisionConfigurationContext *context_ptr,
889 : PictureControlSet *picture_control_set_ptr,
890 : uint32_t qp,
891 : uint32_t *y_count_non_zero_coeffs,
892 : uint64_t *y_coeff_bits,
893 : uint64_t *y_full_distortion)
894 : {
895 : uint32_t tu_origin_index;
896 : uint64_t y_full_cost;
897 3617410 : SequenceControlSet *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
898 : uint64_t y_tu_coeff_bits;
899 : uint64_t tu_full_distortion[3][DIST_CALC_TOTAL];
900 3617410 : context_ptr->three_quad_energy = 0;
901 3617410 : uint32_t txb_1d_offset = 0;
902 3617410 : uint32_t txb_itr = 0;
903 3617410 : uint8_t tx_depth = candidate_buffer->candidate_ptr->tx_depth;
904 3617410 : uint16_t txb_count = context_ptr->blk_geom->txb_count[tx_depth];
905 7234880 : for (txb_itr = 0; txb_itr < txb_count; txb_itr++) {
906 3617360 : uint16_t tx_org_x = context_ptr->blk_geom->tx_org_x[tx_depth][txb_itr];
907 3617360 : uint16_t tx_org_y = context_ptr->blk_geom->tx_org_y[tx_depth][txb_itr];
908 3617360 : int32_t cropped_tx_width = MIN(context_ptr->blk_geom->tx_width[tx_depth][txb_itr], sequence_control_set_ptr->seq_header.max_frame_width - (context_ptr->sb_origin_x + tx_org_x));
909 3617360 : int32_t cropped_tx_height = MIN(context_ptr->blk_geom->tx_height[tx_depth][txb_itr], sequence_control_set_ptr->seq_header.max_frame_height - (context_ptr->sb_origin_y + tx_org_y));
910 3617360 : tu_origin_index = tx_org_x + (tx_org_y * candidate_buffer->residual_ptr->stride_y);
911 3617360 : y_tu_coeff_bits = 0;
912 :
913 : // Y: T Q iQ
914 3617360 : av1_estimate_transform(
915 3617360 : &(((int16_t*)candidate_buffer->residual_ptr->buffer_y)[tu_origin_index]),
916 3617360 : candidate_buffer->residual_ptr->stride_y,
917 3617360 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[txb_1d_offset]),
918 : NOT_USED_VALUE,
919 3617360 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
920 : &context_ptr->three_quad_energy,
921 : context_ptr->transform_inner_array_ptr,
922 : 0,
923 3617360 : candidate_buffer->candidate_ptr->transform_type[txb_itr],
924 : PLANE_TYPE_Y,
925 : DEFAULT_SHAPE);
926 :
927 7235220 : candidate_buffer->candidate_ptr->quantized_dc[0][txb_itr] = mdc_av1_quantize_inv_quantize(
928 : picture_control_set_ptr,
929 3617530 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[txb_1d_offset]),
930 : NOT_USED_VALUE,
931 3617530 : &(((int32_t*)candidate_buffer->residual_quant_coeff_ptr->buffer_y)[txb_1d_offset]),
932 3617530 : &(((int32_t*)candidate_buffer->recon_coeff_ptr->buffer_y)[txb_1d_offset]),
933 : qp,
934 3617530 : context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
935 3617530 : context_ptr->blk_geom->tx_height[tx_depth][txb_itr],
936 3617530 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
937 3617530 : &candidate_buffer->candidate_ptr->eob[0][txb_itr],
938 3617530 : &(y_count_non_zero_coeffs[txb_itr]),
939 : COMPONENT_LUMA,
940 3617530 : candidate_buffer->candidate_ptr->transform_type[txb_itr]);
941 :
942 3617700 : if (context_ptr->spatial_sse_full_loop) {
943 0 : EbPictureBufferDesc *input_picture_ptr = picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
944 0 : uint32_t input_tu_origin_index = (context_ptr->sb_origin_x + tx_org_x + input_picture_ptr->origin_x) + ((context_ptr->sb_origin_y + tx_org_y + input_picture_ptr->origin_y) * input_picture_ptr->stride_y);
945 0 : uint32_t y_has_coeff = y_count_non_zero_coeffs[txb_itr] > 0;
946 :
947 0 : if (y_has_coeff) {
948 : (void)context_ptr;
949 0 : uint8_t *pred_buffer = &(candidate_buffer->prediction_ptr->buffer_y[tu_origin_index]);
950 0 : uint8_t *rec_buffer = &(candidate_buffer->recon_ptr->buffer_y[tu_origin_index]);
951 : uint32_t j;
952 0 : for (j = 0; j < context_ptr->blk_geom->tx_height[tx_depth][txb_itr]; j++)
953 0 : memcpy(rec_buffer + j * candidate_buffer->recon_ptr->stride_y, pred_buffer + j * candidate_buffer->prediction_ptr->stride_y, context_ptr->blk_geom->tx_width[tx_depth][txb_itr]);
954 :
955 0 : av1_inv_transform_recon8bit(
956 0 : &(((int32_t*)candidate_buffer->recon_coeff_ptr->buffer_y)[txb_1d_offset]),
957 0 : rec_buffer, candidate_buffer->recon_ptr->stride_y,
958 0 : rec_buffer, candidate_buffer->recon_ptr->stride_y,
959 0 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
960 0 : candidate_buffer->candidate_ptr->transform_type[txb_itr],
961 : PLANE_TYPE_Y,
962 0 : (uint16_t)candidate_buffer->candidate_ptr->eob[0][txb_itr],
963 : 0 /*lossless*/);
964 : }
965 : else {
966 0 : picture_copy(
967 : candidate_buffer->prediction_ptr,
968 : tu_origin_index,
969 : 0,
970 : candidate_buffer->recon_ptr,
971 : tu_origin_index,
972 : 0,
973 0 : context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
974 0 : context_ptr->blk_geom->tx_height[tx_depth][txb_itr],
975 : 0,
976 : 0,
977 : PICTURE_BUFFER_DESC_Y_FLAG,
978 : 0);
979 :
980 : }
981 :
982 0 : tu_full_distortion[0][DIST_CALC_PREDICTION] = spatial_full_distortion(
983 : input_picture_ptr->buffer_y,
984 : input_tu_origin_index,
985 0 : input_picture_ptr->stride_y,
986 0 : candidate_buffer->prediction_ptr->buffer_y,
987 : tu_origin_index,
988 0 : candidate_buffer->prediction_ptr->stride_y,
989 : cropped_tx_width,
990 : cropped_tx_height,
991 0 : Log2f(context_ptr->blk_geom->tx_width[tx_depth][txb_itr]) - 2);
992 :
993 0 : tu_full_distortion[0][DIST_CALC_RESIDUAL] = spatial_full_distortion(
994 : input_picture_ptr->buffer_y,
995 : input_tu_origin_index,
996 0 : input_picture_ptr->stride_y,
997 0 : &(((uint8_t*)candidate_buffer->recon_ptr->buffer_y)[tu_origin_index]),
998 : 0,
999 0 : candidate_buffer->recon_ptr->stride_y,
1000 : cropped_tx_width,
1001 : cropped_tx_height,
1002 0 : Log2f(context_ptr->blk_geom->tx_width[tx_depth][txb_itr]) - 2);
1003 :
1004 0 : tu_full_distortion[0][DIST_CALC_PREDICTION] <<= 4;
1005 0 : tu_full_distortion[0][DIST_CALC_RESIDUAL] <<= 4;
1006 : }
1007 : else {
1008 : // LUMA DISTORTION
1009 3617700 : picture_full_distortion32_bits(
1010 3617700 : context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr,
1011 : txb_1d_offset,
1012 : 0,
1013 : candidate_buffer->recon_coeff_ptr,
1014 : txb_1d_offset,
1015 : 0,
1016 3617700 : context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
1017 3617700 : context_ptr->blk_geom->tx_height[tx_depth][txb_itr],
1018 : NOT_USED_VALUE,
1019 : NOT_USED_VALUE,
1020 : tu_full_distortion[0],
1021 : NOT_USED_VALUE,
1022 : NOT_USED_VALUE,
1023 3617700 : y_count_non_zero_coeffs[txb_itr],
1024 : 0,
1025 : 0,
1026 : COMPONENT_LUMA);
1027 :
1028 3618000 : tu_full_distortion[0][DIST_CALC_RESIDUAL] += context_ptr->three_quad_energy;
1029 3618000 : tu_full_distortion[0][DIST_CALC_PREDICTION] += context_ptr->three_quad_energy;
1030 : //assert(context_ptr->three_quad_energy == 0 && context_ptr->cu_stats->size < 64);
1031 3618000 : TxSize tx_size = context_ptr->blk_geom->txsize[tx_depth][txb_itr];
1032 3618000 : int32_t shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
1033 3617940 : tu_full_distortion[0][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(tu_full_distortion[0][DIST_CALC_RESIDUAL], shift);
1034 3617940 : tu_full_distortion[0][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(tu_full_distortion[0][DIST_CALC_PREDICTION], shift);
1035 : }
1036 : //LUMA-ONLY
1037 3617940 : mdc_av1_tu_estimate_coeff_bits(
1038 : 0,//allow_update_cdf,
1039 : NULL,//FRAME_CONTEXT *ec_ctx,
1040 : picture_control_set_ptr,
1041 : candidate_buffer,
1042 : txb_1d_offset,
1043 : candidate_buffer->residual_quant_coeff_ptr,
1044 3617940 : y_count_non_zero_coeffs[txb_itr],
1045 : &y_tu_coeff_bits,
1046 3617940 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
1047 3617940 : candidate_buffer->candidate_ptr->transform_type[txb_itr],
1048 : COMPONENT_LUMA);
1049 :
1050 3617580 : av1_tu_calc_cost_luma(
1051 : 0,
1052 : candidate_buffer->candidate_ptr,
1053 : txb_itr,
1054 3617580 : context_ptr->blk_geom->txsize[tx_depth][0],
1055 3617580 : y_count_non_zero_coeffs[txb_itr],
1056 : tu_full_distortion[0], //gets updated inside based on cbf decision
1057 : &y_tu_coeff_bits, //gets updated inside based on cbf decision
1058 : &y_full_cost,
1059 : context_ptr->full_lambda);
1060 :
1061 3617470 : (*y_coeff_bits) += y_tu_coeff_bits;
1062 3617470 : y_full_distortion[DIST_CALC_RESIDUAL] += tu_full_distortion[0][DIST_CALC_RESIDUAL];
1063 3617470 : y_full_distortion[DIST_CALC_PREDICTION] += tu_full_distortion[0][DIST_CALC_PREDICTION];
1064 3617470 : txb_1d_offset += context_ptr->blk_geom->tx_width[tx_depth][txb_itr] * context_ptr->blk_geom->tx_height[tx_depth][txb_itr];
1065 : }
1066 3617520 : }
1067 :
1068 : void av1_set_ref_frame(MvReferenceFrame *rf, int8_t ref_frame_type);
1069 :
1070 3618010 : EbErrorType mdc_inter_pu_prediction_av1(
1071 : ModeDecisionConfigurationContext *context_ptr,
1072 : PictureControlSet *picture_control_set_ptr,
1073 : ModeDecisionCandidateBuffer *candidate_buffer_ptr)
1074 : {
1075 3618010 : EbErrorType return_error = EB_ErrorNone;
1076 : EbPictureBufferDesc *ref_pic_list0;
1077 3618010 : EbPictureBufferDesc *ref_pic_list1 = NULL;
1078 :
1079 : Mv mv_0;
1080 : Mv mv_1;
1081 3618010 : mv_0.x = candidate_buffer_ptr->candidate_ptr->motion_vector_xl0;
1082 3618010 : mv_0.y = candidate_buffer_ptr->candidate_ptr->motion_vector_yl0;
1083 3618010 : mv_1.x = candidate_buffer_ptr->candidate_ptr->motion_vector_xl1;
1084 3618010 : mv_1.y = candidate_buffer_ptr->candidate_ptr->motion_vector_yl1;
1085 : MvUnit mv_unit;
1086 3618010 : mv_unit.pred_direction = candidate_buffer_ptr->candidate_ptr->prediction_direction[0];
1087 3618010 : mv_unit.mv[0] = mv_0;
1088 3618010 : mv_unit.mv[1] = mv_1;
1089 3618010 : int8_t ref_idx_l0 = candidate_buffer_ptr->candidate_ptr->ref_frame_index_l0;
1090 3618010 : int8_t ref_idx_l1 = candidate_buffer_ptr->candidate_ptr->ref_frame_index_l1;
1091 : // MRP_MD_UNI_DIR_BIPRED
1092 : MvReferenceFrame rf[2];
1093 3618010 : av1_set_ref_frame(rf, candidate_buffer_ptr->candidate_ptr->ref_frame_type);
1094 : uint8_t list_idx0, list_idx1;
1095 3618130 : list_idx0 = get_list_idx(rf[0]);
1096 3618080 : if (rf[1] == NONE_FRAME)
1097 3618080 : list_idx1 = get_list_idx(rf[0]);
1098 : else
1099 0 : list_idx1 = get_list_idx(rf[1]);
1100 3617680 : assert(list_idx0 < MAX_NUM_OF_REF_PIC_LIST);
1101 3617680 : assert(list_idx1 < MAX_NUM_OF_REF_PIC_LIST);
1102 3617680 : if (ref_idx_l0 >= 0)
1103 3617780 : ref_pic_list0 = ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[list_idx0][ref_idx_l0]->object_ptr)->reference_picture;
1104 : else
1105 0 : ref_pic_list0 = (EbPictureBufferDesc*)EB_NULL;
1106 3617680 : if (ref_idx_l1 >= 0)
1107 0 : ref_pic_list1 = ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[list_idx1][ref_idx_l1]->object_ptr)->reference_picture;
1108 : else
1109 3617680 : ref_pic_list1 = (EbPictureBufferDesc*)EB_NULL;
1110 :
1111 3617680 : candidate_buffer_ptr->candidate_ptr->interp_filters = 0;
1112 :
1113 3617680 : av1_inter_prediction(
1114 : picture_control_set_ptr,
1115 3617680 : candidate_buffer_ptr->candidate_ptr->interp_filters,
1116 : context_ptr->mdc_cu_ptr,
1117 3617680 : candidate_buffer_ptr->candidate_ptr->ref_frame_type,
1118 : &mv_unit,
1119 3617680 : candidate_buffer_ptr->candidate_ptr->use_intrabc,
1120 : #if OBMC_FLAG
1121 : SIMPLE_TRANSLATION,
1122 : 0,
1123 : 0,
1124 : #endif
1125 3617680 : candidate_buffer_ptr->candidate_ptr->compound_idx,
1126 3617680 : &candidate_buffer_ptr->candidate_ptr->interinter_comp,
1127 : #if II_COMP_FLAG
1128 : NULL,
1129 : NULL,//ep_luma_recon_neighbor_array,
1130 : NULL,//ep_cb_recon_neighbor_array ,
1131 : NULL,//ep_cr_recon_neighbor_array ,
1132 : 0,//cu_ptr->is_interintra_used,
1133 : 0,//cu_ptr->interintra_mode,
1134 : 0,//cu_ptr->use_wedge_interintra,
1135 : 0,//cu_ptr->interintra_wedge_index,
1136 : #endif
1137 3617680 : context_ptr->cu_origin_x,
1138 3617680 : context_ptr->cu_origin_y,
1139 3617680 : context_ptr->blk_geom->bwidth,
1140 3617680 : context_ptr->blk_geom->bheight,
1141 : ref_pic_list0,
1142 : ref_pic_list1,
1143 : candidate_buffer_ptr->prediction_ptr,
1144 3617680 : context_ptr->blk_geom->origin_x,
1145 3617680 : context_ptr->blk_geom->origin_y,
1146 : 0, // No chroma
1147 : #if MDC_ADAPTIVE_LEVEL
1148 : 8); //bit_depth 0
1149 : #else
1150 : 0); //bit_depth 0
1151 : #endif
1152 :
1153 3617560 : return return_error;
1154 : }
1155 :
1156 : int8_t av1_ref_frame_type(const MvReferenceFrame *const rf);
1157 :
1158 3616480 : uint64_t mdc_av1_full_cost(
1159 : ModeDecisionConfigurationContext *context_ptr,
1160 : uint64_t *y_distortion,
1161 : uint64_t *y_coeff_bits,
1162 : uint64_t lambda) {
1163 :
1164 : //EbErrorType return_error = EB_ErrorNone;
1165 : // Luma and chroma rate
1166 3616480 : uint64_t lumaRate = 0;
1167 3616480 : uint64_t coeffRate = 0;
1168 :
1169 : // Luma and chroma SSE
1170 : uint64_t luma_sse;
1171 : uint64_t totalDistortion;
1172 : uint64_t rate;
1173 :
1174 3616480 : lumaRate += context_ptr->candidate_buffer->candidate_ptr->fast_luma_rate;
1175 :
1176 : // Coeff rate
1177 3616480 : coeffRate = (*y_coeff_bits);
1178 3616480 : luma_sse = y_distortion[0];
1179 3616480 : totalDistortion = luma_sse;
1180 3616480 : rate = lumaRate + coeffRate;
1181 : // Assign full cost
1182 3616480 : uint64_t full_cost = RDCOST(lambda, rate, totalDistortion);
1183 :
1184 3616480 : return full_cost;
1185 : }
1186 : #endif
1187 3480 : EB_EXTERN EbErrorType open_loop_partitioning_sb(
1188 : SequenceControlSet *sequence_control_set_ptr,
1189 : PictureControlSet *picture_control_set_ptr,
1190 : ModeDecisionConfigurationContext *context_ptr,
1191 : MdcLcuData *mdc_result_tb_ptr,
1192 : uint32_t sb_originx,
1193 : uint32_t sb_originy,
1194 : uint32_t sb_index) {
1195 :
1196 3480 : EbErrorType return_error = EB_ErrorNone;
1197 : uint32_t cuIdx;
1198 : uint32_t leaf_idx;
1199 : uint32_t start_idx, end_idx;
1200 3480 : uint32_t leaf_count = mdc_result_tb_ptr->leaf_count;
1201 3480 : EbMdcLeafData *leaf_data_array = mdc_result_tb_ptr->leaf_data_array;
1202 3480 : MdcpLocalCodingUnit *local_cu_array = context_ptr->local_cu_array;
1203 : MdcpLocalCodingUnit *cu_ptr;
1204 : //CU Loop
1205 3480 : cuIdx = 0; //index over mdc array
1206 3480 : start_idx = 0;
1207 3480 : uint64_t nsq_cost[NUMBER_OF_SHAPES] = { MAX_CU_COST, MAX_CU_COST,MAX_CU_COST,MAX_CU_COST,MAX_CU_COST,
1208 : MAX_CU_COST, MAX_CU_COST,MAX_CU_COST,MAX_CU_COST,MAX_CU_COST };
1209 3480 : PART nsq_shape_table[NUMBER_OF_SHAPES] = { PART_N, PART_H, PART_V, PART_HA, PART_HB,
1210 : PART_VA, PART_VB, PART_H4, PART_V4, PART_S };
1211 3480 : uint32_t blk_idx_mds = 0;
1212 3480 : uint32_t d1_blocks_accumlated = 0;
1213 : #if ADD_SAD_FOR_128X128
1214 3480 : uint64_t me_128x128 = 0;
1215 : #endif
1216 : #if ADD_MDC_FULL_COST
1217 3480 : context_ptr->coeff_est_entropy_coder_ptr = picture_control_set_ptr->coeff_est_entropy_coder_ptr;
1218 : #endif
1219 : uint64_t tot_me_sb;
1220 : #if MDC_ADAPTIVE_LEVEL
1221 3480 : LargestCodingUnit *sb_ptr = picture_control_set_ptr->sb_ptr_array[sb_index];
1222 3480 : uint64_t depth_cost[NUMBER_OF_DEPTH] = { 0 };
1223 3480 : uint8_t depth_table[NUMBER_OF_DEPTH] = { 0, 1, 2 , 3 ,4 ,5 };
1224 : #endif
1225 : do {
1226 3830170 : EbMdcLeafData * leaf_data_ptr = &mdc_result_tb_ptr->leaf_data_array[cuIdx];
1227 3830170 : blk_idx_mds = leaf_data_array[cuIdx].mds_idx;
1228 3830170 : context_ptr->mds_idx = blk_idx_mds;
1229 3830170 : const BlockGeom * blk_geom = context_ptr->blk_geom = get_blk_geom_mds(blk_idx_mds);
1230 3829790 : uint32_t cu_origin_x = sb_originx + blk_geom->origin_x;
1231 3829790 : uint32_t cu_origin_y = sb_originy + blk_geom->origin_y;
1232 3829790 : if (!(cu_origin_x < sequence_control_set_ptr->seq_header.max_frame_width && cu_origin_y < sequence_control_set_ptr->seq_header.max_frame_height))
1233 : {
1234 212703 : cuIdx++;
1235 212703 : continue;
1236 : }
1237 3617080 : cu_ptr = &local_cu_array[cuIdx];
1238 : #if ADD_MDC_FULL_COST
1239 3617080 : context_ptr->round_origin_x = ((context_ptr->cu_origin_x >> 3) << 3);
1240 3617080 : context_ptr->round_origin_y = ((context_ptr->cu_origin_y >> 3) << 3);
1241 3617080 : EbPictureBufferDesc *input_picture_ptr = picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
1242 3617080 : context_ptr->cu_size_log2 = blk_geom->bwidth_log2;
1243 3617080 : context_ptr->cu_origin_x = sb_originx + blk_geom->origin_x;
1244 3617080 : context_ptr->cu_origin_y = sb_originy + blk_geom->origin_y;
1245 3617080 : context_ptr->sb_origin_x = sb_originx;
1246 3617080 : context_ptr->sb_origin_y = sb_originy;
1247 3617080 : uint64_t y_full_distortion[DIST_CALC_TOTAL] = { 0 };
1248 3617080 : uint32_t count_non_zero_coeffs[MAX_NUM_OF_TU_PER_CU] = { 0 };
1249 3617080 : uint64_t y_coeff_bits = 0;
1250 3617080 : const uint32_t input_origin_index = (context_ptr->cu_origin_y + input_picture_ptr->origin_y) * input_picture_ptr->stride_y + (context_ptr->cu_origin_x + input_picture_ptr->origin_x);
1251 3617080 : const uint32_t cu_origin_index = blk_geom->origin_x + blk_geom->origin_y * SB_STRIDE_Y;
1252 3617080 : context_ptr->candidate_buffer->candidate_ptr = &context_ptr->fast_candidate_array[0];
1253 3617080 : cu_ptr->best_d1_blk = blk_idx_mds;
1254 : #endif
1255 3617080 : if (picture_control_set_ptr->slice_type != I_SLICE) {
1256 3617650 : uint32_t geom_offset_x = 0;
1257 3617650 : uint32_t geom_offset_y = 0;
1258 : uint32_t me_sb_addr;
1259 3617650 : if (sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128) {
1260 0 : uint32_t me_sb_size = sequence_control_set_ptr->sb_sz;
1261 0 : uint32_t me_pic_width_in_sb = (sequence_control_set_ptr->seq_header.max_frame_width + sequence_control_set_ptr->sb_sz - 1) / me_sb_size;
1262 0 : uint32_t me_pic_height_in_sb = (sequence_control_set_ptr->seq_header.max_frame_height + sequence_control_set_ptr->sb_sz - 1) / me_sb_size;
1263 0 : tot_me_sb = me_pic_width_in_sb * me_pic_height_in_sb;
1264 0 : uint32_t me_sb_x = (cu_origin_x / me_sb_size);
1265 0 : uint32_t me_sb_y = (cu_origin_y / me_sb_size);
1266 0 : me_sb_addr = me_sb_x + me_sb_y * me_pic_width_in_sb;
1267 0 : geom_offset_x = (me_sb_x & 0x1) * me_sb_size;
1268 0 : geom_offset_y = (me_sb_y & 0x1) * me_sb_size;
1269 : #if ADD_SAD_FOR_128X128
1270 : uint64_t sb_6x6_index;
1271 0 : uint64_t sb_6x6_dist_0 = 0;
1272 0 : uint64_t sb_6x6_dist_1 = 0;
1273 0 : uint64_t sb_6x6_dist_2 = 0;
1274 0 : uint64_t sb_6x6_dist_3 = 0;
1275 0 : if (blk_geom->sq_size == 128) {
1276 0 : sb_6x6_index = me_sb_addr;
1277 0 : SbParams *sb_params = &sequence_control_set_ptr->sb_params_array[sb_6x6_index];
1278 0 : if (sb_params->is_complete_sb) {
1279 0 : MeLcuResults *me_results_64x64 = picture_control_set_ptr->parent_pcs_ptr->me_results[sb_6x6_index];
1280 0 : const MeCandidate *me_block_results_64x64 = me_results_64x64->me_candidate[0];
1281 0 : sb_6x6_dist_0 = me_block_results_64x64[0].distortion;
1282 : }
1283 0 : if (blk_geom->bsize == BLOCK_128X128 || blk_geom->bsize == BLOCK_128X64) {
1284 0 : sb_6x6_index = MIN(tot_me_sb - 1, me_sb_addr + 1);
1285 0 : SbParams *sb_params = &sequence_control_set_ptr->sb_params_array[sb_6x6_index];
1286 0 : if (sb_params->is_complete_sb) {
1287 0 : MeLcuResults *me_results_64x64 = picture_control_set_ptr->parent_pcs_ptr->me_results[sb_6x6_index];
1288 0 : const MeCandidate *me_block_results_64x64 = me_results_64x64->me_candidate[0];
1289 0 : sb_6x6_dist_1 = me_block_results_64x64[0].distortion;
1290 : }
1291 : }
1292 0 : if (blk_geom->bsize == BLOCK_128X128 || blk_geom->bsize == BLOCK_64X128) {
1293 0 : sb_6x6_index = MIN(tot_me_sb - 1, me_sb_addr + me_pic_width_in_sb);
1294 :
1295 0 : SbParams *sb_params = &sequence_control_set_ptr->sb_params_array[sb_6x6_index];
1296 0 : if (sb_params->is_complete_sb) {
1297 0 : MeLcuResults *me_results_64x64 = picture_control_set_ptr->parent_pcs_ptr->me_results[sb_6x6_index];
1298 0 : const MeCandidate *me_block_results_64x64 = me_results_64x64->me_candidate[0];
1299 0 : sb_6x6_dist_2 = me_block_results_64x64[0].distortion;
1300 : }
1301 : }
1302 0 : if (blk_geom->bsize == BLOCK_128X128) {
1303 0 : sb_6x6_index = MIN(tot_me_sb - 1, me_sb_addr + me_pic_width_in_sb + 1);
1304 0 : SbParams *sb_params = &sequence_control_set_ptr->sb_params_array[sb_6x6_index];
1305 0 : if (sb_params->is_complete_sb) {
1306 0 : MeLcuResults *me_results_64x64 = picture_control_set_ptr->parent_pcs_ptr->me_results[sb_6x6_index];
1307 0 : const MeCandidate *me_block_results_64x64 = me_results_64x64->me_candidate[0];
1308 0 : sb_6x6_dist_3 = me_block_results_64x64[0].distortion;
1309 : }
1310 : }
1311 0 : if (blk_geom->bsize == BLOCK_128X128)
1312 0 : me_128x128 = sb_6x6_dist_0 + sb_6x6_dist_1 + sb_6x6_dist_2 + sb_6x6_dist_3;
1313 0 : if (blk_geom->bsize == BLOCK_128X64) {
1314 0 : me_128x128 = sb_6x6_dist_0 + sb_6x6_dist_1 + sb_6x6_dist_2 + sb_6x6_dist_3;
1315 0 : if (blk_geom->bsize == BLOCK_64X128) {
1316 0 : me_128x128 = sb_6x6_dist_0 + sb_6x6_dist_1 + sb_6x6_dist_2 + sb_6x6_dist_3;
1317 : }
1318 : }
1319 : }
1320 : #endif
1321 : }
1322 : else
1323 3617650 : me_sb_addr = sb_index;
1324 : uint32_t max_number_of_pus_per_sb;
1325 3617650 : max_number_of_pus_per_sb = picture_control_set_ptr->parent_pcs_ptr->max_number_of_pus_per_sb;
1326 3617500 : uint32_t me_block_offset =
1327 2151980 : (blk_geom->bwidth == 4 || blk_geom->bheight == 4 || blk_geom->bwidth == 128 || blk_geom->bheight == 128) ?
1328 4243850 : 0 :
1329 1525780 : get_me_info_index(max_number_of_pus_per_sb, context_ptr->blk_geom, geom_offset_x, geom_offset_y);
1330 3617500 : MeLcuResults *me_results = picture_control_set_ptr->parent_pcs_ptr->me_results[me_sb_addr];
1331 3617500 : EbBool allow_bipred = (context_ptr->blk_geom->bwidth == 4 || context_ptr->blk_geom->bheight == 4) ? EB_FALSE : EB_TRUE;
1332 3617500 : EbBool is_compound_enabled = (picture_control_set_ptr->parent_pcs_ptr->reference_mode == SINGLE_REFERENCE) ? 0 : 1;
1333 3617500 : const MeCandidate *me_block_results = me_results->me_candidate[me_block_offset];
1334 3617500 : uint8_t total_me_cnt = me_results->total_me_candidate_index[me_block_offset];
1335 3617500 : uint8_t me_index = 0;
1336 11927000 : for (uint8_t me_candidate_index = 0; me_candidate_index < total_me_cnt; me_candidate_index++) {
1337 11927000 : const MeCandidate *me_block_results_ptr = &me_block_results[me_candidate_index];
1338 11927000 : if (is_compound_enabled) {
1339 0 : if (allow_bipred) {
1340 0 : if (me_block_results_ptr->direction == 2) {
1341 0 : me_index = me_candidate_index;
1342 0 : break;
1343 : }
1344 : }
1345 : else {
1346 0 : if (me_block_results_ptr->direction == 0) {
1347 0 : me_index = me_candidate_index;
1348 0 : break;
1349 : }
1350 : }
1351 : }
1352 : else {
1353 11927000 : if (me_block_results_ptr->direction == 0) {
1354 3617540 : me_index = me_candidate_index;
1355 3617540 : break;
1356 : }
1357 : }
1358 : }
1359 : // Initialize the mdc candidate (only av1 rate estimation inputs)
1360 3617500 : context_ptr->mdc_candidate_ptr->md_rate_estimation_ptr = context_ptr->md_rate_estimation_ptr;
1361 3617500 : context_ptr->mdc_candidate_ptr->type = INTER_MODE;
1362 3617500 : context_ptr->mdc_candidate_ptr->merge_flag = EB_FALSE;
1363 3617500 : context_ptr->mdc_candidate_ptr->prediction_direction[0] = (picture_control_set_ptr->parent_pcs_ptr->temporal_layer_index == 0) ?
1364 : UNI_PRED_LIST_0 :
1365 3430410 : me_block_results[me_index].direction;
1366 3617500 : context_ptr->mdc_candidate_ptr->inter_mode = NEARESTMV;
1367 3617500 : context_ptr->mdc_candidate_ptr->pred_mode = NEARESTMV;
1368 3617500 : context_ptr->mdc_candidate_ptr->motion_mode = SIMPLE_TRANSLATION;
1369 3617500 : context_ptr->mdc_candidate_ptr->is_new_mv = 1;
1370 3617500 : context_ptr->mdc_candidate_ptr->is_zero_mv = 0;
1371 3617500 : context_ptr->mdc_candidate_ptr->drl_index = 0;
1372 3617500 : context_ptr->mdc_candidate_ptr->motion_vector_xl0 = me_results->me_mv_array[me_block_offset][0].x_mv << 1;
1373 3617500 : context_ptr->mdc_candidate_ptr->motion_vector_yl0 = me_results->me_mv_array[me_block_offset][0].y_mv << 1;
1374 3617500 : context_ptr->mdc_candidate_ptr->motion_vector_xl1 = me_results->me_mv_array[me_block_offset][((sequence_control_set_ptr->mrp_mode == 0) ? 4 : 2)].x_mv << 1;
1375 3617500 : context_ptr->mdc_candidate_ptr->motion_vector_yl1 = me_results->me_mv_array[me_block_offset][((sequence_control_set_ptr->mrp_mode == 0) ? 4 : 2)].y_mv << 1;
1376 3617500 : context_ptr->mdc_candidate_ptr->ref_mv_index = 0;
1377 3617500 : context_ptr->mdc_candidate_ptr->pred_mv_weight = 0;
1378 3617500 : if (context_ptr->mdc_candidate_ptr->prediction_direction[0] == BI_PRED) {
1379 0 : context_ptr->mdc_candidate_ptr->ref_frame_type = LAST_BWD_FRAME;
1380 0 : context_ptr->mdc_candidate_ptr->is_compound = 1;
1381 : }
1382 3617500 : else if (context_ptr->mdc_candidate_ptr->prediction_direction[0] == UNI_PRED_LIST_0) {
1383 3617790 : context_ptr->mdc_candidate_ptr->ref_frame_type = LAST_FRAME;
1384 3617790 : context_ptr->mdc_candidate_ptr->is_compound = 0;
1385 : }
1386 : else {
1387 0 : context_ptr->mdc_candidate_ptr->ref_frame_type = BWDREF_FRAME;
1388 0 : context_ptr->mdc_candidate_ptr->is_compound = 0;
1389 : }
1390 3617500 : context_ptr->mdc_candidate_ptr->motion_vector_pred_x[REF_LIST_0] = 0;
1391 3617500 : context_ptr->mdc_candidate_ptr->motion_vector_pred_y[REF_LIST_0] = 0;
1392 : // Initialize the ref mv
1393 3617500 : memset(context_ptr->mdc_ref_mv_stack, 0, sizeof(CandidateMv));
1394 3617500 : context_ptr->mdc_cu_ptr->is_inter_ctx = 0;
1395 3617500 : context_ptr->mdc_cu_ptr->skip_flag_context = 0;
1396 3617500 : context_ptr->mdc_cu_ptr->inter_mode_ctx[context_ptr->mdc_candidate_ptr->ref_frame_type] = 0;
1397 3617500 : context_ptr->mdc_cu_ptr->reference_mode_context = 0;
1398 3617500 : context_ptr->mdc_cu_ptr->compoud_reference_type_context = 0;
1399 3617500 : av1_zero(context_ptr->mdc_cu_ptr->av1xd->neighbors_ref_counts);
1400 : #if ADD_MDC_FULL_COST
1401 3617500 : const uint8_t list0_ref_index = me_block_results[me_index].ref_idx_l0;
1402 3617500 : const uint8_t list1_ref_index = me_block_results[me_index].ref_idx_l1;
1403 3617500 : context_ptr->candidate_buffer->candidate_ptr->use_intrabc = 0;
1404 3617500 : context_ptr->candidate_buffer->candidate_ptr->motion_mode = SIMPLE_TRANSLATION;
1405 3617500 : context_ptr->candidate_buffer->candidate_ptr->md_rate_estimation_ptr = context_ptr->md_rate_estimation_ptr;
1406 3617500 : context_ptr->candidate_buffer->candidate_ptr->type = INTER_MODE;
1407 3617500 : context_ptr->candidate_buffer->candidate_ptr->merge_flag = EB_FALSE;
1408 3617500 : context_ptr->candidate_buffer->candidate_ptr->prediction_direction[0] = me_block_results[me_index].direction;
1409 3617500 : context_ptr->candidate_buffer->candidate_ptr->motion_mode = SIMPLE_TRANSLATION;
1410 3617500 : context_ptr->candidate_buffer->candidate_ptr->is_new_mv = 1;
1411 3617500 : context_ptr->candidate_buffer->candidate_ptr->is_zero_mv = 0;
1412 3617500 : context_ptr->candidate_buffer->candidate_ptr->drl_index = 0;
1413 3617500 : int16_t to_inject_mv_x_l0 = me_results->me_mv_array[me_block_offset][list0_ref_index].x_mv << 1;
1414 3617500 : int16_t to_inject_mv_y_l0 = me_results->me_mv_array[me_block_offset][list0_ref_index].y_mv << 1;
1415 3617500 : int16_t to_inject_mv_x_l1 = me_results->me_mv_array[me_block_offset][((sequence_control_set_ptr->mrp_mode == 0) ? (me_block_results[me_index].ref1_list << 2) : (me_block_results[me_index].ref1_list << 1)) + list1_ref_index].x_mv << 1;
1416 3617500 : int16_t to_inject_mv_y_l1 = me_results->me_mv_array[me_block_offset][((sequence_control_set_ptr->mrp_mode == 0) ? (me_block_results[me_index].ref1_list << 2) : (me_block_results[me_index].ref1_list << 1)) + list1_ref_index].y_mv << 1;
1417 3617500 : context_ptr->candidate_buffer->candidate_ptr->motion_vector_xl0 = to_inject_mv_x_l0;
1418 3617500 : context_ptr->candidate_buffer->candidate_ptr->motion_vector_yl0 = to_inject_mv_y_l0;
1419 3617500 : context_ptr->candidate_buffer->candidate_ptr->motion_vector_xl1 = to_inject_mv_x_l1;
1420 3617500 : context_ptr->candidate_buffer->candidate_ptr->motion_vector_yl1 = to_inject_mv_y_l1;
1421 3617500 : context_ptr->candidate_buffer->candidate_ptr->ref_mv_index = 0;
1422 3617500 : context_ptr->candidate_buffer->candidate_ptr->pred_mv_weight = 0;
1423 3617500 : if (context_ptr->candidate_buffer->candidate_ptr->prediction_direction[0] == 0) {
1424 3618030 : context_ptr->candidate_buffer->candidate_ptr->inter_mode = NEARESTMV;
1425 3618030 : context_ptr->candidate_buffer->candidate_ptr->pred_mode = NEARESTMV;
1426 3618030 : context_ptr->candidate_buffer->candidate_ptr->ref_frame_type = svt_get_ref_frame_type(REF_LIST_0, list0_ref_index);
1427 3617910 : context_ptr->candidate_buffer->candidate_ptr->ref_frame_index_l0 = list0_ref_index;
1428 3617910 : context_ptr->candidate_buffer->candidate_ptr->ref_frame_index_l1 = -1;
1429 3617910 : context_ptr->candidate_buffer->candidate_ptr->is_compound = 0;
1430 : }
1431 0 : else if (context_ptr->candidate_buffer->candidate_ptr->prediction_direction[0] == 1) {
1432 0 : context_ptr->candidate_buffer->candidate_ptr->inter_mode = NEARESTMV;
1433 0 : context_ptr->candidate_buffer->candidate_ptr->pred_mode = NEARESTMV;
1434 0 : context_ptr->candidate_buffer->candidate_ptr->ref_frame_type = svt_get_ref_frame_type(REF_LIST_1, list1_ref_index);
1435 0 : context_ptr->candidate_buffer->candidate_ptr->ref_frame_index_l0 = -1;
1436 0 : context_ptr->candidate_buffer->candidate_ptr->ref_frame_index_l1 = list1_ref_index;
1437 0 : context_ptr->candidate_buffer->candidate_ptr->is_compound = 0;
1438 : }
1439 0 : else if (context_ptr->candidate_buffer->candidate_ptr->prediction_direction[0] == 2) {
1440 : MvReferenceFrame rf[2];
1441 0 : rf[0] = svt_get_ref_frame_type(me_block_results[me_index].ref0_list, list0_ref_index);
1442 0 : rf[1] = svt_get_ref_frame_type(me_block_results[me_index].ref1_list, list1_ref_index);
1443 0 : context_ptr->candidate_buffer->candidate_ptr->ref_frame_type = av1_ref_frame_type(rf);
1444 0 : context_ptr->candidate_buffer->candidate_ptr->ref_frame_index_l0 = list0_ref_index;
1445 0 : context_ptr->candidate_buffer->candidate_ptr->ref_frame_index_l1 = list1_ref_index;
1446 0 : context_ptr->candidate_buffer->candidate_ptr->inter_mode = NEW_NEWMV;
1447 0 : context_ptr->candidate_buffer->candidate_ptr->pred_mode = NEW_NEWMV;
1448 0 : context_ptr->candidate_buffer->candidate_ptr->is_compound = 1;
1449 : }
1450 : else {
1451 0 : SVT_LOG("mdc invalid pred_direction");
1452 : }
1453 3618040 : context_ptr->candidate_buffer->candidate_ptr->motion_vector_pred_x[REF_LIST_0] = 0;
1454 3618040 : context_ptr->candidate_buffer->candidate_ptr->motion_vector_pred_y[REF_LIST_0] = 0;
1455 3618040 : context_ptr->candidate_buffer->candidate_ptr->motion_vector_pred_x[REF_LIST_1] = 0;
1456 3618040 : context_ptr->candidate_buffer->candidate_ptr->motion_vector_pred_y[REF_LIST_1] = 0;
1457 3618040 : context_ptr->candidate_buffer->candidate_ptr->interp_filters = 0;
1458 3618040 : context_ptr->candidate_buffer->candidate_ptr->compound_idx = 0;
1459 3618040 : context_ptr->candidate_buffer->candidate_ptr->interinter_comp.type = COMPOUND_AVERAGE;
1460 3618040 : context_ptr->mdc_cu_ptr->is_inter_ctx = 0;
1461 3618040 : context_ptr->mdc_cu_ptr->skip_flag_context = 0;
1462 3618040 : context_ptr->mdc_cu_ptr->inter_mode_ctx[context_ptr->candidate_buffer->candidate_ptr->ref_frame_type] = 0;
1463 3618040 : context_ptr->mdc_cu_ptr->reference_mode_context = 0;
1464 3618040 : context_ptr->mdc_cu_ptr->compoud_reference_type_context = 0;
1465 3618040 : av1_zero(context_ptr->mdc_cu_ptr->av1xd->neighbors_ref_counts);
1466 3618040 : uint16_t txb_count = context_ptr->blk_geom->txb_count[0];
1467 7236140 : for (uint16_t txb_itr = 0; txb_itr < txb_count; txb_itr++)
1468 3618100 : context_ptr->candidate_buffer->candidate_ptr->transform_type[txb_itr] = DCT_DCT;
1469 :
1470 3618040 : mdc_inter_pu_prediction_av1(
1471 : context_ptr,
1472 : picture_control_set_ptr,
1473 : context_ptr->candidate_buffer);
1474 :
1475 : //Y Residual
1476 3617600 : residual_kernel8bit(
1477 3617600 : &(input_picture_ptr->buffer_y[input_origin_index]),
1478 3617600 : input_picture_ptr->stride_y,
1479 3617600 : &(context_ptr->candidate_buffer->prediction_ptr->buffer_y[cu_origin_index]),
1480 3617600 : context_ptr->candidate_buffer->prediction_ptr->stride_y/* 64*/,
1481 3617600 : &(((int16_t*)context_ptr->candidate_buffer->residual_ptr->buffer_y)[cu_origin_index]),
1482 3617600 : context_ptr->candidate_buffer->residual_ptr->stride_y,
1483 3617600 : context_ptr->blk_geom->bwidth,
1484 3617600 : context_ptr->blk_geom->bheight);
1485 :
1486 3617460 : context_ptr->candidate_buffer->candidate_ptr->tx_depth = 0;
1487 3617460 : context_ptr->spatial_sse_full_loop = 0;
1488 :
1489 3617460 : mdc_full_loop(
1490 : context_ptr->candidate_buffer,
1491 : context_ptr,
1492 : picture_control_set_ptr,
1493 3617460 : context_ptr->qp,
1494 : count_non_zero_coeffs,
1495 : &y_coeff_bits,
1496 : y_full_distortion);
1497 7234930 : for (uint8_t txb_itr = 0; txb_itr < txb_count; txb_itr++)
1498 3617460 : context_ptr->mdc_cu_ptr->quantized_dc[0][txb_itr] = context_ptr->candidate_buffer->candidate_ptr->quantized_dc[0][txb_itr];
1499 :
1500 : #endif
1501 : // Fast Cost Calc
1502 : #if! ADD_MDC_FULL_COST
1503 : cu_ptr->early_cost = av1_inter_fast_cost(
1504 : context_ptr->mdc_cu_ptr,
1505 : context_ptr->mdc_candidate_ptr,
1506 : context_ptr->qp,
1507 :
1508 : me_block_results[me_index].distortion,
1509 :
1510 : (uint64_t)0,
1511 : context_ptr->lambda,
1512 : 0,
1513 : picture_control_set_ptr,
1514 : context_ptr->mdc_ref_mv_stack,
1515 : blk_geom,
1516 : (sb_originy + blk_geom->origin_y) >> MI_SIZE_LOG2,
1517 : (sb_originx + blk_geom->origin_x) >> MI_SIZE_LOG2,
1518 : 0,
1519 : DC_PRED, // Hsan: neighbor not generated @ open loop partitioning
1520 : DC_PRED); // Hsan: neighbor not generated @ open loop partitioning
1521 : #endif
1522 : #if ADD_MDC_FULL_COST
1523 0 : mdc_av1_inter_fast_cost(
1524 : context_ptr->mdc_cu_ptr,
1525 3617460 : context_ptr->candidate_buffer->candidate_ptr,
1526 3617460 : blk_geom->sq_size == 128 ? me_128x128 : me_block_results[me_index].distortion,
1527 : context_ptr->lambda,
1528 : 0,
1529 : picture_control_set_ptr,
1530 : context_ptr->mdc_ref_mv_stack,
1531 : blk_geom);
1532 :
1533 3616570 : cu_ptr->early_cost = mdc_av1_full_cost(
1534 : context_ptr,
1535 : y_full_distortion,
1536 : &y_coeff_bits,
1537 : context_ptr->full_lambda);
1538 : #endif
1539 : }
1540 3617280 : if (blk_geom->nsi + 1 == blk_geom->totns)
1541 2079740 : nsq_cost[context_ptr->blk_geom->shape] = mdc_d1_non_square_block_decision(context_ptr);
1542 3617450 : d1_blocks_accumlated = blk_geom->shape == PART_N ? 1 : d1_blocks_accumlated + 1;
1543 3617450 : if (d1_blocks_accumlated == leaf_data_ptr->tot_d1_blocks) {
1544 1110540 : end_idx = cuIdx + 1;
1545 : //Sorting
1546 : uint32_t i, j, index;
1547 11102800 : for (i = 0; i < NUMBER_OF_SHAPES - 1; ++i) {
1548 59955000 : for (j = i + 1; j < NUMBER_OF_SHAPES; ++j) {
1549 49962700 : if (nsq_cost[nsq_shape_table[j]] < nsq_cost[nsq_shape_table[i]]) {
1550 563085 : index = nsq_shape_table[i];
1551 563085 : nsq_shape_table[i] = nsq_shape_table[j];
1552 563085 : nsq_shape_table[j] = index;
1553 : }
1554 : }
1555 : }
1556 : #if MDC_ADAPTIVE_LEVEL
1557 1110540 : depth_cost[get_depth(context_ptr->blk_geom->sq_size)] += nsq_cost[nsq_shape_table[0]];
1558 : #endif
1559 : // Assign ranking # to each block
1560 4859900 : for (leaf_idx = start_idx; leaf_idx < end_idx; leaf_idx++) {
1561 3749520 : EbMdcLeafData * current_depth_leaf_data = &mdc_result_tb_ptr->leaf_data_array[leaf_idx];
1562 : #if COMBINE_MDC_NSQ_TABLE
1563 3749520 : current_depth_leaf_data->ol_best_nsq_shape1 = nsq_shape_table[0];
1564 3749520 : current_depth_leaf_data->ol_best_nsq_shape2 = nsq_shape_table[1];
1565 3749520 : current_depth_leaf_data->ol_best_nsq_shape3 = nsq_shape_table[2];
1566 3749520 : current_depth_leaf_data->ol_best_nsq_shape4 = nsq_shape_table[3];
1567 3749520 : current_depth_leaf_data->ol_best_nsq_shape5 = nsq_shape_table[4];
1568 3749520 : current_depth_leaf_data->ol_best_nsq_shape6 = nsq_shape_table[5];
1569 3749520 : current_depth_leaf_data->ol_best_nsq_shape7 = nsq_shape_table[6];
1570 3749520 : current_depth_leaf_data->ol_best_nsq_shape8 = nsq_shape_table[7];
1571 : #endif
1572 : }
1573 : //Reset nsq table
1574 : //memset(nsq_cost, MAX_CU_COST,NUMBER_OF_SHAPES*sizeof(uint64_t));
1575 12214200 : for (int cost_idx = 0; cost_idx < NUMBER_OF_SHAPES; cost_idx++)
1576 11103800 : nsq_cost[cost_idx] = MAX_CU_COST;
1577 12214400 : for (int sh = 0; sh < NUMBER_OF_SHAPES; sh++)
1578 11104000 : nsq_shape_table[sh] = (PART)sh;
1579 1110380 : start_idx = end_idx;
1580 1110380 : uint32_t last_cu_index = mdc_d2_inter_depth_block_decision(
1581 : picture_control_set_ptr,
1582 : context_ptr,
1583 : leaf_data_ptr,
1584 1110380 : blk_geom->sqi_mds,//input is parent square,
1585 : sb_index);
1586 1110550 : if (last_cu_index)
1587 1105630 : last_cu_index = 0;
1588 : }
1589 3617470 : cuIdx++;
1590 3830170 : } while (cuIdx < leaf_count);// End of CU loop
1591 : #if MDC_ADAPTIVE_LEVEL
1592 3480 : if (sequence_control_set_ptr->seq_header.sb_size == BLOCK_64X64)
1593 3480 : depth_cost[0] = MAX_CU_COST;
1594 : //Sorting
1595 : {
1596 : uint32_t i, j, index;
1597 20880 : for (i = 0; i < NUMBER_OF_DEPTH - 1; ++i) {
1598 69600 : for (j = i + 1; j < NUMBER_OF_DEPTH; ++j) {
1599 52200 : if (depth_cost[depth_table[j]] < depth_cost[depth_table[i]]) {
1600 18873 : index = depth_table[i];
1601 18873 : depth_table[i] = depth_table[j];
1602 18873 : depth_table[j] = index;
1603 : }
1604 : }
1605 : }
1606 : }
1607 24360 : for (uint8_t depth_idx = 0; depth_idx < NUMBER_OF_DEPTH; depth_idx++) {
1608 20880 : sb_ptr->depth_ranking[depth_idx] = find_depth_index(depth_idx, depth_table);
1609 20880 : sb_ptr->depth_cost[depth_idx] = depth_cost[depth_idx];
1610 : }
1611 : #endif
1612 3480 : return return_error;
1613 : }
1614 : #endif
1615 :
1616 2625 : void PredictionPartitionLoop( // Per-SB open-loop pass: for each valid coding unit, derive an early inter cost from the ME results and run MdcInterDepthDecision.
1617 : SequenceControlSet *sequence_control_set_ptr, // read for sb_params_array[] and mrp_mode
1618 : PictureControlSet *picture_control_set_ptr, // read for slice_type and parent_pcs_ptr (frame header, ME results, temporal layer index)
1619 : uint32_t sb_index, // indexes sb_params_array[] and parent_pcs_ptr->me_results[]
1620 : uint32_t tbOriginX, // added to blk_geom->origin_x for the fast-cost call
1621 : uint32_t tbOriginY, // added to blk_geom->origin_y for the fast-cost call
1622 : uint32_t startDepth, // lowest depth (inclusive) that is evaluated
1623 : uint32_t endDepth, // highest depth (inclusive) that is evaluated; units with depth < endDepth get early_split_flag = EB_TRUE
1624 : ModeDecisionConfigurationContext *context_ptr){ // working state: local_cu_array, mdc_candidate_ptr, mdc_cu_ptr, blk_geom and the group counters are written
1625 2625 : MdcpLocalCodingUnit *local_cu_array = context_ptr->local_cu_array;
1626 : MdcpLocalCodingUnit *cu_ptr;
1627 :
1628 2625 : SbParams *sb_params = &sequence_control_set_ptr->sb_params_array[sb_index];
1629 : uint32_t cuIndexInRaterScan;
1630 2625 : uint32_t cu_index = 0;
1631 2625 : uint32_t start_index = 0;
1632 : // NOTE: both origins are in fact used further down (fast-cost call), so the two casts below are redundant.
1633 : (void)tbOriginX;
1634 : (void)tbOriginY;
1635 :
1636 : const CodedUnitStats *cuStatsPtr;
1637 :
1638 2625 : FrameHeader *frm_hdr = &picture_control_set_ptr->parent_pcs_ptr->frm_hdr;
1639 : // Visit every coding unit index from start_index (0) up to CU_MAX_COUNT - 1.
1640 224889 : for (cu_index = start_index; cu_index < CU_MAX_COUNT; ++cu_index)
1641 :
1642 : {
1643 222269 : local_cu_array[cu_index].selected_cu = EB_FALSE;
1644 222269 : local_cu_array[cu_index].stop_split = EB_FALSE;
1645 : // cu_ptr refers to the same entry whose flags were just cleared.
1646 222269 : cu_ptr = &local_cu_array[cu_index];
1647 222269 : cuIndexInRaterScan = md_scan_to_raster_scan[cu_index];
1648 222269 : if (sb_params->raster_scan_cu_validity[cuIndexInRaterScan]) // units not flagged valid for this SB are left with cleared flags only
1649 : {
1650 : uint32_t depth;
1651 202647 : cuStatsPtr = get_coded_unit_stats(cu_index);
1652 : // Units shallower than endDepth are flagged for splitting by default.
1653 202485 : depth = cuStatsPtr->depth;
1654 202485 : cu_ptr->early_split_flag = (depth < endDepth) ? EB_TRUE : EB_FALSE;
1655 :
1656 202485 : if (depth >= startDepth && depth <= endDepth) {
1657 : // Split flags default to "always split" (set above); the inter-depth decision selects where to stop splitting (i.e. sets the flag to FALSE).
1658 : // The inter cost is only estimated for non-I slices; for I slices early_cost is not written by this function.
1659 202557 : if (picture_control_set_ptr->slice_type != I_SLICE) {
1660 202576 : const MeLcuResults *me_results = picture_control_set_ptr->parent_pcs_ptr->me_results[sb_index];
1661 202576 : const MeCandidate *me_block_results = me_results->me_candidate[cuIndexInRaterScan];
1662 202576 : uint8_t total_me_cnt = me_results->total_me_candidate_index[cuIndexInRaterScan];
1663 202576 : uint8_t me_index = 0; // stays 0 (first candidate) when no candidate matches in the loop below
1664 430386 : for (uint8_t me_candidate_index = 0; me_candidate_index < total_me_cnt; me_candidate_index++) {
1665 430519 : const MeCandidate *me_block_results_ptr = &me_block_results[me_candidate_index];
1666 430519 : if (frm_hdr->reference_mode == SINGLE_REFERENCE) { // single-reference frames: take the first candidate with direction 0
1667 10850 : if (me_block_results_ptr->direction == 0) {
1668 10850 : me_index = me_candidate_index;
1669 10850 : break;
1670 : }
1671 : }
1672 : else { // otherwise: take the first candidate with direction 2
1673 419669 : if (me_block_results_ptr->direction == 2) {
1674 191859 : me_index = me_candidate_index;
1675 191859 : break;
1676 : }
1677 : }
1678 : }
1679 :
1680 : // me_block_results[me_index] is the ME candidate whose direction and distortion are used below.
1681 :
1682 : // Initialize the mdc candidate (only av1 rate estimation inputs)
1683 : // Hsan: mode, direction, .. could be modified toward better early inter depth decision (e.g. NEARESTMV instead of NEWMV)
1684 202576 : context_ptr->mdc_candidate_ptr->md_rate_estimation_ptr = context_ptr->md_rate_estimation_ptr;
1685 202576 : context_ptr->mdc_candidate_ptr->type = INTER_MODE;
1686 202576 : context_ptr->mdc_candidate_ptr->merge_flag = EB_FALSE;
1687 202576 : context_ptr->mdc_candidate_ptr->prediction_direction[0] = (picture_control_set_ptr->parent_pcs_ptr->temporal_layer_index == 0) ? // temporal layer 0 always uses UNI_PRED_LIST_0
1688 : UNI_PRED_LIST_0 :
1689 201895 : me_block_results[me_index].direction;
1690 : // Hsan: what's the best mode for rate simulation
1691 202576 : context_ptr->mdc_candidate_ptr->inter_mode = NEARESTMV;
1692 202576 : context_ptr->mdc_candidate_ptr->pred_mode = NEARESTMV;
1693 202576 : context_ptr->mdc_candidate_ptr->motion_mode = SIMPLE_TRANSLATION;
1694 202576 : context_ptr->mdc_candidate_ptr->is_new_mv = 1;
1695 202576 : context_ptr->mdc_candidate_ptr->is_zero_mv = 0;
1696 202576 : context_ptr->mdc_candidate_ptr->drl_index = 0;
1697 202576 : context_ptr->mdc_candidate_ptr->motion_vector_xl0 = me_results->me_mv_array[cuIndexInRaterScan][0].x_mv << 1; // ME MV doubled; NOTE(review): presumably a precision change to the MV units expected by the rate estimator - confirm
1698 202576 : context_ptr->mdc_candidate_ptr->motion_vector_yl0 = me_results->me_mv_array[cuIndexInRaterScan][0].y_mv << 1;
1699 202576 : context_ptr->mdc_candidate_ptr->motion_vector_xl1 = me_results->me_mv_array[cuIndexInRaterScan][((sequence_control_set_ptr->mrp_mode == 0) ? 4 : 2)].x_mv << 1; // slot 4 when mrp_mode == 0, else slot 2; presumably the first list-1 reference - confirm
1700 202576 : context_ptr->mdc_candidate_ptr->motion_vector_yl1 = me_results->me_mv_array[cuIndexInRaterScan][((sequence_control_set_ptr->mrp_mode == 0) ? 4 : 2)].y_mv << 1;
1701 202576 : context_ptr->mdc_candidate_ptr->ref_mv_index = 0;
1702 202576 : context_ptr->mdc_candidate_ptr->pred_mv_weight = 0;
1703 202576 : if (context_ptr->mdc_candidate_ptr->prediction_direction[0] == BI_PRED) {
1704 191336 : context_ptr->mdc_candidate_ptr->ref_frame_type = LAST_BWD_FRAME;
1705 191336 : context_ptr->mdc_candidate_ptr->is_compound = 1;
1706 : }
1707 11240 : else if (context_ptr->mdc_candidate_ptr->prediction_direction[0] == UNI_PRED_LIST_0) {
1708 11700 : context_ptr->mdc_candidate_ptr->ref_frame_type = LAST_FRAME;
1709 11700 : context_ptr->mdc_candidate_ptr->is_compound = 0;
1710 : }
1711 : else { // any other prediction_direction[0] value
1712 0 : context_ptr->mdc_candidate_ptr->ref_frame_type = BWDREF_FRAME;
1713 0 : context_ptr->mdc_candidate_ptr->is_compound = 0;
1714 : }
1715 202576 : context_ptr->mdc_candidate_ptr->motion_vector_pred_x[REF_LIST_0] = 0;
1716 202576 : context_ptr->mdc_candidate_ptr->motion_vector_pred_y[REF_LIST_0] = 0;
1717 : // Initialize the ref mv (zeroes sizeof(CandidateMv) bytes, i.e. one entry)
1718 202576 : memset(context_ptr->mdc_ref_mv_stack,0,sizeof(CandidateMv));
1719 202576 : context_ptr->blk_geom = get_blk_geom_mds(pa_to_ep_block_index[cu_index]); // must be set before the cost call below, which reads blk_geom
1720 : // Initialize mdc cu (only av1 rate estimation inputs)
1721 202885 : context_ptr->mdc_cu_ptr->is_inter_ctx = 0;
1722 202885 : context_ptr->mdc_cu_ptr->skip_flag_context = 0;
1723 202885 : context_ptr->mdc_cu_ptr->inter_mode_ctx[context_ptr->mdc_candidate_ptr->ref_frame_type] = 0;
1724 202885 : context_ptr->mdc_cu_ptr->reference_mode_context = 0;
1725 202885 : context_ptr->mdc_cu_ptr->compoud_reference_type_context = 0;
1726 202885 : av1_zero(context_ptr->mdc_cu_ptr->av1xd->neighbors_ref_counts); // Hsan: neighbor not generated @ open loop partitioning => assumes always (0,0)
1727 :
1728 : // Fast Cost Calc: the selected ME candidate's distortion is the only distortion input; the result becomes this unit's early_cost
1729 202937 : cu_ptr->early_cost = av1_inter_fast_cost(
1730 : context_ptr->mdc_cu_ptr,
1731 : context_ptr->mdc_candidate_ptr,
1732 202885 : context_ptr->qp,
1733 202885 : me_block_results[me_index].distortion,
1734 : (uint64_t) 0,
1735 : context_ptr->lambda,
1736 : 0,
1737 : picture_control_set_ptr,
1738 : context_ptr->mdc_ref_mv_stack,
1739 : context_ptr->blk_geom,
1740 202885 : (tbOriginY + context_ptr->blk_geom->origin_y) >> MI_SIZE_LOG2,
1741 202885 : (tbOriginX + context_ptr->blk_geom->origin_x) >> MI_SIZE_LOG2,
1742 : 0,
1743 : DC_PRED, // Hsan: neighbor not generated @ open loop partitioning
1744 : DC_PRED); // Hsan: neighbor not generated @ open loop partitioning
1745 : }
1746 : // The group counters are only updated when endDepth is 2 or 1; they are then set from incrementalCount[] for units at endDepth and to 0 otherwise.
1747 202918 : if (endDepth == 2)
1748 0 : context_ptr->group_of8x8_blocks_count = depth == 2 ? incrementalCount[cuIndexInRaterScan] : 0;
1749 202918 : if (endDepth == 1)
1750 0 : context_ptr->group_of16x16_blocks_count = depth == 1 ? incrementalCount[cuIndexInRaterScan] : 0;
1751 202918 : MdcInterDepthDecision(
1752 : context_ptr,
1753 202918 : cuStatsPtr->origin_x,
1754 202918 : cuStatsPtr->origin_y,
1755 : endDepth,
1756 : cu_index);
1757 : }
1758 : else
1759 0 : cu_ptr->early_cost = ~0u; // depth outside [startDepth, endDepth]; NOTE(review): ~0u is an all-ones unsigned int - if early_cost is wider than unsigned int this is not its maximum value, confirm intent
1760 : }
1761 : }// End CU Loop
1762 2620 : }
1763 :
1764 2625 : EbErrorType early_mode_decision_lcu( // Runs the three early mode-decision stages for one SB over depths DEPTH_64..DEPTH_8; always returns EB_ErrorNone.
1765 : SequenceControlSet *sequence_control_set_ptr, // forwarded unchanged to all three stages
1766 : PictureControlSet *picture_control_set_ptr, // forwarded unchanged to all three stages
1767 : LargestCodingUnit *sb_ptr, // only origin_x / origin_y are read here
1768 : uint32_t sb_index, // forwarded unchanged to all three stages
1769 : ModeDecisionConfigurationContext *context_ptr){ // group counters are reset here, then the context is handed to each stage
1770 2625 : EbErrorType return_error = EB_ErrorNone;
1771 2625 : uint32_t tbOriginX = sb_ptr->origin_x;
1772 2625 : uint32_t tbOriginY = sb_ptr->origin_y;
1773 : // Depth range handed to PredictionPartitionLoop: DEPTH_64 through DEPTH_8.
1774 2625 : uint32_t startDepth = DEPTH_64;
1775 :
1776 2625 : uint32_t endDepth = DEPTH_8 ;
1777 2625 : context_ptr->group_of8x8_blocks_count = 0;
1778 2625 : context_ptr->group_of16x16_blocks_count = 0;
1779 : // Stage 1: per-unit early cost and inter-depth decision.
1780 2625 : PredictionPartitionLoop(
1781 : sequence_control_set_ptr,
1782 : picture_control_set_ptr,
1783 : sb_index,
1784 : tbOriginX,
1785 : tbOriginY,
1786 : startDepth,
1787 : endDepth,
1788 : context_ptr
1789 : );
1790 : // Stage 2: refinement pass (does not take the SB origin or the depth range).
1791 2625 : RefinementPredictionLoop(
1792 : sequence_control_set_ptr,
1793 : picture_control_set_ptr,
1794 : sb_index,
1795 : context_ptr);
1796 : // Stage 3: forwarding pass, called last with the same arguments as stage 2.
1797 2625 : ForwardCuToModeDecision(
1798 : sequence_control_set_ptr,
1799 : picture_control_set_ptr,
1800 : sb_index,
1801 : context_ptr);
1802 : // return_error is never reassigned above, so this is always EB_ErrorNone.
1803 2625 : return return_error;
1804 : }
1805 :
|