Line data Source code
1 : /*
2 : * Copyright(c) 2019 Intel Corporation
3 : * SPDX - License - Identifier: BSD - 2 - Clause - Patent
4 : */
5 :
6 : /*
7 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
8 : *
9 : * This source code is subject to the terms of the BSD 2 Clause License and
10 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
11 : * was not distributed with this source code in the LICENSE file, you can
12 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
13 : * Media Patent License 1.0 was not distributed with this source code in the
14 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
15 : */
16 :
17 : #include <stdlib.h>
18 :
19 : #include "EbDefinitions.h"
20 : #include "EbUtility.h"
21 : #include "EbTransformUnit.h"
22 : #include "EbRateDistortionCost.h"
23 : #include "EbFullLoop.h"
24 : #include "EbPictureOperators.h"
25 :
26 : #include "EbModeDecisionProcess.h"
27 : #include "EbComputeSAD.h"
28 : #include "EbTransforms.h"
29 : #include "EbMeSadCalculation.h"
30 : #include "EbMotionEstimation.h"
31 : #include "EbAvcStyleMcp.h"
32 : #include "aom_dsp_rtcd.h"
33 : #include "EbCodingLoop.h"
34 :
/* Predictive motion-estimation tuning constants.
 * NOTE(review): purposes are inferred from the macro names only; confirm
 * against the predictive-ME search code that consumes them. */
#define PREDICTIVE_ME_MAX_MVP_CANIDATES 4
#define PREDICTIVE_ME_DEVIATION_TH      50

/* Refinement window extents per search precision. */
#define FULL_PEL_REF_WINDOW_WIDTH       7
#define FULL_PEL_REF_WINDOW_HEIGHT      5
#define HALF_PEL_REF_WINDOW             3
#define QUARTER_PEL_REF_WINDOW          3
#if EIGHT_PEL_PREDICTIVE_ME
#define EIGHT_PEL_REF_WINDOW            3
#endif
44 : EbErrorType generate_md_stage_0_cand(
45 : LargestCodingUnit *sb_ptr,
46 : ModeDecisionContext *context_ptr,
47 : SsMeContext *ss_mecontext,
48 : uint32_t *fast_candidate_total_count,
49 : PictureControlSet *picture_control_set_ptr);
50 :
#if II_COMP_FLAG
/* Returns non-zero when bsize lies in the closed range
 * [BLOCK_8X8, BLOCK_32X32], zero otherwise. */
static INLINE int is_interintra_allowed_bsize(const BlockSize bsize) {
    if (bsize < BLOCK_8X8)
        return 0;
    return bsize <= BLOCK_32X32;
}

/* Forward declaration; the definition is not visible in this part of the file. */
void precompute_intra_pred_for_inter_intra(PictureControlSet   *picture_control_set_ptr,
                                           ModeDecisionContext *context_ptr);
#endif
59 :
#if PAL_SUP
/* Forward declaration; the definition is not visible in this part of the file. */
int svt_av1_allow_palette(int allow_palette, BlockSize sb_type);
#endif
64 : /*******************************************
65 : * set Penalize Skip Flag
66 : *
67 : * Summary: Set the penalize_skipflag to true
68 : * When there is luminance/chrominance change
69 : * or in noisy clip with low motion at medium
70 : * variance area
71 : *
72 : *******************************************/
73 :
74 : const EbPredictionFunc ProductPredictionFunTable[3] = { NULL, inter_pu_prediction_av1, eb_av1_intra_prediction_cl};
75 :
76 : const EbFastCostFunc Av1ProductFastCostFuncTable[3] =
77 : {
78 : NULL,
79 : av1_inter_fast_cost, /*INTER */
80 : av1_intra_fast_cost /*INTRA */
81 : };
82 :
83 : const EbAv1FullCostFunc Av1ProductFullCostFuncTable[3] =
84 : {
85 : NULL,
86 : av1_inter_full_cost, /*INTER */
87 : av1_intra_full_cost/*INTRA */
88 : };
89 :
90 : /***************************************************
91 : * Update Recon Samples Neighbor Arrays
92 : ***************************************************/
93 1217070 : void mode_decision_update_neighbor_arrays(
94 : PictureControlSet *picture_control_set_ptr,
95 : ModeDecisionContext *context_ptr,
96 : uint32_t index_mds,
97 : EbBool intraMdOpenLoop,
98 : EbBool intra4x4Selected){
99 1217070 : uint32_t bwdith = context_ptr->blk_geom->bwidth;
100 1217070 : uint32_t bheight = context_ptr->blk_geom->bheight;
101 :
102 1217070 : uint32_t origin_x = context_ptr->cu_origin_x;
103 1217070 : uint32_t origin_y = context_ptr->cu_origin_y;
104 : (void)intra4x4Selected;
105 :
106 1217070 : uint32_t cu_origin_x_uv = context_ptr->round_origin_x >> 1;
107 1217070 : uint32_t cu_origin_y_uv = context_ptr->round_origin_y >> 1;
108 1217070 : uint32_t bwdith_uv = context_ptr->blk_geom->bwidth_uv;
109 1217070 : uint32_t bwheight_uv = context_ptr->blk_geom->bheight_uv;
110 :
111 1217070 : uint8_t modeType = context_ptr->cu_ptr->prediction_mode_flag;
112 1217070 : uint8_t intra_luma_mode = (uint8_t)context_ptr->cu_ptr->pred_mode;
113 1217070 : uint8_t chroma_mode = (uint8_t)context_ptr->cu_ptr->prediction_unit_array->intra_chroma_mode;
114 1217070 : uint8_t skip_flag = (uint8_t)context_ptr->cu_ptr->skip_flag;
115 :
116 1217070 : context_ptr->mv_unit.pred_direction = (uint8_t)(context_ptr->md_cu_arr_nsq[index_mds].prediction_unit_array[0].inter_pred_direction_index);
117 1217070 : context_ptr->mv_unit.mv[REF_LIST_0].mv_union = context_ptr->md_cu_arr_nsq[index_mds].prediction_unit_array[0].mv[REF_LIST_0].mv_union;
118 1217070 : context_ptr->mv_unit.mv[REF_LIST_1].mv_union = context_ptr->md_cu_arr_nsq[index_mds].prediction_unit_array[0].mv[REF_LIST_1].mv_union;
119 1217070 : uint8_t inter_pred_direction_index = (uint8_t)context_ptr->cu_ptr->prediction_unit_array->inter_pred_direction_index;
120 1217070 : uint8_t ref_frame_type = (uint8_t)context_ptr->cu_ptr->prediction_unit_array[0].ref_frame_type;
121 :
122 1217070 : if (picture_control_set_ptr->parent_pcs_ptr->interpolation_search_level != IT_SEARCH_OFF)
123 1132860 : neighbor_array_unit_mode_write32(
124 : context_ptr->interpolation_type_neighbor_array,
125 1132860 : context_ptr->cu_ptr->interp_filters,
126 : origin_x,
127 : origin_y,
128 : bwdith,
129 : bheight,
130 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
131 :
132 : {
133 : struct PartitionContext partition;
134 1217040 : partition.above = partition_context_lookup[context_ptr->blk_geom->bsize].above;
135 1217040 : partition.left = partition_context_lookup[context_ptr->blk_geom->bsize].left;
136 :
137 1217040 : neighbor_array_unit_mode_write(
138 : context_ptr->leaf_partition_neighbor_array,
139 : (uint8_t*)(&partition), // NaderM
140 : origin_x,
141 : origin_y,
142 : bwdith,
143 : bheight,
144 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
145 :
146 : // Mode Type Update
147 1217130 : neighbor_array_unit_mode_write(
148 : context_ptr->mode_type_neighbor_array,
149 : &modeType,
150 : origin_x,
151 : origin_y,
152 : bwdith,
153 : bheight,
154 : NEIGHBOR_ARRAY_UNIT_FULL_MASK);
155 1217140 : if (picture_control_set_ptr->parent_pcs_ptr->skip_sub_blks)
156 : // Intra Luma Mode Update
157 0 : neighbor_array_unit_mode_write(
158 : context_ptr->leaf_depth_neighbor_array,
159 0 : (uint8_t*)&context_ptr->blk_geom->bsize,//(uint8_t*)luma_mode,
160 : origin_x,
161 : origin_y,
162 : bwdith,
163 : bheight,
164 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
165 : // Intra Luma Mode Update
166 1217140 : neighbor_array_unit_mode_write(
167 : context_ptr->intra_luma_mode_neighbor_array,
168 : &intra_luma_mode,//(uint8_t*)luma_mode,
169 : origin_x,
170 : origin_y,
171 : bwdith,
172 : bheight,
173 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
174 :
175 1217130 : uint16_t txb_count = context_ptr->blk_geom->txb_count[context_ptr->cu_ptr->tx_depth];
176 2490580 : for (uint8_t txb_itr = 0; txb_itr < txb_count; txb_itr++)
177 : {
178 1273460 : uint8_t dc_sign_level_coeff = (int32_t)context_ptr->cu_ptr->quantized_dc[0][txb_itr];
179 :
180 1273460 : neighbor_array_unit_mode_write(
181 : context_ptr->luma_dc_sign_level_coeff_neighbor_array,
182 : (uint8_t*)&dc_sign_level_coeff,
183 1273460 : context_ptr->sb_origin_x + context_ptr->blk_geom->tx_org_x[context_ptr->cu_ptr->tx_depth][txb_itr],
184 1273460 : context_ptr->sb_origin_y + context_ptr->blk_geom->tx_org_y[context_ptr->cu_ptr->tx_depth][txb_itr],
185 1273460 : context_ptr->blk_geom->tx_width[context_ptr->cu_ptr->tx_depth][txb_itr],
186 1273460 : context_ptr->blk_geom->tx_height[context_ptr->cu_ptr->tx_depth][txb_itr],
187 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
188 :
189 1273450 : neighbor_array_unit_mode_write(
190 : picture_control_set_ptr->md_tx_depth_1_luma_dc_sign_level_coeff_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX],
191 : (uint8_t*)&dc_sign_level_coeff,
192 1273450 : context_ptr->sb_origin_x + context_ptr->blk_geom->tx_org_x[context_ptr->cu_ptr->tx_depth][txb_itr],
193 1273450 : context_ptr->sb_origin_y + context_ptr->blk_geom->tx_org_y[context_ptr->cu_ptr->tx_depth][txb_itr],
194 1273450 : context_ptr->blk_geom->tx_width[context_ptr->cu_ptr->tx_depth][txb_itr],
195 1273450 : context_ptr->blk_geom->tx_height[context_ptr->cu_ptr->tx_depth][txb_itr],
196 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
197 : }
198 : }
199 :
200 : // Hsan: chroma mode rate estimation is kept even for chroma blind
201 1217120 : if (context_ptr->blk_geom->has_uv) {
202 : // Intra Chroma Mode Update
203 818364 : neighbor_array_unit_mode_write(
204 : context_ptr->intra_chroma_mode_neighbor_array,
205 : &chroma_mode,
206 : cu_origin_x_uv,
207 : cu_origin_y_uv,
208 : bwdith_uv,
209 : bwheight_uv,
210 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
211 : }
212 :
213 1217140 : neighbor_array_unit_mode_write(
214 : context_ptr->skip_flag_neighbor_array,
215 : &skip_flag,
216 : origin_x,
217 : origin_y,
218 : bwdith,
219 : bheight,
220 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
221 :
222 1217150 : if (context_ptr->blk_geom->has_uv && context_ptr->chroma_level <= CHROMA_MODE_1) {
223 : // Update chroma CB cbf and Dc context
224 : {
225 734071 : uint8_t dc_sign_level_coeff = (int32_t)context_ptr->cu_ptr->quantized_dc[1][0];
226 734071 : neighbor_array_unit_mode_write(
227 : context_ptr->cb_dc_sign_level_coeff_neighbor_array,
228 : (uint8_t*)&dc_sign_level_coeff,
229 : cu_origin_x_uv,
230 : cu_origin_y_uv,
231 : bwdith_uv,
232 : bwheight_uv,
233 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
234 : }
235 :
236 : // Update chroma CR cbf and Dc context
237 : {
238 734067 : uint8_t dc_sign_level_coeff = (int32_t)context_ptr->cu_ptr->quantized_dc[2][0];
239 734067 : neighbor_array_unit_mode_write(
240 : context_ptr->cr_dc_sign_level_coeff_neighbor_array,
241 : (uint8_t*)&dc_sign_level_coeff,
242 : cu_origin_x_uv,
243 : cu_origin_y_uv,
244 : bwdith_uv,
245 : bwheight_uv,
246 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
247 : }
248 : }
249 : #if ENHANCE_ATB
250 1217140 : uint8_t tx_size = tx_depth_to_tx_size[context_ptr->cu_ptr->tx_depth][context_ptr->blk_geom->bsize];
251 1217140 : uint8_t bw = tx_size_wide[tx_size];
252 1217140 : uint8_t bh = tx_size_high[tx_size];
253 :
254 1217140 : neighbor_array_unit_mode_write(
255 : context_ptr->txfm_context_array,
256 : &bw,
257 : origin_x,
258 : origin_y,
259 : bwdith,
260 : bheight,
261 : NEIGHBOR_ARRAY_UNIT_TOP_MASK);
262 :
263 1217170 : neighbor_array_unit_mode_write(
264 : context_ptr->txfm_context_array,
265 : &bh,
266 : origin_x,
267 : origin_y,
268 : bwdith,
269 : bheight,
270 : NEIGHBOR_ARRAY_UNIT_LEFT_MASK);
271 : #else
272 : neighbor_array_unit_mode_write(
273 : context_ptr->txfm_context_array,
274 : &context_ptr->cu_ptr->tx_depth,
275 : origin_x,
276 : origin_y,
277 : bwdith,
278 : bheight,
279 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
280 : #endif
281 :
282 : // Update the Inter Pred Type Neighbor Array
283 :
284 1217150 : neighbor_array_unit_mode_write(
285 : context_ptr->inter_pred_dir_neighbor_array,
286 : &inter_pred_direction_index,
287 : origin_x,
288 : origin_y,
289 : bwdith,
290 : bheight,
291 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
292 :
293 : // Update the refFrame Type Neighbor Array
294 1217140 : neighbor_array_unit_mode_write(
295 : context_ptr->ref_frame_type_neighbor_array,
296 : &ref_frame_type,
297 : origin_x,
298 : origin_y,
299 : bwdith,
300 : bheight,
301 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
302 :
303 1217160 : if (!context_ptr->hbd_mode_decision) {
304 1217160 : if (intraMdOpenLoop == EB_FALSE)
305 : {
306 1217160 : update_recon_neighbor_array(
307 : context_ptr->luma_recon_neighbor_array,
308 1217160 : context_ptr->cu_ptr->neigh_top_recon[0],
309 1217160 : context_ptr->cu_ptr->neigh_left_recon[0],
310 : origin_x,
311 : origin_y,
312 1217160 : context_ptr->blk_geom->bwidth,
313 1217160 : context_ptr->blk_geom->bheight);
314 1217200 : if (picture_control_set_ptr->parent_pcs_ptr->atb_mode) {
315 159544 : update_recon_neighbor_array(
316 : picture_control_set_ptr->md_tx_depth_1_luma_recon_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX],
317 159544 : context_ptr->cu_ptr->neigh_top_recon[0],
318 159544 : context_ptr->cu_ptr->neigh_left_recon[0],
319 : origin_x,
320 : origin_y,
321 159544 : context_ptr->blk_geom->bwidth,
322 159544 : context_ptr->blk_geom->bheight);
323 : }
324 : }
325 :
326 1217200 : if (intraMdOpenLoop == EB_FALSE) {
327 1217200 : if (context_ptr->blk_geom->has_uv && context_ptr->chroma_level <= CHROMA_MODE_1) {
328 734074 : update_recon_neighbor_array(
329 : context_ptr->cb_recon_neighbor_array,
330 734074 : context_ptr->cu_ptr->neigh_top_recon[1],
331 734074 : context_ptr->cu_ptr->neigh_left_recon[1],
332 : cu_origin_x_uv,
333 : cu_origin_y_uv,
334 : bwdith_uv,
335 : bwheight_uv);
336 734067 : update_recon_neighbor_array(
337 : context_ptr->cr_recon_neighbor_array,
338 734067 : context_ptr->cu_ptr->neigh_top_recon[2],
339 734067 : context_ptr->cu_ptr->neigh_left_recon[2],
340 : cu_origin_x_uv,
341 : cu_origin_y_uv,
342 : bwdith_uv,
343 : bwheight_uv);
344 : }
345 : }
346 : } else {
347 0 : if (intraMdOpenLoop == EB_FALSE)
348 0 : update_recon_neighbor_array16bit(
349 : context_ptr->luma_recon_neighbor_array16bit,
350 0 : context_ptr->cu_ptr->neigh_top_recon_16bit[0],
351 0 : context_ptr->cu_ptr->neigh_left_recon_16bit[0],
352 : origin_x,
353 : origin_y,
354 0 : context_ptr->blk_geom->bwidth,
355 0 : context_ptr->blk_geom->bheight);
356 :
357 0 : if (picture_control_set_ptr->parent_pcs_ptr->atb_mode) {
358 0 : update_recon_neighbor_array16bit(
359 : picture_control_set_ptr->md_tx_depth_1_luma_recon_neighbor_array16bit[MD_NEIGHBOR_ARRAY_INDEX],
360 0 : context_ptr->cu_ptr->neigh_top_recon_16bit[0],
361 0 : context_ptr->cu_ptr->neigh_left_recon_16bit[0],
362 : origin_x,
363 : origin_y,
364 0 : context_ptr->blk_geom->bwidth,
365 0 : context_ptr->blk_geom->bheight);
366 : }
367 :
368 0 : if (intraMdOpenLoop == EB_FALSE &&
369 0 : context_ptr->blk_geom->has_uv &&
370 0 : context_ptr->chroma_level <= CHROMA_MODE_1)
371 : {
372 0 : update_recon_neighbor_array16bit(
373 : context_ptr->cb_recon_neighbor_array16bit,
374 0 : context_ptr->cu_ptr->neigh_top_recon_16bit[1],
375 0 : context_ptr->cu_ptr->neigh_left_recon_16bit[1],
376 : cu_origin_x_uv,
377 : cu_origin_y_uv,
378 : bwdith_uv,
379 : bwheight_uv);
380 0 : update_recon_neighbor_array16bit(
381 : context_ptr->cr_recon_neighbor_array16bit,
382 0 : context_ptr->cu_ptr->neigh_top_recon_16bit[2],
383 0 : context_ptr->cu_ptr->neigh_left_recon_16bit[2],
384 : cu_origin_x_uv,
385 : cu_origin_y_uv,
386 : bwdith_uv,
387 : bwheight_uv);
388 : }
389 : }
390 :
391 1217200 : return;
392 : }
393 :
394 629717 : void copy_neighbour_arrays(
395 : PictureControlSet *picture_control_set_ptr,
396 : ModeDecisionContext *context_ptr,
397 : uint32_t src_idx,
398 : uint32_t dst_idx,
399 : uint32_t blk_mds,
400 : uint32_t sb_org_x,
401 : uint32_t sb_org_y)
402 : {
403 : (void)*context_ptr;
404 :
405 629717 : const BlockGeom * blk_geom = get_blk_geom_mds(blk_mds);
406 :
407 629720 : uint32_t blk_org_x = sb_org_x + blk_geom->origin_x;
408 629720 : uint32_t blk_org_y = sb_org_y + blk_geom->origin_y;
409 629720 : uint32_t blk_org_x_uv = (blk_org_x >> 3 << 3) >> 1;
410 629720 : uint32_t blk_org_y_uv = (blk_org_y >> 3 << 3) >> 1;
411 629720 : uint32_t bwidth_uv = blk_geom->bwidth_uv;
412 629720 : uint32_t bheight_uv = blk_geom->bheight_uv;
413 :
414 629720 : copy_neigh_arr(
415 : picture_control_set_ptr->md_intra_luma_mode_neighbor_array[src_idx],
416 : picture_control_set_ptr->md_intra_luma_mode_neighbor_array[dst_idx],
417 : blk_org_x,
418 : blk_org_y,
419 629720 : blk_geom->bwidth,
420 629720 : blk_geom->bheight,
421 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
422 :
423 : //neighbor_array_unit_reset(picture_control_set_ptr->md_intra_chroma_mode_neighbor_array[depth]);
424 629742 : copy_neigh_arr(
425 : picture_control_set_ptr->md_intra_chroma_mode_neighbor_array[src_idx],
426 : picture_control_set_ptr->md_intra_chroma_mode_neighbor_array[dst_idx],
427 : blk_org_x_uv,
428 : blk_org_y_uv,
429 : bwidth_uv,
430 : bheight_uv,
431 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
432 :
433 : //neighbor_array_unit_reset(picture_control_set_ptr->md_skip_flag_neighbor_array[depth]);
434 629727 : copy_neigh_arr(
435 : picture_control_set_ptr->md_skip_flag_neighbor_array[src_idx],
436 : picture_control_set_ptr->md_skip_flag_neighbor_array[dst_idx],
437 : blk_org_x,
438 : blk_org_y,
439 629727 : blk_geom->bwidth,
440 629727 : blk_geom->bheight,
441 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
442 :
443 : //neighbor_array_unit_reset(picture_control_set_ptr->md_mode_type_neighbor_array[depth]);
444 629735 : copy_neigh_arr(
445 : picture_control_set_ptr->md_mode_type_neighbor_array[src_idx],
446 : picture_control_set_ptr->md_mode_type_neighbor_array[dst_idx],
447 : blk_org_x,
448 : blk_org_y,
449 629735 : blk_geom->bwidth,
450 629735 : blk_geom->bheight,
451 : NEIGHBOR_ARRAY_UNIT_FULL_MASK);
452 :
453 : //neighbor_array_unit_reset(picture_control_set_ptr->md_leaf_depth_neighbor_array[depth]);
454 629738 : copy_neigh_arr(
455 : picture_control_set_ptr->md_leaf_depth_neighbor_array[src_idx],
456 : picture_control_set_ptr->md_leaf_depth_neighbor_array[dst_idx],
457 : blk_org_x,
458 : blk_org_y,
459 629738 : blk_geom->bwidth,
460 629738 : blk_geom->bheight,
461 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
462 629732 : copy_neigh_arr(
463 : picture_control_set_ptr->mdleaf_partition_neighbor_array[src_idx],
464 : picture_control_set_ptr->mdleaf_partition_neighbor_array[dst_idx],
465 : blk_org_x,
466 : blk_org_y,
467 629732 : blk_geom->bwidth,
468 629732 : blk_geom->bheight,
469 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
470 :
471 629741 : if (!context_ptr->hbd_mode_decision) {
472 629741 : copy_neigh_arr(
473 : picture_control_set_ptr->md_luma_recon_neighbor_array[src_idx],
474 : picture_control_set_ptr->md_luma_recon_neighbor_array[dst_idx],
475 : blk_org_x,
476 : blk_org_y,
477 629741 : blk_geom->bwidth,
478 629741 : blk_geom->bheight,
479 : NEIGHBOR_ARRAY_UNIT_FULL_MASK);
480 629751 : if (picture_control_set_ptr->parent_pcs_ptr->atb_mode) {
481 87089 : copy_neigh_arr(
482 : picture_control_set_ptr->md_tx_depth_1_luma_recon_neighbor_array[src_idx],
483 : picture_control_set_ptr->md_tx_depth_1_luma_recon_neighbor_array[dst_idx],
484 : blk_org_x,
485 : blk_org_y,
486 87089 : blk_geom->bwidth,
487 87089 : blk_geom->bheight,
488 : NEIGHBOR_ARRAY_UNIT_FULL_MASK);
489 : }
490 629751 : if (blk_geom->has_uv && context_ptr->chroma_level <= CHROMA_MODE_1) {
491 629754 : copy_neigh_arr(
492 : picture_control_set_ptr->md_cb_recon_neighbor_array[src_idx],
493 : picture_control_set_ptr->md_cb_recon_neighbor_array[dst_idx],
494 : blk_org_x_uv,
495 : blk_org_y_uv,
496 : bwidth_uv,
497 : bheight_uv,
498 : NEIGHBOR_ARRAY_UNIT_FULL_MASK);
499 :
500 629749 : copy_neigh_arr(
501 : picture_control_set_ptr->md_cr_recon_neighbor_array[src_idx],
502 : picture_control_set_ptr->md_cr_recon_neighbor_array[dst_idx],
503 : blk_org_x_uv,
504 : blk_org_y_uv,
505 : bwidth_uv,
506 : bheight_uv,
507 : NEIGHBOR_ARRAY_UNIT_FULL_MASK);
508 : }
509 : } else {
510 0 : copy_neigh_arr(
511 : picture_control_set_ptr->md_luma_recon_neighbor_array16bit[src_idx],
512 : picture_control_set_ptr->md_luma_recon_neighbor_array16bit[dst_idx],
513 : blk_org_x,
514 : blk_org_y,
515 0 : blk_geom->bwidth,
516 0 : blk_geom->bheight,
517 : NEIGHBOR_ARRAY_UNIT_FULL_MASK);
518 :
519 0 : if (picture_control_set_ptr->parent_pcs_ptr->atb_mode) {
520 0 : copy_neigh_arr(
521 : picture_control_set_ptr->md_tx_depth_1_luma_recon_neighbor_array16bit[src_idx],
522 : picture_control_set_ptr->md_tx_depth_1_luma_recon_neighbor_array16bit[dst_idx],
523 : blk_org_x,
524 : blk_org_y,
525 0 : blk_geom->bwidth,
526 0 : blk_geom->bheight,
527 : NEIGHBOR_ARRAY_UNIT_FULL_MASK);
528 : }
529 :
530 0 : if (blk_geom->has_uv && context_ptr->chroma_level <= CHROMA_MODE_1) {
531 0 : copy_neigh_arr(
532 : picture_control_set_ptr->md_cb_recon_neighbor_array16bit[src_idx],
533 : picture_control_set_ptr->md_cb_recon_neighbor_array16bit[dst_idx],
534 : blk_org_x_uv,
535 : blk_org_y_uv,
536 : bwidth_uv,
537 : bheight_uv,
538 : NEIGHBOR_ARRAY_UNIT_FULL_MASK);
539 :
540 0 : copy_neigh_arr(
541 : picture_control_set_ptr->md_cr_recon_neighbor_array16bit[src_idx],
542 : picture_control_set_ptr->md_cr_recon_neighbor_array16bit[dst_idx],
543 : blk_org_x_uv,
544 : blk_org_y_uv,
545 : bwidth_uv,
546 : bheight_uv,
547 : NEIGHBOR_ARRAY_UNIT_FULL_MASK);
548 : }
549 : }
550 :
551 : //neighbor_array_unit_reset(picture_control_set_ptr->md_skip_coeff_neighbor_array[depth]);
552 629743 : copy_neigh_arr(
553 : picture_control_set_ptr->md_skip_coeff_neighbor_array[src_idx],
554 : picture_control_set_ptr->md_skip_coeff_neighbor_array[dst_idx],
555 : blk_org_x,
556 : blk_org_y,
557 629743 : blk_geom->bwidth,
558 629743 : blk_geom->bheight,
559 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
560 : //neighbor_array_unit_reset(picture_control_set_ptr->md_luma_dc_sign_level_coeff_neighbor_array[depth]);
561 629711 : copy_neigh_arr(
562 : picture_control_set_ptr->md_luma_dc_sign_level_coeff_neighbor_array[src_idx],
563 : picture_control_set_ptr->md_luma_dc_sign_level_coeff_neighbor_array[dst_idx],
564 : blk_org_x,
565 : blk_org_y,
566 629711 : blk_geom->bwidth,
567 629711 : blk_geom->bheight,
568 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
569 :
570 629738 : copy_neigh_arr(
571 : picture_control_set_ptr->md_tx_depth_1_luma_dc_sign_level_coeff_neighbor_array[src_idx],
572 : picture_control_set_ptr->md_tx_depth_1_luma_dc_sign_level_coeff_neighbor_array[dst_idx],
573 : blk_org_x,
574 : blk_org_y,
575 629738 : blk_geom->bwidth,
576 629738 : blk_geom->bheight,
577 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
578 :
579 629738 : if (blk_geom->has_uv && context_ptr->chroma_level <= CHROMA_MODE_1) {
580 629742 : copy_neigh_arr(
581 : picture_control_set_ptr->md_cb_dc_sign_level_coeff_neighbor_array[src_idx],
582 : picture_control_set_ptr->md_cb_dc_sign_level_coeff_neighbor_array[dst_idx],
583 : blk_org_x_uv,
584 : blk_org_y_uv,
585 : bwidth_uv,
586 : bheight_uv,
587 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
588 : //neighbor_array_unit_reset(picture_control_set_ptr->md_cr_dc_sign_level_coeff_neighbor_array[depth]);
589 :
590 629722 : copy_neigh_arr(
591 : picture_control_set_ptr->md_cr_dc_sign_level_coeff_neighbor_array[src_idx],
592 : picture_control_set_ptr->md_cr_dc_sign_level_coeff_neighbor_array[dst_idx],
593 : blk_org_x_uv,
594 : blk_org_y_uv,
595 : bwidth_uv,
596 : bheight_uv,
597 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
598 : }
599 :
600 : //neighbor_array_unit_reset(picture_control_set_ptr->md_txfm_context_array[depth]);
601 629711 : copy_neigh_arr(
602 : picture_control_set_ptr->md_txfm_context_array[src_idx],
603 : picture_control_set_ptr->md_txfm_context_array[dst_idx],
604 : blk_org_x,
605 : blk_org_y,
606 629711 : blk_geom->bwidth,
607 629711 : blk_geom->bheight,
608 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
609 : //neighbor_array_unit_reset(picture_control_set_ptr->md_inter_pred_dir_neighbor_array[depth]);
610 629738 : copy_neigh_arr(
611 : picture_control_set_ptr->md_inter_pred_dir_neighbor_array[src_idx],
612 : picture_control_set_ptr->md_inter_pred_dir_neighbor_array[dst_idx],
613 : blk_org_x,
614 : blk_org_y,
615 629738 : blk_geom->bwidth,
616 629738 : blk_geom->bheight,
617 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
618 : //neighbor_array_unit_reset(picture_control_set_ptr->md_ref_frame_type_neighbor_array[depth]);
619 629733 : copy_neigh_arr(
620 : picture_control_set_ptr->md_ref_frame_type_neighbor_array[src_idx],
621 : picture_control_set_ptr->md_ref_frame_type_neighbor_array[dst_idx],
622 : blk_org_x,
623 : blk_org_y,
624 629733 : blk_geom->bwidth,
625 629733 : blk_geom->bheight,
626 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
627 :
628 629707 : copy_neigh_arr_32(
629 : picture_control_set_ptr->md_interpolation_type_neighbor_array[src_idx],
630 : picture_control_set_ptr->md_interpolation_type_neighbor_array[dst_idx],
631 : blk_org_x,
632 : blk_org_y,
633 629707 : blk_geom->bwidth,
634 629707 : blk_geom->bheight,
635 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
636 629740 : }
637 :
638 1217140 : void md_update_all_neighbour_arrays(
639 : PictureControlSet *picture_control_set_ptr,
640 : ModeDecisionContext *context_ptr,
641 : uint32_t lastCuIndex_mds,
642 : uint32_t sb_origin_x,
643 : uint32_t sb_origin_y)
644 : {
645 1217140 : context_ptr->blk_geom = get_blk_geom_mds(lastCuIndex_mds);
646 1217130 : context_ptr->cu_origin_x = sb_origin_x + context_ptr->blk_geom->origin_x;
647 1217130 : context_ptr->cu_origin_y = sb_origin_y + context_ptr->blk_geom->origin_y;
648 1217130 : context_ptr->round_origin_x = ((context_ptr->cu_origin_x >> 3) << 3);
649 1217130 : context_ptr->round_origin_y = ((context_ptr->cu_origin_y >> 3) << 3);
650 :
651 1217130 : context_ptr->cu_ptr = &context_ptr->md_cu_arr_nsq[lastCuIndex_mds];
652 :
653 1217130 : mode_decision_update_neighbor_arrays(
654 : picture_control_set_ptr,
655 : context_ptr,
656 : lastCuIndex_mds,
657 1217130 : picture_control_set_ptr->intra_md_open_loop_flag,
658 : EB_FALSE);
659 :
660 1217190 : update_mi_map(
661 : context_ptr,
662 : context_ptr->cu_ptr,
663 1217190 : context_ptr->cu_origin_x,
664 1217190 : context_ptr->cu_origin_y,
665 : context_ptr->blk_geom,
666 : 0,
667 : picture_control_set_ptr);
668 1217180 : }
669 :
670 312992 : void md_update_all_neighbour_arrays_multiple(
671 : PictureControlSet *picture_control_set_ptr,
672 : ModeDecisionContext *context_ptr,
673 : uint32_t blk_mds,
674 : uint32_t sb_origin_x,
675 : uint32_t sb_origin_y){
676 312992 : context_ptr->blk_geom = get_blk_geom_mds(blk_mds);
677 :
678 : uint32_t blk_it;
679 632866 : for (blk_it = 0; blk_it < context_ptr->blk_geom->totns; blk_it++)
680 : {
681 319813 : md_update_all_neighbour_arrays(
682 : picture_control_set_ptr,
683 : context_ptr,
684 : blk_mds + blk_it,
685 : sb_origin_x,
686 : sb_origin_y);
687 : }
688 313053 : }
689 :
#define TOTAL_SQ_BLOCK_COUNT 341
/* Block indices used by init_sq_non4_block() to address the per-block
 * mode-decision arrays. Values are strictly increasing. */
int sq_block_index[TOTAL_SQ_BLOCK_COUNT] = {
    0,
    25,
    50,
    75,   80,   81,   82,   83,   84,   89,   90,   91,   92,
    93,   98,   99,   100,  101,  102,  107,  108,  109,  110,
    111,
    136,  141,  142,  143,  144,  145,  150,  151,  152,  153,
    154,  159,  160,  161,  162,  163,  168,  169,  170,  171,
    172,
    197,  202,  203,  204,  205,  206,  211,  212,  213,  214,
    215,  220,  221,  222,  223,  224,  229,  230,  231,  232,
    233,
    258,  263,  264,  265,  266,  267,  272,  273,  274,  275,
    276,  281,  282,  283,  284,  285,  290,  291,  292,  293,
    294,
    319,
    344,  349,  350,  351,  352,  353,  358,  359,  360,  361,
    362,  367,  368,  369,  370,  371,  376,  377,  378,  379,
    380,
    405,  410,  411,  412,  413,  414,  419,  420,  421,  422,
    423,  428,  429,  430,  431,  432,  437,  438,  439,  440,
    441,
    466,  471,  472,  473,  474,  475,  480,  481,  482,  483,
    484,  489,  490,  491,  492,  493,  498,  499,  500,  501,
    502,
    527,  532,  533,  534,  535,  536,  541,  542,  543,  544,
    545,  550,  551,  552,  553,  554,  559,  560,  561,  562,
    563,
    588,
    613,  618,  619,  620,  621,  622,  627,  628,  629,  630,
    631,  636,  637,  638,  639,  640,  645,  646,  647,  648,
    649,
    674,  679,  680,  681,  682,  683,  688,  689,  690,  691,
    692,  697,  698,  699,  700,  701,  706,  707,  708,  709,
    710,
    735,  740,  741,  742,  743,  744,  749,  750,  751,  752,
    753,  758,  759,  760,  761,  762,  767,  768,  769,  770,
    771,
    796,  801,  802,  803,  804,  805,  810,  811,  812,  813,
    814,  819,  820,  821,  822,  823,  828,  829,  830,  831,
    832,
    857,
    882,  887,  888,  889,  890,  891,  896,  897,  898,  899,
    900,  905,  906,  907,  908,  909,  914,  915,  916,  917,
    918,
    943,  948,  949,  950,  951,  952,  957,  958,  959,  960,
    961,  966,  967,  968,  969,  970,  975,  976,  977,  978,
    979,
    1004, 1009, 1010, 1011, 1012, 1013, 1018, 1019, 1020, 1021,
    1022, 1027, 1028, 1029, 1030, 1031, 1036, 1037, 1038, 1039,
    1040,
    1065, 1070, 1071, 1072, 1073, 1074, 1079, 1080, 1081, 1082,
    1083, 1088, 1089, 1090, 1091, 1092, 1097, 1098, 1099, 1100
};
1034 3598 : void init_sq_nsq_block(
1035 : SequenceControlSet *sequence_control_set_ptr,
1036 : ModeDecisionContext *context_ptr){
1037 3598 : uint32_t blk_idx = 0;
1038 : do {
1039 3727280 : const BlockGeom * blk_geom = get_blk_geom_mds(blk_idx);
1040 3727270 : context_ptr->md_local_cu_unit[blk_idx].avail_blk_flag = EB_FALSE;
1041 3727270 : if (blk_geom->shape == PART_N)
1042 : {
1043 1186890 : context_ptr->md_cu_arr_nsq[blk_idx].split_flag = EB_TRUE;
1044 1186890 : context_ptr->md_cu_arr_nsq[blk_idx].part = PARTITION_SPLIT;
1045 1186890 : context_ptr->md_local_cu_unit[blk_idx].tested_cu_flag = EB_FALSE;
1046 : }
1047 3727270 : ++blk_idx;
1048 3727270 : } while (blk_idx < sequence_control_set_ptr->max_block_cnt);
1049 3590 : }
1050 3599 : void init_sq_non4_block(
1051 : SequenceControlSet *sequence_control_set_ptr,
1052 : ModeDecisionContext *context_ptr){
1053 1195380 : for (uint32_t blk_idx = 0; blk_idx < TOTAL_SQ_BLOCK_COUNT; blk_idx++){
1054 1191780 : context_ptr->md_cu_arr_nsq[sq_block_index[blk_idx]].part = PARTITION_SPLIT;
1055 1191780 : context_ptr->md_local_cu_unit[sq_block_index[blk_idx]].tested_cu_flag = EB_FALSE;
1056 : }
1057 3793120 : for(uint32_t blk_idx = 0; blk_idx < sequence_control_set_ptr->max_block_cnt; ++blk_idx){
1058 3789530 : context_ptr->md_local_cu_unit[blk_idx].avail_blk_flag = EB_FALSE;
1059 : }
1060 3599 : }
1061 0 : static INLINE TranHigh check_range(TranHigh input, int32_t bd) {
1062 : // AV1 TX case
1063 : // - 8 bit: signed 16 bit integer
1064 : // - 10 bit: signed 18 bit integer
1065 : // - 12 bit: signed 20 bit integer
1066 : // - max quantization error = 1828 << (bd - 8)
1067 0 : const int32_t int_max = (1 << (7 + bd)) - 1 + (914 << (bd - 7));
1068 0 : const int32_t int_min = -int_max - 1;
1069 : #if CONFIG_COEFFICIENT_RANGE_CHECKING
1070 : assert(int_min <= input);
1071 : assert(input <= int_max);
1072 : #endif // CONFIG_COEFFICIENT_RANGE_CHECKING
1073 0 : return (TranHigh)clamp64(input, int_min, int_max);
1074 : }
1075 :
1076 : #define HIGHBD_WRAPLOW(x, bd) ((int32_t)check_range((x), bd))
1077 0 : static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, TranHigh trans,
1078 : int32_t bd) {
1079 0 : trans = HIGHBD_WRAPLOW(trans, bd);
1080 0 : return clip_pixel_highbd(dest + (int32_t)trans, bd);
1081 : }
1082 :
1083 : /*********************************
1084 : * Picture Single Channel Kernel
1085 : *********************************/
1086 0 : void picture_addition_kernel(
1087 : uint8_t *pred_ptr,
1088 : uint32_t pred_stride,
1089 : int32_t *residual_ptr,
1090 : uint32_t residual_stride,
1091 : uint8_t *recon_ptr,
1092 : uint32_t recon_stride,
1093 : uint32_t width,
1094 : uint32_t height,
1095 : int32_t bd)
1096 : {
1097 : uint32_t columnIndex;
1098 0 : uint32_t row_index = 0;
1099 : // const int32_t maxValue = 0xFF;
1100 :
1101 : //printf("\n");
1102 : //printf("Reconstruction---------------------------------------------------\n");
1103 :
1104 0 : while (row_index < height) {
1105 0 : columnIndex = 0;
1106 0 : while (columnIndex < width) {
1107 : //recon_ptr[columnIndex] = (uint8_t)CLIP3(0, maxValue, ((int32_t)residual_ptr[columnIndex]) + ((int32_t)pred_ptr[columnIndex]));
1108 0 : uint16_t rec = (uint16_t)pred_ptr[columnIndex];
1109 0 : recon_ptr[columnIndex] = (uint8_t)highbd_clip_pixel_add(rec, (TranLow)residual_ptr[columnIndex], bd);
1110 :
1111 : //printf("%d\t", recon_ptr[columnIndex]);
1112 0 : ++columnIndex;
1113 : }
1114 :
1115 : //printf("\n");
1116 0 : residual_ptr += residual_stride;
1117 0 : pred_ptr += pred_stride;
1118 0 : recon_ptr += recon_stride;
1119 0 : ++row_index;
1120 : }
1121 : //printf("-----------------------------------------------------------------\n");
1122 : //printf("\n");
1123 : //printf("\n");
1124 0 : return;
1125 : }
1126 :
1127 0 : void picture_addition_kernel16_bit(
1128 : uint16_t *pred_ptr,
1129 : uint32_t pred_stride,
1130 : int32_t *residual_ptr,
1131 : uint32_t residual_stride,
1132 : uint16_t *recon_ptr,
1133 : uint32_t recon_stride,
1134 : uint32_t width,
1135 : uint32_t height,
1136 : int32_t bd)
1137 : {
1138 : uint32_t columnIndex;
1139 0 : uint32_t row_index = 0;
1140 : // const int32_t maxValue = 0xFF;
1141 :
1142 : //printf("\n");
1143 : //printf("Reconstruction---------------------------------------------------\n");
1144 :
1145 0 : while (row_index < height) {
1146 0 : columnIndex = 0;
1147 0 : while (columnIndex < width) {
1148 : //recon_ptr[columnIndex] = (uint8_t)CLIP3(0, maxValue, ((int32_t)residual_ptr[columnIndex]) + ((int32_t)pred_ptr[columnIndex]));
1149 0 : uint16_t rec = (uint16_t)pred_ptr[columnIndex];
1150 0 : recon_ptr[columnIndex] = highbd_clip_pixel_add(rec, (TranLow)residual_ptr[columnIndex], bd);
1151 :
1152 : //printf("%d\t", recon_ptr[columnIndex]);
1153 0 : ++columnIndex;
1154 : }
1155 :
1156 : //printf("\n");
1157 0 : residual_ptr += residual_stride;
1158 0 : pred_ptr += pred_stride;
1159 0 : recon_ptr += recon_stride;
1160 0 : ++row_index;
1161 : }
1162 : // printf("-----------------------------------------------------------------\n");
1163 : // printf("\n");
1164 : // printf("\n");
1165 0 : return;
1166 : }
1167 :
1168 519775 : void AV1PerformInverseTransformReconLuma(
1169 : PictureControlSet *picture_control_set_ptr,
1170 : ModeDecisionContext *context_ptr,
1171 : ModeDecisionCandidateBuffer *candidate_buffer)
1172 : {
1173 : uint32_t tu_width;
1174 : uint32_t tu_height;
1175 : uint32_t txb_origin_x;
1176 : uint32_t txb_origin_y;
1177 : uint32_t tu_origin_index;
1178 : uint32_t tuTotalCount;
1179 : uint32_t txb_itr;
1180 :
1181 519775 : if (picture_control_set_ptr->intra_md_open_loop_flag == EB_FALSE) {
1182 519775 : uint8_t tx_depth = candidate_buffer->candidate_ptr->tx_depth;
1183 519775 : tuTotalCount = context_ptr->blk_geom->txb_count[tx_depth];
1184 519775 : txb_itr = 0;
1185 519775 : uint32_t txb_1d_offset = 0;
1186 : do {
1187 869067 : txb_origin_x = context_ptr->blk_geom->tx_org_x[tx_depth][txb_itr];
1188 869067 : txb_origin_y = context_ptr->blk_geom->tx_org_y[tx_depth][txb_itr];
1189 869067 : tu_width = context_ptr->blk_geom->tx_width[tx_depth][txb_itr];
1190 869067 : tu_height = context_ptr->blk_geom->tx_height[tx_depth][txb_itr];
1191 869067 : tu_origin_index = txb_origin_x + txb_origin_y * candidate_buffer->prediction_ptr->stride_y;
1192 869067 : uint32_t recLumaOffset = txb_origin_x + txb_origin_y * candidate_buffer->recon_ptr->stride_y;
1193 869067 : uint32_t y_has_coeff = (candidate_buffer->candidate_ptr->y_has_coeff & (1 << txb_itr)) > 0;
1194 :
1195 869067 : if (y_has_coeff)
1196 672988 : inv_transform_recon_wrapper(
1197 672988 : candidate_buffer->prediction_ptr->buffer_y,
1198 : tu_origin_index,
1199 672988 : candidate_buffer->prediction_ptr->stride_y,
1200 672988 : context_ptr->hbd_mode_decision ? (uint8_t *)context_ptr->cfl_temp_luma_recon16bit : context_ptr->cfl_temp_luma_recon,
1201 : recLumaOffset,
1202 672988 : candidate_buffer->recon_ptr->stride_y,
1203 672988 : (int32_t*) candidate_buffer->recon_coeff_ptr->buffer_y,
1204 : txb_1d_offset,
1205 672988 : context_ptr->hbd_mode_decision,
1206 672988 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
1207 672988 : candidate_buffer->candidate_ptr->transform_type[txb_itr],
1208 : PLANE_TYPE_Y,
1209 672988 : (uint32_t)candidate_buffer->candidate_ptr->eob[0][txb_itr]);
1210 : else {
1211 196079 : if (context_ptr->hbd_mode_decision) {
1212 0 : pic_copy_kernel_16bit(
1213 0 : ((uint16_t *) candidate_buffer->prediction_ptr->buffer_y) + tu_origin_index,
1214 0 : candidate_buffer->prediction_ptr->stride_y,
1215 0 : context_ptr->cfl_temp_luma_recon16bit + recLumaOffset,
1216 0 : candidate_buffer->recon_ptr->stride_y,
1217 : tu_width,
1218 : tu_height);
1219 : } else {
1220 196079 : pic_copy_kernel_8bit(
1221 196079 : &(candidate_buffer->prediction_ptr->buffer_y[tu_origin_index]),
1222 196079 : candidate_buffer->prediction_ptr->stride_y,
1223 196079 : &(context_ptr->cfl_temp_luma_recon[recLumaOffset]),
1224 196079 : candidate_buffer->recon_ptr->stride_y,
1225 : tu_width,
1226 : tu_height);
1227 : }
1228 : }
1229 869062 : txb_1d_offset += context_ptr->blk_geom->tx_width[tx_depth][txb_itr] * context_ptr->blk_geom->tx_height[tx_depth][txb_itr];
1230 869062 : ++txb_itr;
1231 869062 : } while (txb_itr < tuTotalCount);
1232 : }
1233 519770 : }
1234 811320 : void AV1PerformInverseTransformRecon(
1235 : PictureControlSet *picture_control_set_ptr,
1236 : ModeDecisionContext *context_ptr,
1237 : ModeDecisionCandidateBuffer *candidate_buffer,
1238 : CodingUnit *cu_ptr,
1239 : const BlockGeom *blk_geom)
1240 : {
1241 : uint32_t tu_width;
1242 : uint32_t tu_height;
1243 : uint32_t txb_origin_x;
1244 : uint32_t txb_origin_y;
1245 : uint32_t tu_origin_index;
1246 : uint32_t tuTotalCount;
1247 : uint32_t tu_index;
1248 : uint32_t txb_itr;
1249 : TransformUnit *txb_ptr;
1250 :
1251 : UNUSED(blk_geom);
1252 :
1253 811320 : if (picture_control_set_ptr->intra_md_open_loop_flag == EB_FALSE) {
1254 811360 : uint8_t tx_depth = candidate_buffer->candidate_ptr->tx_depth;
1255 811360 : tuTotalCount = context_ptr->blk_geom->txb_count[tx_depth];
1256 811360 : tu_index = 0;
1257 811360 : txb_itr = 0;
1258 811360 : uint32_t txb_1d_offset = 0, txb_1d_offset_uv = 0;
1259 : uint32_t recLumaOffset, recCbOffset, recCrOffset;
1260 :
1261 : do {
1262 877594 : txb_origin_x = context_ptr->blk_geom->tx_org_x[tx_depth][txb_itr];
1263 877594 : txb_origin_y = context_ptr->blk_geom->tx_org_y[tx_depth][txb_itr];
1264 877594 : tu_width = context_ptr->blk_geom->tx_width[tx_depth][txb_itr];
1265 877594 : tu_height = context_ptr->blk_geom->tx_height[tx_depth][txb_itr];
1266 877594 : txb_ptr = &cu_ptr->transform_unit_array[tu_index];
1267 877594 : recLumaOffset = context_ptr->blk_geom->tx_org_x[tx_depth][txb_itr] + context_ptr->blk_geom->tx_org_y[tx_depth][txb_itr] * candidate_buffer->recon_ptr->stride_y;
1268 877594 : recCbOffset = ((((context_ptr->blk_geom->tx_org_x[tx_depth][txb_itr] >> 3) << 3) + ((context_ptr->blk_geom->tx_org_y[tx_depth][txb_itr] >> 3) << 3) * candidate_buffer->recon_ptr->stride_cb) >> 1);
1269 877594 : recCrOffset = ((((context_ptr->blk_geom->tx_org_x[tx_depth][txb_itr] >> 3) << 3) + ((context_ptr->blk_geom->tx_org_y[tx_depth][txb_itr] >> 3) << 3) * candidate_buffer->recon_ptr->stride_cr) >> 1);
1270 877594 : tu_origin_index = txb_origin_x + txb_origin_y * candidate_buffer->prediction_ptr->stride_y;
1271 877594 : if (txb_ptr->y_has_coeff)
1272 156282 : inv_transform_recon_wrapper(
1273 156282 : candidate_buffer->prediction_ptr->buffer_y,
1274 : tu_origin_index,
1275 156282 : candidate_buffer->prediction_ptr->stride_y,
1276 156282 : candidate_buffer->recon_ptr->buffer_y,
1277 : recLumaOffset,
1278 156282 : candidate_buffer->recon_ptr->stride_y,
1279 156282 : (int32_t*) candidate_buffer->recon_coeff_ptr->buffer_y,
1280 : txb_1d_offset,
1281 156282 : context_ptr->hbd_mode_decision,
1282 156282 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
1283 156282 : candidate_buffer->candidate_ptr->transform_type[txb_itr],
1284 : PLANE_TYPE_Y,
1285 156282 : (uint32_t)candidate_buffer->candidate_ptr->eob[0][txb_itr]);
1286 : else
1287 721312 : picture_copy(
1288 : candidate_buffer->prediction_ptr,
1289 : tu_origin_index,
1290 : 0,//tu_chroma_origin_index,
1291 : candidate_buffer->recon_ptr,
1292 : recLumaOffset,
1293 : 0,//tu_chroma_origin_index,
1294 : tu_width,
1295 : tu_height,
1296 : 0,//chromaTuSize,
1297 : 0,//chromaTuSize,
1298 : PICTURE_BUFFER_DESC_Y_FLAG,
1299 721312 : context_ptr->hbd_mode_decision);
1300 :
1301 : //CHROMA
1302 877562 : uint8_t tx_depth = candidate_buffer->candidate_ptr->tx_depth;
1303 877562 : if (tx_depth == 0 || txb_itr == 0) {
1304 811399 : if (context_ptr->chroma_level <= CHROMA_MODE_1)
1305 : {
1306 735760 : uint32_t chroma_tu_width = tx_size_wide[context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr]];
1307 735760 : uint32_t chroma_tu_height = tx_size_high[context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr]];
1308 735760 : uint32_t cbTuChromaOriginIndex = ((((txb_origin_x >> 3) << 3) + ((txb_origin_y >> 3) << 3) * candidate_buffer->recon_coeff_ptr->stride_cb) >> 1);
1309 735760 : uint32_t crTuChromaOriginIndex = ((((txb_origin_x >> 3) << 3) + ((txb_origin_y >> 3) << 3) * candidate_buffer->recon_coeff_ptr->stride_cr) >> 1);
1310 :
1311 735760 : if (context_ptr->blk_geom->has_uv && txb_ptr->u_has_coeff)
1312 23534 : inv_transform_recon_wrapper(
1313 23534 : candidate_buffer->prediction_ptr->buffer_cb,
1314 : cbTuChromaOriginIndex,
1315 23534 : candidate_buffer->prediction_ptr->stride_cb,
1316 23534 : candidate_buffer->recon_ptr->buffer_cb,
1317 : recCbOffset,
1318 23534 : candidate_buffer->recon_ptr->stride_cb,
1319 23534 : (int32_t*) candidate_buffer->recon_coeff_ptr->buffer_cb,
1320 : txb_1d_offset_uv,
1321 23534 : context_ptr->hbd_mode_decision,
1322 23534 : context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
1323 23534 : candidate_buffer->candidate_ptr->transform_type_uv,
1324 : PLANE_TYPE_UV,
1325 23534 : (uint32_t)candidate_buffer->candidate_ptr->eob[1][txb_itr]);
1326 : else
1327 712226 : picture_copy(
1328 : candidate_buffer->prediction_ptr,
1329 : 0,
1330 : cbTuChromaOriginIndex,
1331 : candidate_buffer->recon_ptr,
1332 : 0,
1333 : recCbOffset,
1334 : 0,
1335 : 0,
1336 : chroma_tu_width,
1337 : chroma_tu_height,
1338 : PICTURE_BUFFER_DESC_Cb_FLAG,
1339 712226 : context_ptr->hbd_mode_decision);
1340 :
1341 :
1342 735757 : if (context_ptr->blk_geom->has_uv && txb_ptr->v_has_coeff)
1343 14134 : inv_transform_recon_wrapper(
1344 14134 : candidate_buffer->prediction_ptr->buffer_cr,
1345 : crTuChromaOriginIndex,
1346 14134 : candidate_buffer->prediction_ptr->stride_cr,
1347 14134 : candidate_buffer->recon_ptr->buffer_cr,
1348 : recCrOffset,
1349 14134 : candidate_buffer->recon_ptr->stride_cr,
1350 14134 : (int32_t*) candidate_buffer->recon_coeff_ptr->buffer_cr,
1351 : txb_1d_offset_uv,
1352 14134 : context_ptr->hbd_mode_decision,
1353 14134 : context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
1354 14134 : candidate_buffer->candidate_ptr->transform_type_uv,
1355 : PLANE_TYPE_UV,
1356 14134 : (uint32_t)candidate_buffer->candidate_ptr->eob[2][txb_itr]);
1357 : else
1358 721623 : picture_copy(
1359 : candidate_buffer->prediction_ptr,
1360 : 0,
1361 : crTuChromaOriginIndex,
1362 : candidate_buffer->recon_ptr,
1363 : 0,
1364 : recCrOffset,
1365 : 0,
1366 : 0,
1367 : chroma_tu_width,
1368 : chroma_tu_height,
1369 : PICTURE_BUFFER_DESC_Cr_FLAG,
1370 721623 : context_ptr->hbd_mode_decision);
1371 :
1372 735756 : if (context_ptr->blk_geom->has_uv)
1373 551032 : txb_1d_offset_uv += context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr] * context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr];
1374 : }
1375 : }
1376 877558 : txb_1d_offset += context_ptr->blk_geom->tx_width[tx_depth][txb_itr] * context_ptr->blk_geom->tx_height[tx_depth][txb_itr];
1377 877558 : ++tu_index;
1378 877558 : ++txb_itr;
1379 877558 : } while (txb_itr < tuTotalCount);
1380 : }
1381 811284 : }
1382 :
1383 : /*******************************************
1384 : * Coding Loop - Fast Loop Initialization
1385 : *******************************************/
1386 811363 : void ProductCodingLoopInitFastLoop(
1387 : ModeDecisionContext *context_ptr,
1388 : NeighborArrayUnit *skip_coeff_neighbor_array,
1389 : NeighborArrayUnit *inter_pred_dir_neighbor_array,
1390 : NeighborArrayUnit *ref_frame_type_neighbor_array,
1391 : NeighborArrayUnit *intra_luma_mode_neighbor_array,
1392 : NeighborArrayUnit *skip_flag_neighbor_array,
1393 : NeighborArrayUnit *mode_type_neighbor_array,
1394 : NeighborArrayUnit *leaf_depth_neighbor_array,
1395 : NeighborArrayUnit *leaf_partition_neighbor_array
1396 : )
1397 : {
1398 811363 : context_ptr->tx_depth = context_ptr->cu_ptr->tx_depth = 0;
1399 : // Generate Split, Skip and intra mode contexts for the rate estimation
1400 811363 : coding_loop_context_generation(
1401 : context_ptr,
1402 : context_ptr->cu_ptr,
1403 811363 : context_ptr->cu_origin_x,
1404 811363 : context_ptr->cu_origin_y,
1405 : BLOCK_SIZE_64,
1406 : skip_coeff_neighbor_array,
1407 : inter_pred_dir_neighbor_array,
1408 : ref_frame_type_neighbor_array,
1409 : intra_luma_mode_neighbor_array,
1410 : skip_flag_neighbor_array,
1411 : mode_type_neighbor_array,
1412 : leaf_depth_neighbor_array,
1413 : leaf_partition_neighbor_array);
1414 109474000 : for (uint32_t index = 0; index < MAX_NFL_BUFF; ++index)
1415 108662000 : context_ptr->fast_cost_array[index] = MAX_CU_COST;
1416 811344 : return;
1417 : }
1418 :
1419 111293000 : void fast_loop_core(
1420 : ModeDecisionCandidateBuffer *candidate_buffer,
1421 : PictureControlSet *picture_control_set_ptr,
1422 : ModeDecisionContext *context_ptr,
1423 : EbPictureBufferDesc *input_picture_ptr,
1424 : uint32_t input_origin_index,
1425 : uint32_t input_cb_origin_index,
1426 : uint32_t input_cr_origin_index,
1427 : CodingUnit *cu_ptr,
1428 : uint32_t cu_origin_index,
1429 : uint32_t cu_chroma_origin_index,
1430 : EbBool use_ssd)
1431 : {
1432 : uint64_t lumaFastDistortion;
1433 : uint64_t chromaFastDistortion;
1434 :
1435 111293000 : ModeDecisionCandidate *candidate_ptr = candidate_buffer->candidate_ptr;
1436 111293000 : EbPictureBufferDesc *prediction_ptr = candidate_buffer->prediction_ptr;
1437 111293000 : context_ptr->pu_itr = 0;
1438 : // Prediction
1439 : // Set default interp_filters
1440 111293000 : candidate_buffer->candidate_ptr->interp_filters = (context_ptr->md_staging_use_bilinear) ? av1_make_interp_filters(BILINEAR, BILINEAR) : 0;
1441 111256000 : ProductPredictionFunTable[candidate_buffer->candidate_ptr->use_intrabc ? INTER_MODE : candidate_ptr->type](
1442 : context_ptr,
1443 : picture_control_set_ptr,
1444 : candidate_buffer);
1445 :
1446 : // Distortion
1447 : // Y
1448 111308000 : if (use_ssd) {
1449 0 : EbSpatialFullDistType spatial_full_dist_type_fun = context_ptr->hbd_mode_decision ?
1450 0 : full_distortion_kernel16_bits : spatial_full_distortion_kernel;
1451 :
1452 0 : candidate_buffer->candidate_ptr->luma_fast_distortion = (uint32_t)(lumaFastDistortion = spatial_full_dist_type_fun(
1453 : input_picture_ptr->buffer_y,
1454 : input_origin_index,
1455 0 : input_picture_ptr->stride_y,
1456 : prediction_ptr->buffer_y,
1457 : cu_origin_index,
1458 0 : prediction_ptr->stride_y,
1459 0 : context_ptr->blk_geom->bwidth,
1460 0 : context_ptr->blk_geom->bheight));
1461 : }
1462 : else {
1463 111308000 : assert((context_ptr->blk_geom->bwidth >> 3) < 17);
1464 111308000 : if (!context_ptr->hbd_mode_decision) {
1465 111411000 : candidate_buffer->candidate_ptr->luma_fast_distortion = (uint32_t)(lumaFastDistortion = nxm_sad_kernel_sub_sampled(
1466 111344000 : input_picture_ptr->buffer_y + input_origin_index,
1467 111344000 : input_picture_ptr->stride_y,
1468 111344000 : prediction_ptr->buffer_y + cu_origin_index,
1469 111344000 : prediction_ptr->stride_y,
1470 111344000 : context_ptr->blk_geom->bheight,
1471 111344000 : context_ptr->blk_geom->bwidth));
1472 : }
1473 : else {
1474 0 : candidate_buffer->candidate_ptr->luma_fast_distortion = (uint32_t)(lumaFastDistortion = sad_16b_kernel(
1475 0 : ((uint16_t *)input_picture_ptr->buffer_y) + input_origin_index,
1476 0 : input_picture_ptr->stride_y,
1477 0 : ((uint16_t *)prediction_ptr->buffer_y) + cu_origin_index,
1478 0 : prediction_ptr->stride_y,
1479 0 : context_ptr->blk_geom->bheight,
1480 0 : context_ptr->blk_geom->bwidth));
1481 : }
1482 : }
1483 :
1484 111411000 : if (context_ptr->blk_geom->has_uv && context_ptr->chroma_level <= CHROMA_MODE_1 && context_ptr->md_staging_skip_inter_chroma_pred == EB_FALSE) {
1485 6904200 : if (use_ssd) {
1486 0 : EbSpatialFullDistType spatial_full_dist_type_fun = context_ptr->hbd_mode_decision ?
1487 0 : full_distortion_kernel16_bits : spatial_full_distortion_kernel;
1488 :
1489 0 : chromaFastDistortion = spatial_full_dist_type_fun(
1490 : input_picture_ptr->buffer_cb,
1491 : input_cb_origin_index,
1492 0 : input_picture_ptr->stride_cb,
1493 0 : candidate_buffer->prediction_ptr->buffer_cb,
1494 : cu_chroma_origin_index,
1495 0 : prediction_ptr->stride_cb,
1496 0 : context_ptr->blk_geom->bwidth_uv,
1497 0 : context_ptr->blk_geom->bheight_uv);
1498 :
1499 0 : chromaFastDistortion += spatial_full_dist_type_fun(
1500 : input_picture_ptr->buffer_cr,
1501 : input_cr_origin_index,
1502 0 : input_picture_ptr->stride_cb,
1503 0 : candidate_buffer->prediction_ptr->buffer_cr,
1504 : cu_chroma_origin_index,
1505 0 : prediction_ptr->stride_cr,
1506 0 : context_ptr->blk_geom->bwidth_uv,
1507 0 : context_ptr->blk_geom->bheight_uv);
1508 : }
1509 : else {
1510 6904200 : assert((context_ptr->blk_geom->bwidth_uv >> 3) < 17);
1511 :
1512 6904200 : if (!context_ptr->hbd_mode_decision) {
1513 13808400 : chromaFastDistortion = nxm_sad_kernel_sub_sampled(
1514 6904230 : input_picture_ptr->buffer_cb + input_cb_origin_index,
1515 6904230 : input_picture_ptr->stride_cb,
1516 6904230 : candidate_buffer->prediction_ptr->buffer_cb + cu_chroma_origin_index,
1517 6904230 : prediction_ptr->stride_cb,
1518 6904230 : context_ptr->blk_geom->bheight_uv,
1519 6904230 : context_ptr->blk_geom->bwidth_uv);
1520 :
1521 6904070 : chromaFastDistortion += nxm_sad_kernel_sub_sampled(
1522 6904190 : input_picture_ptr->buffer_cr + input_cr_origin_index,
1523 6904190 : input_picture_ptr->stride_cr,
1524 6904190 : candidate_buffer->prediction_ptr->buffer_cr + cu_chroma_origin_index,
1525 6904190 : prediction_ptr->stride_cr,
1526 6904190 : context_ptr->blk_geom->bheight_uv,
1527 6904190 : context_ptr->blk_geom->bwidth_uv);
1528 : }
1529 : else {
1530 0 : chromaFastDistortion = sad_16b_kernel(
1531 0 : ((uint16_t *)input_picture_ptr->buffer_cb) + input_cb_origin_index,
1532 0 : input_picture_ptr->stride_cb,
1533 0 : ((uint16_t *)candidate_buffer->prediction_ptr->buffer_cb) + cu_chroma_origin_index,
1534 0 : prediction_ptr->stride_cb,
1535 0 : context_ptr->blk_geom->bheight_uv,
1536 0 : context_ptr->blk_geom->bwidth_uv);
1537 :
1538 15 : chromaFastDistortion += sad_16b_kernel(
1539 0 : ((uint16_t *)input_picture_ptr->buffer_cr) + input_cr_origin_index,
1540 0 : input_picture_ptr->stride_cr,
1541 0 : ((uint16_t *)candidate_buffer->prediction_ptr->buffer_cr) + cu_chroma_origin_index,
1542 0 : prediction_ptr->stride_cr,
1543 0 : context_ptr->blk_geom->bheight_uv,
1544 0 : context_ptr->blk_geom->bwidth_uv);
1545 : }
1546 : }
1547 : }
1548 : else
1549 104507000 : chromaFastDistortion = 0;
1550 : // Fast Cost
1551 445299000 : *(candidate_buffer->fast_cost_ptr) = Av1ProductFastCostFuncTable[candidate_ptr->type](
1552 : cu_ptr,
1553 111411000 : candidate_buffer->candidate_ptr,
1554 111411000 : cu_ptr->qp,
1555 : lumaFastDistortion,
1556 : chromaFastDistortion,
1557 111411000 : use_ssd ? context_ptr->full_lambda : context_ptr->fast_lambda,
1558 : use_ssd,
1559 : picture_control_set_ptr,
1560 111411000 : &(context_ptr->md_local_cu_unit[context_ptr->blk_geom->blkidx_mds].ed_ref_mv_stack[candidate_ptr->ref_frame_type][0]),
1561 : context_ptr->blk_geom,
1562 111411000 : context_ptr->cu_origin_y >> MI_SIZE_LOG2,
1563 111411000 : context_ptr->cu_origin_x >> MI_SIZE_LOG2,
1564 : 1,
1565 111411000 : context_ptr->intra_luma_left_mode,
1566 111411000 : context_ptr->intra_luma_top_mode);
1567 111066000 : }
1568 : #if REMOVE_MD_STAGE_1
1569 811174 : void set_md_stage_counts(
1570 : PictureControlSet *picture_control_set_ptr,
1571 : ModeDecisionContext *context_ptr,
1572 : uint32_t fastCandidateTotalCount)
1573 : {
1574 811174 : SequenceControlSet* scs = (SequenceControlSet*)(picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr);
1575 :
1576 : // Step 1: derive bypass_stage1 flags
1577 811174 : if (context_ptr->md_staging_mode == MD_STAGING_MODE_1)
1578 735751 : memset(context_ptr->bypass_md_stage_1, EB_FALSE, CAND_CLASS_TOTAL);
1579 : else
1580 75423 : memset(context_ptr->bypass_md_stage_1, EB_TRUE, CAND_CLASS_TOTAL);
1581 :
1582 : // Step 2: set md_stage count
1583 811174 : context_ptr->md_stage_1_count[CAND_CLASS_0] = (picture_control_set_ptr->slice_type == I_SLICE) ? fastCandidateTotalCount : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? INTRA_NFL : (INTRA_NFL >> 1);
1584 811174 : context_ptr->md_stage_1_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? INTER_NEW_NFL : (INTER_NEW_NFL >> 1);
1585 811174 : context_ptr->md_stage_1_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? INTER_PRED_NFL : (INTER_PRED_NFL >> 1);
1586 811174 : context_ptr->md_stage_1_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? INTER_PRED_NFL : (INTER_PRED_NFL >> 1);
1587 : #if II_COMP_FLAG
1588 811174 : context_ptr->md_stage_1_count[CAND_CLASS_4] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 14 : 6;
1589 : #endif
1590 : #if OBMC_FLAG
1591 811174 : context_ptr->md_stage_1_count[CAND_CLASS_5] = 16;
1592 : #endif
1593 : #if FILTER_INTRA_FLAG
1594 811174 : context_ptr->md_stage_1_count[CAND_CLASS_6] = (picture_control_set_ptr->temporal_layer_index == 0) ? 10 : 5;
1595 : #endif
1596 : #if PAL_CLASS
1597 811174 : context_ptr->md_stage_1_count[CAND_CLASS_7] = 12;
1598 : #endif
1599 811174 : context_ptr->md_stage_1_count[CAND_CLASS_8] = (picture_control_set_ptr->temporal_layer_index == 0) ? 5 : 4;
1600 811174 : if (context_ptr->combine_class12) {
1601 75636 : context_ptr->md_stage_1_count[CAND_CLASS_1] = context_ptr->md_stage_1_count[CAND_CLASS_1] * 2;
1602 : }
1603 811174 : if (picture_control_set_ptr->enc_mode >= ENC_M2) {
1604 75640 : context_ptr->md_stage_1_count[CAND_CLASS_1] = context_ptr->md_stage_1_count[CAND_CLASS_1] / 2;
1605 75640 : context_ptr->md_stage_1_count[CAND_CLASS_2] = context_ptr->md_stage_1_count[CAND_CLASS_2] / 2;
1606 75640 : context_ptr->md_stage_1_count[CAND_CLASS_3] = context_ptr->md_stage_1_count[CAND_CLASS_3] / 2;
1607 : }
1608 :
1609 :
1610 811174 : context_ptr->md_stage_2_count[CAND_CLASS_0] = (picture_control_set_ptr->slice_type == I_SLICE) ? 10 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? ((scs->input_resolution >= INPUT_SIZE_1080i_RANGE) ? 7 : 10) : 4;
1611 : #if FILTER_INTRA_FLAG
1612 811174 : context_ptr->md_stage_2_count[CAND_CLASS_6] = (picture_control_set_ptr->temporal_layer_index == 0) ? 5 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 3 : 2;
1613 : #endif
1614 : #if PAL_CLASS
1615 811174 : if (picture_control_set_ptr->parent_pcs_ptr->palette_mode == 1)
1616 0 : context_ptr->md_stage_2_count[CAND_CLASS_7] =
1617 0 : (picture_control_set_ptr->temporal_layer_index == 0) ? 7 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 4 : 4;
1618 811174 : else if (picture_control_set_ptr->parent_pcs_ptr->palette_mode == 2 || picture_control_set_ptr->parent_pcs_ptr->palette_mode == 3)
1619 0 : context_ptr->md_stage_2_count[CAND_CLASS_7] =
1620 0 : (picture_control_set_ptr->temporal_layer_index == 0) ? 7 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 2;
1621 811411 : else if (picture_control_set_ptr->parent_pcs_ptr->palette_mode == 4 || picture_control_set_ptr->parent_pcs_ptr->palette_mode == 5)
1622 4 : context_ptr->md_stage_2_count[CAND_CLASS_7] =
1623 4 : (picture_control_set_ptr->temporal_layer_index == 0) ? 4 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 1;
1624 : else
1625 811407 : context_ptr->md_stage_2_count[CAND_CLASS_7] =
1626 811407 : (picture_control_set_ptr->temporal_layer_index == 0) ? 2 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 1 : 1;
1627 : #endif
1628 811174 : context_ptr->md_stage_2_count[CAND_CLASS_8] = (picture_control_set_ptr->temporal_layer_index == 0) ? 4 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 3 : 2;
1629 : #if REMOVE_MD_STAGE_1
1630 811174 : context_ptr->md_stage_2_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 6 : 3;
1631 811174 : context_ptr->md_stage_2_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 6 : 3;
1632 811174 : context_ptr->md_stage_2_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 6 : 3;
1633 : #else
1634 : context_ptr->md_stage_2_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 3 : 1;
1635 : context_ptr->md_stage_2_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 3 : 1;
1636 : context_ptr->md_stage_2_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 3 : 1;
1637 : #endif
1638 : #if II_COMP_FLAG
1639 811174 : context_ptr->md_stage_2_count[CAND_CLASS_4] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 12 : 4;// 14 : 4;
1640 : #endif
1641 : #if OBMC_FLAG
1642 811174 : if (picture_control_set_ptr->parent_pcs_ptr->pic_obmc_mode == 1)
1643 0 : context_ptr->md_stage_2_count[CAND_CLASS_5] = 14;
1644 811174 : else if (picture_control_set_ptr->parent_pcs_ptr->pic_obmc_mode <= 3)
1645 811411 : context_ptr->md_stage_2_count[CAND_CLASS_5] = (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 12 : 4;
1646 : else
1647 0 : context_ptr->md_stage_2_count[CAND_CLASS_5] = (picture_control_set_ptr->temporal_layer_index == 0) ? 12 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 8 : 4;
1648 : #endif
1649 :
1650 811174 : if (context_ptr->combine_class12) {
1651 75645 : context_ptr->md_stage_2_count[CAND_CLASS_1] = context_ptr->md_stage_2_count[CAND_CLASS_1] * 2;
1652 : }
1653 :
1654 811174 : if (!context_ptr->combine_class12 && picture_control_set_ptr->parent_pcs_ptr->sc_content_detected && picture_control_set_ptr->enc_mode == ENC_M0) {
1655 0 : context_ptr->md_stage_2_count[CAND_CLASS_0] = (picture_control_set_ptr->slice_type == I_SLICE) ? 10 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 8 : 4;
1656 : #if REMOVE_MD_STAGE_1
1657 0 : context_ptr->md_stage_2_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 12 : 6;
1658 0 : context_ptr->md_stage_2_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 12 : 6;
1659 0 : context_ptr->md_stage_2_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 12 : 6;
1660 : #else
1661 : context_ptr->md_stage_2_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 8 : 4;
1662 : context_ptr->md_stage_2_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 8 : 4;
1663 : context_ptr->md_stage_2_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 8 : 4;
1664 : #endif
1665 : }
1666 :
1667 811174 : if (picture_control_set_ptr->enc_mode >= ENC_M1)
1668 75644 : context_ptr->md_stage_2_count[CAND_CLASS_0] = (picture_control_set_ptr->slice_type == I_SLICE) ? 10 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 4 : 1;
1669 :
1670 811174 : if (picture_control_set_ptr->enc_mode >= ENC_M2 && picture_control_set_ptr->enc_mode <= ENC_M4) {
1671 0 : context_ptr->md_stage_2_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 4 : 2;
1672 0 : context_ptr->md_stage_2_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 1;
1673 0 : if (!context_ptr->combine_class12) {
1674 0 : context_ptr->md_stage_2_count[CAND_CLASS_1] = context_ptr->md_stage_2_count[CAND_CLASS_1] / 2;
1675 0 : context_ptr->md_stage_2_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 1;
1676 : }
1677 : }
1678 811174 : else if (picture_control_set_ptr->enc_mode >= ENC_M5) {
1679 75643 : if (picture_control_set_ptr->enc_mode <= ENC_M6) {
1680 0 : context_ptr->md_stage_1_count[CAND_CLASS_0] = (picture_control_set_ptr->slice_type == I_SLICE) ? 8 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 3 : 1;
1681 :
1682 0 : if (context_ptr->combine_class12) {
1683 0 : context_ptr->md_stage_1_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 5 : 3;
1684 0 : context_ptr->md_stage_1_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 1 : 1;
1685 :
1686 : }
1687 : else {
1688 :
1689 0 : context_ptr->md_stage_1_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 1;
1690 0 : context_ptr->md_stage_1_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 1;
1691 0 : context_ptr->md_stage_1_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 1 : 1;
1692 : }
1693 :
1694 0 : context_ptr->md_stage_2_count[CAND_CLASS_0] = context_ptr->md_stage_1_count[CAND_CLASS_0];
1695 0 : context_ptr->md_stage_2_count[CAND_CLASS_1] = context_ptr->md_stage_1_count[CAND_CLASS_1];
1696 0 : context_ptr->md_stage_2_count[CAND_CLASS_2] = context_ptr->md_stage_1_count[CAND_CLASS_2];
1697 0 : if (!context_ptr->combine_class12)
1698 0 : context_ptr->md_stage_2_count[CAND_CLASS_3] = context_ptr->md_stage_1_count[CAND_CLASS_3];
1699 : }
1700 : else {
1701 75643 : context_ptr->md_stage_1_count[CAND_CLASS_0] = (picture_control_set_ptr->slice_type == I_SLICE) ? 6 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 1;
1702 75643 : if (context_ptr->combine_class12) {
1703 75634 : context_ptr->md_stage_1_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 4 : 2;
1704 75634 : context_ptr->md_stage_1_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 1 : 1;
1705 :
1706 : }
1707 : else {
1708 :
1709 9 : context_ptr->md_stage_1_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 1;
1710 9 : context_ptr->md_stage_1_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 1;
1711 9 : context_ptr->md_stage_1_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 1 : 1;
1712 : }
1713 75643 : context_ptr->md_stage_2_count[CAND_CLASS_0] = context_ptr->md_stage_1_count[CAND_CLASS_0];
1714 75643 : context_ptr->md_stage_2_count[CAND_CLASS_1] = context_ptr->md_stage_1_count[CAND_CLASS_1];
1715 75643 : context_ptr->md_stage_2_count[CAND_CLASS_2] = context_ptr->md_stage_1_count[CAND_CLASS_2];
1716 75643 : if (!context_ptr->combine_class12)
1717 0 : context_ptr->md_stage_2_count[CAND_CLASS_3] = context_ptr->md_stage_1_count[CAND_CLASS_3];
1718 : }
1719 : }
1720 :
1721 : // Step 3: update the md_stage_2 count from the md_stage_1 count when md_stage_1 is bypassed (no NIC setting should be done beyond this point)
1722 811174 : context_ptr->md_stage_2_count[CAND_CLASS_0] = context_ptr->bypass_md_stage_1[CAND_CLASS_0] ? context_ptr->md_stage_1_count[CAND_CLASS_0] : context_ptr->md_stage_2_count[CAND_CLASS_0];
1723 811174 : context_ptr->md_stage_2_count[CAND_CLASS_1] = context_ptr->bypass_md_stage_1[CAND_CLASS_1] ? context_ptr->md_stage_1_count[CAND_CLASS_1] : context_ptr->md_stage_2_count[CAND_CLASS_1];
1724 811174 : context_ptr->md_stage_2_count[CAND_CLASS_2] = context_ptr->bypass_md_stage_1[CAND_CLASS_2] ? context_ptr->md_stage_1_count[CAND_CLASS_2] : context_ptr->md_stage_2_count[CAND_CLASS_2];
1725 811174 : context_ptr->md_stage_2_count[CAND_CLASS_3] = context_ptr->bypass_md_stage_1[CAND_CLASS_3] ? context_ptr->md_stage_1_count[CAND_CLASS_3] : context_ptr->md_stage_2_count[CAND_CLASS_3];
1726 811174 : context_ptr->md_stage_2_count[CAND_CLASS_4] = context_ptr->bypass_md_stage_1[CAND_CLASS_4] ? context_ptr->md_stage_1_count[CAND_CLASS_4] : context_ptr->md_stage_2_count[CAND_CLASS_4];
1727 811174 : context_ptr->md_stage_2_count[CAND_CLASS_5] = context_ptr->bypass_md_stage_1[CAND_CLASS_5] ? context_ptr->md_stage_1_count[CAND_CLASS_5] : context_ptr->md_stage_2_count[CAND_CLASS_5];
1728 811174 : context_ptr->md_stage_2_count[CAND_CLASS_6] = context_ptr->bypass_md_stage_1[CAND_CLASS_6] ? context_ptr->md_stage_1_count[CAND_CLASS_6] : context_ptr->md_stage_2_count[CAND_CLASS_6];
1729 811174 : context_ptr->md_stage_2_count[CAND_CLASS_8] = context_ptr->bypass_md_stage_1[CAND_CLASS_8] ? context_ptr->md_stage_1_count[CAND_CLASS_8] : context_ptr->md_stage_2_count[CAND_CLASS_8];
1730 :
1731 :
1732 : #if PAL_CLASS
1733 : //TODO: use actual number of stages on the setting section and update using the following logic.
1734 : // stage1_cand_count[CAND_CLASS_i] = bypass_stage1 ? stage2_cand_count[CAND_CLASS_i] : stage1_cand_count[CAND_CLASS_i];
1735 811174 : context_ptr->md_stage_2_count[CAND_CLASS_7] = context_ptr->bypass_md_stage_1[CAND_CLASS_7] ? context_ptr->md_stage_1_count[CAND_CLASS_7] : context_ptr->md_stage_2_count[CAND_CLASS_7];
1736 : #endif
1737 :
1738 : // Step 4: zero-out count for CAND_CLASS_3 if CAND_CLASS_1 and CAND_CLASS_2 are merged (i.e. shift to the left)
1739 811174 : if (context_ptr->combine_class12)
1740 75646 : context_ptr->md_stage_1_count[CAND_CLASS_3] = context_ptr->md_stage_2_count[CAND_CLASS_3] = 0;
1741 811174 : }
1742 : #else
1743 : void set_md_stage_counts(
1744 : PictureControlSet *picture_control_set_ptr,
1745 : ModeDecisionContext *context_ptr,
1746 : uint32_t fastCandidateTotalCount)
1747 : {
1748 : SequenceControlSet* scs = (SequenceControlSet*)(picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr);
1749 : // Step 0: derive bypass_stage1 flags
1750 : if (context_ptr->md_staging_mode) {
1751 : context_ptr->bypass_stage1[CAND_CLASS_0] = EB_TRUE;
1752 : #if FILTER_INTRA_FLAG
1753 : context_ptr->bypass_stage1[CAND_CLASS_6] = EB_TRUE;
1754 : #endif
1755 : #if PAL_CLASS
1756 : context_ptr->bypass_stage1[CAND_CLASS_7] = EB_TRUE;
1757 : #endif
1758 : context_ptr->bypass_stage1[CAND_CLASS_1] = EB_FALSE;
1759 : context_ptr->bypass_stage1[CAND_CLASS_2] = EB_FALSE;
1760 : context_ptr->bypass_stage1[CAND_CLASS_3] = context_ptr->combine_class12 ? EB_TRUE : EB_FALSE;
1761 : #if II_COMP_FLAG
1762 : context_ptr->bypass_stage1[CAND_CLASS_4] = EB_FALSE;
1763 : #endif
1764 : #if OBMC_FLAG
1765 : context_ptr->bypass_stage1[CAND_CLASS_5] = EB_FALSE;
1766 : #endif
1767 : context_ptr->bypass_stage1[CAND_CLASS_8] = EB_FALSE;
1768 : }
1769 : else
1770 : memset(context_ptr->bypass_stage1, EB_TRUE, CAND_CLASS_TOTAL);
1771 : // Step 1: derive bypass_stage1 flags
1772 : if (context_ptr->md_staging_mode)
1773 : {
1774 : context_ptr->bypass_stage2[CAND_CLASS_0] = EB_FALSE;
1775 : #if FILTER_INTRA_FLAG
1776 : context_ptr->bypass_stage2[CAND_CLASS_6] = EB_FALSE;
1777 : #endif
1778 : #if PAL_CLASS
1779 : context_ptr->bypass_stage2[CAND_CLASS_7] = EB_FALSE;
1780 : #endif
1781 : if (context_ptr->md_staging_mode == MD_STAGING_MODE_2 || context_ptr->md_staging_mode == MD_STAGING_MODE_3) {
1782 : context_ptr->bypass_stage2[CAND_CLASS_1] = EB_FALSE;
1783 : context_ptr->bypass_stage2[CAND_CLASS_2] = EB_FALSE;
1784 : context_ptr->bypass_stage2[CAND_CLASS_3] = context_ptr->combine_class12 ? EB_TRUE : EB_FALSE;
1785 : }
1786 : else {
1787 : context_ptr->bypass_stage2[CAND_CLASS_1] = EB_TRUE;
1788 : context_ptr->bypass_stage2[CAND_CLASS_2] = EB_TRUE;
1789 : context_ptr->bypass_stage2[CAND_CLASS_3] = EB_TRUE;
1790 : }
1791 : #if II_COMP_FLAG
1792 : context_ptr->bypass_stage2[CAND_CLASS_4] = EB_TRUE;
1793 : #endif
1794 : #if OBMC_FLAG
1795 : context_ptr->bypass_stage2[CAND_CLASS_5] = EB_TRUE;
1796 : #endif
1797 : context_ptr->bypass_stage2[CAND_CLASS_8] = EB_TRUE;
1798 : }
1799 : else
1800 : memset(context_ptr->bypass_stage2, EB_TRUE, CAND_CLASS_TOTAL);
1801 : // Step 2: set md_stage count
1802 : context_ptr->md_stage_1_count[CAND_CLASS_0] = (picture_control_set_ptr->slice_type == I_SLICE) ? fastCandidateTotalCount : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? INTRA_NFL : (INTRA_NFL >> 1);
1803 : #if FILTER_INTRA_FLAG
1804 : context_ptr->md_stage_1_count[CAND_CLASS_6] = 5;
1805 : #endif
1806 : #if PAL_CLASS
1807 : context_ptr->md_stage_1_count[CAND_CLASS_7] = 14;
1808 : #endif
1809 : context_ptr->md_stage_1_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? INTER_NEW_NFL : (INTER_NEW_NFL >> 1);
1810 : context_ptr->md_stage_1_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? INTER_PRED_NFL : (INTER_PRED_NFL >> 1);
1811 :
1812 : if (context_ptr->combine_class12) {
1813 : context_ptr->md_stage_1_count[CAND_CLASS_1] = context_ptr->md_stage_1_count[CAND_CLASS_1] * 2;
1814 : }
1815 : else {
1816 : context_ptr->md_stage_1_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? INTER_PRED_NFL : (INTER_PRED_NFL >> 1);
1817 : }
1818 :
1819 : #if II_COMP_FLAG
1820 : context_ptr->md_stage_1_count[CAND_CLASS_4] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 14 :6;// INTER_PRED_NFL: (INTER_PRED_NFL >> 1);
1821 : #endif
1822 : #if OBMC_FLAG
1823 : if (picture_control_set_ptr->parent_pcs_ptr->pic_obmc_mode == 1)
1824 : context_ptr->md_stage_1_count[CAND_CLASS_5] = 16 ;
1825 : else if (picture_control_set_ptr->parent_pcs_ptr->pic_obmc_mode <= 3)
1826 : context_ptr->md_stage_1_count[CAND_CLASS_5] = (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 12 : 4;
1827 : else
1828 : context_ptr->md_stage_1_count[CAND_CLASS_5] = (picture_control_set_ptr->temporal_layer_index == 0 ) ? 12 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 8: 4;
1829 : #endif
1830 : context_ptr->md_stage_1_count[CAND_CLASS_8] = (picture_control_set_ptr->temporal_layer_index == 0) ? 5 : 4;
1831 : if (picture_control_set_ptr->enc_mode >= ENC_M2) {
1832 : context_ptr->md_stage_1_count[CAND_CLASS_1] = context_ptr->md_stage_1_count[CAND_CLASS_1] / 2;
1833 : context_ptr->md_stage_1_count[CAND_CLASS_2] = context_ptr->md_stage_1_count[CAND_CLASS_2] / 2;
1834 :
1835 : if (!context_ptr->combine_class12)
1836 : context_ptr->md_stage_1_count[CAND_CLASS_3] = context_ptr->md_stage_1_count[CAND_CLASS_3] / 2;
1837 : }
1838 :
1839 : context_ptr->md_stage_2_count[CAND_CLASS_0] = (picture_control_set_ptr->slice_type == I_SLICE) ? fastCandidateTotalCount : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? INTRA_NFL : (INTRA_NFL >> 1);
1840 : #if FILTER_INTRA_FLAG
1841 : context_ptr->md_stage_2_count[CAND_CLASS_6] = 5;
1842 : #endif
1843 : #if PAL_CLASS
1844 : context_ptr->md_stage_2_count[CAND_CLASS_7] = 14;// context_ptr->bypass_stage1[CAND_CLASS_7] ? context_ptr->md_stage_1_count[CAND_CLASS_7] : 14;
1845 : #endif
1846 : context_ptr->md_stage_2_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 14 : 4;
1847 : context_ptr->md_stage_2_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 14 : 4;
1848 :
1849 : if (context_ptr->combine_class12) {
1850 : context_ptr->md_stage_2_count[CAND_CLASS_1] = context_ptr->md_stage_2_count[CAND_CLASS_1] * 2;
1851 : }
1852 : else {
1853 : context_ptr->md_stage_2_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 14 : 4;
1854 : }
1855 :
1856 : #if II_COMP_FLAG
1857 : context_ptr->md_stage_2_count[CAND_CLASS_4] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 12: 4;// 14 : 4;
1858 : #endif
1859 : #if OBMC_FLAG
1860 : context_ptr->md_stage_2_count[CAND_CLASS_5] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : 16;
1861 : #endif
1862 : context_ptr->md_stage_2_count[CAND_CLASS_8] = (picture_control_set_ptr->temporal_layer_index == 0) ? 5 : 4;
1863 :
1864 :
1865 : if (picture_control_set_ptr->enc_mode >= ENC_M2) {
1866 : context_ptr->md_stage_2_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 12 : 3;
1867 : context_ptr->md_stage_2_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 4 : 2;
1868 : if (!context_ptr->combine_class12) {
1869 : context_ptr->md_stage_2_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 6 : 2;
1870 : context_ptr->md_stage_2_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 6 : 2;
1871 : context_ptr->md_stage_2_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 4 : 2;
1872 : }
1873 : }
1874 :
1875 : if (picture_control_set_ptr->enc_mode >= ENC_M1)
1876 : context_ptr->md_stage_3_count[CAND_CLASS_0] = (picture_control_set_ptr->slice_type == I_SLICE) ? 10 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 4 : 1;
1877 : else
1878 : context_ptr->md_stage_3_count[CAND_CLASS_0] = (picture_control_set_ptr->slice_type == I_SLICE) ? 10 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? ((scs->input_resolution >= INPUT_SIZE_1080i_RANGE) ? 7 : 10) : 4;
1879 : #if FILTER_INTRA_FLAG
1880 : context_ptr->md_stage_3_count[CAND_CLASS_6] = (picture_control_set_ptr->temporal_layer_index == 0) ? 5 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 3 : 2;
1881 : context_ptr->md_stage_3_count[CAND_CLASS_6] = (picture_control_set_ptr->temporal_layer_index == 0) ? 5 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 3 : 2;
1882 : #endif
1883 : #if PAL_CLASS
1884 : if (picture_control_set_ptr->parent_pcs_ptr->palette_mode == 1)
1885 : context_ptr->md_stage_3_count[CAND_CLASS_7] =
1886 : (picture_control_set_ptr->temporal_layer_index == 0) ? 7 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 4 : 4;
1887 : else if (picture_control_set_ptr->parent_pcs_ptr->palette_mode == 2 || picture_control_set_ptr->parent_pcs_ptr->palette_mode == 3)
1888 : context_ptr->md_stage_3_count[CAND_CLASS_7] =
1889 : (picture_control_set_ptr->temporal_layer_index == 0) ? 7 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 2;
1890 : else if (picture_control_set_ptr->parent_pcs_ptr->palette_mode == 4 || picture_control_set_ptr->parent_pcs_ptr->palette_mode == 5)
1891 : context_ptr->md_stage_3_count[CAND_CLASS_7] =
1892 : (picture_control_set_ptr->temporal_layer_index == 0) ? 4 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 1;
1893 : else
1894 : context_ptr->md_stage_3_count[CAND_CLASS_7] =
1895 : (picture_control_set_ptr->temporal_layer_index == 0) ? 2 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 1 : 1;
1896 : #endif
1897 :
1898 : context_ptr->md_stage_3_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 3 : 1;
1899 : context_ptr->md_stage_3_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 3 : 1;
1900 :
1901 : if (context_ptr->combine_class12) {
1902 : context_ptr->md_stage_3_count[CAND_CLASS_1] = context_ptr->md_stage_3_count[CAND_CLASS_1] * 2;
1903 : }
1904 : else {
1905 : context_ptr->md_stage_3_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 3 : 1;
1906 : }
1907 :
1908 : #if II_COMP_FLAG
1909 : context_ptr->md_stage_3_count[CAND_CLASS_4] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 12 : 4;// 14 : 4;
1910 : #endif
1911 : #if OBMC_FLAG
1912 : if (picture_control_set_ptr->parent_pcs_ptr->pic_obmc_mode == 1)
1913 : context_ptr->md_stage_3_count[CAND_CLASS_5] = 16 ;
1914 : else if (picture_control_set_ptr->parent_pcs_ptr->pic_obmc_mode <= 3)
1915 : context_ptr->md_stage_3_count[CAND_CLASS_5] = (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 12 : 4;
1916 : else
1917 : context_ptr->md_stage_3_count[CAND_CLASS_5] = (picture_control_set_ptr->temporal_layer_index == 0 ) ? 12 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 8: 4;
1918 : #endif
1919 : context_ptr->md_stage_3_count[CAND_CLASS_8] = (picture_control_set_ptr->temporal_layer_index == 0) ? 4 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 3 : 2;
1920 :
1921 : if (!context_ptr->combine_class12 && picture_control_set_ptr->parent_pcs_ptr->sc_content_detected && picture_control_set_ptr->enc_mode == ENC_M0) {
1922 :
1923 : context_ptr->md_stage_2_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 16 : 8;
1924 : context_ptr->md_stage_2_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 16 : 8;
1925 : context_ptr->md_stage_2_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 16 : 8;
1926 :
1927 : context_ptr->md_stage_3_count[CAND_CLASS_0] = (picture_control_set_ptr->slice_type == I_SLICE) ? 10 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 8 : 4;
1928 : context_ptr->md_stage_3_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 8 : 4;
1929 : context_ptr->md_stage_3_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 8 : 4;
1930 : context_ptr->md_stage_3_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 8 : 4;
1931 : }
1932 :
1933 : if (picture_control_set_ptr->enc_mode >= ENC_M2 && picture_control_set_ptr->enc_mode <= ENC_M4) {
1934 : context_ptr->md_stage_3_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 4 : 2;
1935 : context_ptr->md_stage_3_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 1;
1936 :
1937 : if (!context_ptr->combine_class12) {
1938 : context_ptr->md_stage_3_count[CAND_CLASS_1] = context_ptr->md_stage_3_count[CAND_CLASS_1] / 2;
1939 : context_ptr->md_stage_3_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 1;
1940 : }
1941 : }
1942 : else if (picture_control_set_ptr->enc_mode >= ENC_M5) {
1943 : if (context_ptr->md_staging_mode == MD_STAGING_MODE_0 && picture_control_set_ptr->enc_mode <= ENC_M6) {
1944 : context_ptr->md_stage_1_count[CAND_CLASS_0] = (picture_control_set_ptr->slice_type == I_SLICE) ? 8 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 3 : 1;
1945 :
1946 : if (context_ptr->combine_class12) {
1947 : context_ptr->md_stage_1_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 5 : 3;
1948 : context_ptr->md_stage_1_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 1 : 1;
1949 :
1950 : }
1951 : else {
1952 : context_ptr->md_stage_1_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 1;
1953 : context_ptr->md_stage_1_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 1;
1954 : context_ptr->md_stage_1_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 1 : 1;
1955 : }
1956 :
1957 : context_ptr->md_stage_2_count[CAND_CLASS_0] = context_ptr->md_stage_1_count[CAND_CLASS_0];
1958 : context_ptr->md_stage_2_count[CAND_CLASS_1] = context_ptr->md_stage_1_count[CAND_CLASS_1];
1959 : context_ptr->md_stage_2_count[CAND_CLASS_2] = context_ptr->md_stage_1_count[CAND_CLASS_2];
1960 :
1961 : if (!context_ptr->combine_class12)
1962 : context_ptr->md_stage_2_count[CAND_CLASS_3] = context_ptr->md_stage_1_count[CAND_CLASS_3];
1963 :
1964 : context_ptr->md_stage_3_count[CAND_CLASS_0] = context_ptr->md_stage_2_count[CAND_CLASS_0];
1965 : context_ptr->md_stage_3_count[CAND_CLASS_1] = context_ptr->md_stage_2_count[CAND_CLASS_1];
1966 : context_ptr->md_stage_3_count[CAND_CLASS_2] = context_ptr->md_stage_2_count[CAND_CLASS_2];
1967 : if (!context_ptr->combine_class12)
1968 : context_ptr->md_stage_3_count[CAND_CLASS_3] = context_ptr->md_stage_2_count[CAND_CLASS_3];
1969 : }
1970 : else {
1971 : context_ptr->md_stage_1_count[CAND_CLASS_0] = (picture_control_set_ptr->slice_type == I_SLICE) ? 6 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 1;
1972 :
1973 : if (context_ptr->combine_class12) {
1974 : context_ptr->md_stage_1_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 4 : 2;
1975 : context_ptr->md_stage_1_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 1 : 1;
1976 :
1977 : }
1978 : else {
1979 : context_ptr->md_stage_1_count[CAND_CLASS_1] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 1;
1980 : context_ptr->md_stage_1_count[CAND_CLASS_2] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 2 : 1;
1981 : context_ptr->md_stage_1_count[CAND_CLASS_3] = (picture_control_set_ptr->slice_type == I_SLICE) ? 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag) ? 1 : 1;
1982 :
1983 : }
1984 :
1985 : context_ptr->md_stage_2_count[CAND_CLASS_0] = context_ptr->md_stage_1_count[CAND_CLASS_0];
1986 : context_ptr->md_stage_2_count[CAND_CLASS_1] = context_ptr->md_stage_1_count[CAND_CLASS_1];
1987 : context_ptr->md_stage_2_count[CAND_CLASS_2] = context_ptr->md_stage_1_count[CAND_CLASS_2];
1988 :
1989 : if (!context_ptr->combine_class12)
1990 : context_ptr->md_stage_2_count[CAND_CLASS_3] = context_ptr->md_stage_1_count[CAND_CLASS_3];
1991 :
1992 : context_ptr->md_stage_3_count[CAND_CLASS_0] = context_ptr->md_stage_2_count[CAND_CLASS_0];
1993 : context_ptr->md_stage_3_count[CAND_CLASS_1] = context_ptr->md_stage_2_count[CAND_CLASS_1];
1994 : context_ptr->md_stage_3_count[CAND_CLASS_2] = context_ptr->md_stage_2_count[CAND_CLASS_2];
1995 :
1996 : if (!context_ptr->combine_class12)
1997 : context_ptr->md_stage_3_count[CAND_CLASS_3] = context_ptr->md_stage_2_count[CAND_CLASS_3];
1998 : }
1999 : }
2000 :
2001 : // Step 3: update count for md_stage_1 and d_stage_2 if bypassed (no NIC setting should be done beyond this point)
2002 : context_ptr->md_stage_2_count[CAND_CLASS_0] = context_ptr->bypass_stage1[CAND_CLASS_0] ? context_ptr->md_stage_1_count[CAND_CLASS_0] : context_ptr->md_stage_2_count[CAND_CLASS_0];
2003 : context_ptr->md_stage_2_count[CAND_CLASS_1] = context_ptr->bypass_stage1[CAND_CLASS_1] ? context_ptr->md_stage_1_count[CAND_CLASS_1] : context_ptr->md_stage_2_count[CAND_CLASS_1];
2004 : context_ptr->md_stage_2_count[CAND_CLASS_2] = context_ptr->bypass_stage1[CAND_CLASS_2] ? context_ptr->md_stage_1_count[CAND_CLASS_2] : context_ptr->md_stage_2_count[CAND_CLASS_2];
2005 : context_ptr->md_stage_2_count[CAND_CLASS_3] = context_ptr->bypass_stage1[CAND_CLASS_3] ? context_ptr->md_stage_1_count[CAND_CLASS_3] : context_ptr->md_stage_2_count[CAND_CLASS_3];
2006 :
2007 : context_ptr->md_stage_3_count[CAND_CLASS_0] = context_ptr->bypass_stage2[CAND_CLASS_0] ? context_ptr->md_stage_2_count[CAND_CLASS_0] : context_ptr->md_stage_3_count[CAND_CLASS_0];
2008 : context_ptr->md_stage_3_count[CAND_CLASS_1] = context_ptr->bypass_stage2[CAND_CLASS_1] ? context_ptr->md_stage_2_count[CAND_CLASS_1] : context_ptr->md_stage_3_count[CAND_CLASS_1];
2009 : context_ptr->md_stage_3_count[CAND_CLASS_2] = context_ptr->bypass_stage2[CAND_CLASS_2] ? context_ptr->md_stage_2_count[CAND_CLASS_2] : context_ptr->md_stage_3_count[CAND_CLASS_2];
2010 : context_ptr->md_stage_3_count[CAND_CLASS_3] = context_ptr->bypass_stage2[CAND_CLASS_3] ? context_ptr->md_stage_2_count[CAND_CLASS_3] : context_ptr->md_stage_3_count[CAND_CLASS_3];
2011 : //TODO: use actual number of stages on the setting section and update using the following logic.
2012 : // stage2_cand_count[CAND_CLASS_i] = bypass_stage2 ? stage3_cand_count[CAND_CLASS_i] : stage2_cand_count[CAND_CLASS_i];
2013 : // stage1_cand_count[CAND_CLASS_i] = bypass_stage1 ? stage2_cand_count[CAND_CLASS_i] : stage1_cand_count[CAND_CLASS_i];
2014 :
2015 :
2016 : #if PAL_CLASS //THIS SHOULD BE rEMOVED AFTER REBAS~~~
2017 : context_ptr->md_stage_2_count[CAND_CLASS_7] = context_ptr->bypass_stage1[CAND_CLASS_7] ? context_ptr->md_stage_1_count[CAND_CLASS_7] : context_ptr->md_stage_2_count[CAND_CLASS_7];
2018 : context_ptr->md_stage_3_count[CAND_CLASS_7] = context_ptr->bypass_stage2[CAND_CLASS_7] ? context_ptr->md_stage_2_count[CAND_CLASS_7] : context_ptr->md_stage_3_count[CAND_CLASS_7];
2019 : #endif
2020 :
2021 : // Step 4: zero-out count for CAND_CLASS_3 if CAND_CLASS_1 and CAND_CLASS_2 are merged (i.e. shift to the left)
2022 : if (context_ptr->combine_class12)
2023 : context_ptr->md_stage_1_count[CAND_CLASS_3] = context_ptr->md_stage_2_count[CAND_CLASS_3] = context_ptr->md_stage_3_count[CAND_CLASS_3] = 0;
2024 : }
2025 : #endif
2026 3305680 : void sort_stage0_fast_candidates(
2027 : struct ModeDecisionContext *context_ptr,
2028 : uint32_t input_buffer_start_idx,
2029 : uint32_t input_buffer_count, //how many cand buffers to sort. one of the buffer can have max cost.
2030 : uint32_t *cand_buff_indices
2031 : )
2032 : {
2033 3305680 : ModeDecisionCandidateBuffer **buffer_ptr_array = context_ptr->candidate_buffer_ptr_array;
2034 : #if !SPEED_OPT
2035 : // fill cand_buff_indices with surviving buffer indices ; move the scratch candidates (MAX_CU_COST) to the last spots (if any)
2036 : uint32_t ordered_start_idx = 0;
2037 : uint32_t ordered_end_idx = input_buffer_count - 1;
2038 : #endif
2039 :
2040 3305680 : uint32_t input_buffer_end_idx = input_buffer_start_idx + input_buffer_count - 1;
2041 : #if SPEED_OPT
2042 : uint32_t buffer_index, i, j;
2043 3305680 : uint32_t k = 0;
2044 39270100 : for (buffer_index = input_buffer_start_idx; buffer_index <= input_buffer_end_idx; buffer_index++, k++) {
2045 35964400 : cand_buff_indices[k] = buffer_index;
2046 : }
2047 35940500 : for (i = 0; i < input_buffer_count - 1; ++i) {
2048 297162000 : for (j = i + 1; j < input_buffer_count; ++j) {
2049 264527000 : if (*(buffer_ptr_array[cand_buff_indices[j]]->fast_cost_ptr) < *(buffer_ptr_array[cand_buff_indices[i]]->fast_cost_ptr)) {
2050 136108000 : buffer_index = cand_buff_indices[i];
2051 136108000 : cand_buff_indices[i] = (uint32_t)cand_buff_indices[j];
2052 136108000 : cand_buff_indices[j] = (uint32_t)buffer_index;
2053 :
2054 : }
2055 : }
2056 : }
2057 : #else
2058 : for (uint32_t buffer_index = input_buffer_start_idx; buffer_index <= input_buffer_end_idx; buffer_index++) {
2059 : if (*(buffer_ptr_array[buffer_index]->fast_cost_ptr) == MAX_CU_COST)
2060 : cand_buff_indices[ordered_end_idx--] = buffer_index;
2061 : else
2062 : cand_buff_indices[ordered_start_idx++] = buffer_index;
2063 : }
2064 : #endif
2065 3305680 : }
2066 :
2067 130202000 : static INLINE void heap_sort_stage_max_node_fast_cost_ptr(
2068 : ModeDecisionCandidateBuffer **buffer_ptr,
2069 : uint32_t* sort_index, uint32_t i, uint32_t num)
2070 : {
2071 : uint32_t left, right, max;
2072 :
2073 : /* Loop for removing recursion. */
2074 102009000 : while (1) {
2075 130202000 : left = 2 * i;
2076 130202000 : right = 2 * i + 1;
2077 130202000 : max = i;
2078 :
2079 130202000 : if (left <= num && *(buffer_ptr[sort_index[left]]->fast_cost_ptr) >
2080 108461000 : *(buffer_ptr[sort_index[i]]->fast_cost_ptr)) {
2081 79996400 : max = left;
2082 : }
2083 :
2084 130202000 : if (right <= num && *(buffer_ptr[sort_index[right]]->fast_cost_ptr) >
2085 106788000 : *(buffer_ptr[sort_index[max]]->fast_cost_ptr)) {
2086 58705100 : max = right;
2087 : }
2088 :
2089 130202000 : if (max == i) {
2090 28192800 : break;
2091 : }
2092 :
2093 102009000 : uint32_t swap = sort_index[i];
2094 102009000 : sort_index[i] = sort_index[max];
2095 102009000 : sort_index[max] = swap;
2096 102009000 : i = max;
2097 : }
2098 28192800 : }
2099 :
2100 18736100 : static void qsort_stage_max_node_fast_cost_ptr(
2101 : ModeDecisionCandidateBuffer **buffer_ptr_array, uint32_t *dst,
2102 : uint32_t *a, uint32_t *b, int num)
2103 : {
2104 18736100 : if (num < 4) {
2105 10058500 : if (num < 2) {
2106 6666630 : if (num) {
2107 : //num = 1
2108 1643240 : dst[0] = a[0];
2109 : }
2110 6666630 : return;
2111 : }
2112 3391900 : if (num > 2) {
2113 : //num = 3
2114 2041850 : uint32_t tmp_a = a[0];
2115 2041850 : uint32_t tmp_b = a[1];
2116 2041850 : uint32_t tmp_c = a[2];
2117 2041850 : uint64_t val_a = *(buffer_ptr_array[tmp_a]->fast_cost_ptr);
2118 2041850 : uint64_t val_b = *(buffer_ptr_array[tmp_b]->fast_cost_ptr);
2119 2041850 : uint64_t val_c = *(buffer_ptr_array[tmp_c]->fast_cost_ptr);
2120 :
2121 2041850 : if (val_a < val_b) {
2122 629721 : if (val_b < val_c) {
2123 : //Sorted abc
2124 347518 : dst[0] = tmp_a;
2125 347518 : dst[1] = tmp_b;
2126 347518 : dst[2] = tmp_c;
2127 : }
2128 : else {
2129 : //xcx
2130 282203 : if (val_a < val_c) {
2131 : //Sorted 132
2132 128389 : dst[0] = tmp_a;
2133 128389 : dst[1] = tmp_c;
2134 128389 : dst[2] = tmp_b;
2135 : }
2136 : else {
2137 : //Sorted 231
2138 153814 : dst[0] = tmp_c;
2139 153814 : dst[1] = tmp_a;
2140 153814 : dst[2] = tmp_b;
2141 : }
2142 : }
2143 : }
2144 : else {
2145 : //a>b
2146 1412130 : if (val_b > val_c) {
2147 : //Sorted cba
2148 1041830 : dst[0] = tmp_c;
2149 1041830 : dst[1] = tmp_b;
2150 1041830 : dst[2] = tmp_a;
2151 : }
2152 : else {
2153 : //bxx
2154 370301 : if (val_a < val_c) {
2155 : //Sorted bac
2156 148765 : dst[0] = tmp_b;
2157 148765 : dst[1] = tmp_a;
2158 148765 : dst[2] = tmp_c;
2159 : }
2160 : else {
2161 : //Sorted bca
2162 221536 : dst[0] = tmp_b;
2163 221536 : dst[1] = tmp_c;
2164 221536 : dst[2] = tmp_a;
2165 : }
2166 : }
2167 : }
2168 2041850 : return;
2169 : }
2170 :
2171 : /* bacuse a and dst can point on this same array, copy temporary values*/
2172 1350050 : uint32_t tmp_a = a[0];
2173 1350050 : uint32_t tmp_b = a[1];
2174 1350050 : if (*(buffer_ptr_array[tmp_a]->fast_cost_ptr) < *(buffer_ptr_array[tmp_b]->fast_cost_ptr)) {
2175 822796 : dst[0] = tmp_a;
2176 822796 : dst[1] = tmp_b;
2177 : }
2178 : else {
2179 527256 : dst[0] = tmp_b;
2180 527256 : dst[1] = tmp_a;
2181 : }
2182 1350050 : return;
2183 : }
2184 :
2185 8677560 : int sorted_down = 0;
2186 8677560 : int sorted_up = num - 1;
2187 :
2188 8677560 : uint64_t pivot_val = *(buffer_ptr_array[a[0]]->fast_cost_ptr);
2189 100090000 : for (int i = 1; i < num; ++i) {
2190 91412700 : if (pivot_val < *(buffer_ptr_array[a[i]]->fast_cost_ptr)) {
2191 33337100 : b[sorted_up] = a[i];
2192 33337100 : sorted_up--;
2193 : }
2194 : else {
2195 58075700 : b[sorted_down] = a[i];
2196 58075700 : sorted_down++;
2197 : }
2198 : }
2199 :
2200 8677560 : dst[sorted_down] = a[0];
2201 :
2202 8677560 : qsort_stage_max_node_fast_cost_ptr(buffer_ptr_array, dst,
2203 : b, a, sorted_down);
2204 :
2205 8681790 : qsort_stage_max_node_fast_cost_ptr(buffer_ptr_array, dst + (sorted_down + 1),
2206 8681790 : b + (sorted_down + 1), a + (sorted_down + 1), num - (sorted_down)-1);
2207 : }
2208 :
2209 1622750 : static INLINE void sort_array_index_fast_cost_ptr(
2210 : ModeDecisionCandidateBuffer** buffer_ptr,
2211 : uint32_t* sort_index, uint32_t num)
2212 : {
2213 1622750 : if (num <= 60) {
2214 : //For small array uses 'quick sort', work much faster for small array,
2215 : //but required alloc temporary memory.
2216 : uint32_t sorted_tmp[60];
2217 1377300 : qsort_stage_max_node_fast_cost_ptr(buffer_ptr, sort_index, sort_index, sorted_tmp, num);
2218 1377220 : return;
2219 : }
2220 :
2221 : //For big arrays uses 'heap sort', not need allocate memory
2222 : //For small array less that 40 elements heap sort work slower than 'insertion sort'
2223 : uint32_t i;
2224 9522190 : for (i = (num - 1) / 2; i > 0; i--)
2225 : {
2226 9276800 : heap_sort_stage_max_node_fast_cost_ptr(
2227 : buffer_ptr, sort_index, i, num - 1);
2228 : }
2229 :
2230 245390 : heap_sort_stage_max_node_fast_cost_ptr(
2231 : buffer_ptr, sort_index, 0, num - 1);
2232 :
2233 18929500 : for (i = num - 1; i > 0; i--)
2234 : {
2235 18684300 : uint32_t swap = sort_index[i];
2236 18684300 : sort_index[i] = sort_index[0];
2237 18684300 : sort_index[0] = swap;
2238 18684300 : heap_sort_stage_max_node_fast_cost_ptr(
2239 : buffer_ptr, sort_index, 0, i - 1);
2240 : }
2241 : }
2242 :
2243 : #if FIX_SORTING_METHOD
2244 0 : void sort_stage1_fast_candidates(
2245 : struct ModeDecisionContext *context_ptr,
2246 : uint32_t num_of_cand_to_sort,
2247 : uint32_t *cand_buff_indices)
2248 : {
2249 : uint32_t i, j, index;
2250 0 : ModeDecisionCandidateBuffer **buffer_ptr_array = context_ptr->candidate_buffer_ptr_array;
2251 :
2252 0 : for (i = 0; i < num_of_cand_to_sort - 1; ++i) {
2253 0 : for (j = i + 1; j < num_of_cand_to_sort; ++j) {
2254 0 : if (*(buffer_ptr_array[cand_buff_indices[j]]->fast_cost_ptr) < *(buffer_ptr_array[cand_buff_indices[i]]->fast_cost_ptr)) {
2255 0 : index = cand_buff_indices[i];
2256 0 : cand_buff_indices[i] = (uint32_t)cand_buff_indices[j];
2257 0 : cand_buff_indices[j] = (uint32_t)index;
2258 :
2259 : }
2260 : }
2261 : }
2262 0 : }
2263 : #else
2264 : void sort_stage1_fast_candidates(
2265 : struct ModeDecisionContext *context_ptr,
2266 : uint32_t num_of_cand_to_sort,
2267 : uint32_t *cand_buff_indices)
2268 : {
2269 : ModeDecisionCandidateBuffer **buffer_ptr_array = context_ptr->candidate_buffer_ptr_array;
2270 :
2271 : //sorted best: *(buffer_ptr_array[sorted_candidate_index_array[?]]->fast_cost_ptr)
2272 : sort_array_index_fast_cost_ptr(buffer_ptr_array,
2273 : cand_buff_indices, num_of_cand_to_sort);
2274 : }
2275 : #endif
2276 : #if REMOVE_MD_STAGE_1
2277 3160710 : void sort_stage1_candidates(
2278 : #else
2279 : void sort_stage2_candidates(
2280 : #endif
2281 : struct ModeDecisionContext *context_ptr,
2282 : uint32_t num_of_cand_to_sort,
2283 : uint32_t *cand_buff_indices)
2284 : {
2285 : uint32_t i, j, index;
2286 3160710 : ModeDecisionCandidateBuffer **buffer_ptr_array = context_ptr->candidate_buffer_ptr_array;
2287 33624900 : for (i = 0; i < num_of_cand_to_sort - 1; ++i) {
2288 272014000 : for (j = i + 1; j < num_of_cand_to_sort; ++j) {
2289 241550000 : if (*(buffer_ptr_array[cand_buff_indices[j]]->full_cost_ptr) < *(buffer_ptr_array[cand_buff_indices[i]]->full_cost_ptr)) {
2290 48515900 : index = cand_buff_indices[i];
2291 48515900 : cand_buff_indices[i] = (uint32_t)cand_buff_indices[j];
2292 48515900 : cand_buff_indices[j] = (uint32_t)index;
2293 : }
2294 : }
2295 : }
2296 3160710 : }
2297 : #if REMOVE_MD_STAGE_1
2298 811398 : void construct_best_sorted_arrays_md_stage_1(
2299 : #else
2300 : void construct_best_sorted_arrays_md_stage_2(
2301 : #endif
2302 : struct ModeDecisionContext *context_ptr,
2303 : ModeDecisionCandidateBuffer **buffer_ptr_array,
2304 : uint32_t *best_candidate_index_array,
2305 : uint32_t *sorted_candidate_index_array,
2306 : uint64_t *ref_fast_cost
2307 : )
2308 : {
2309 : //best = union from all classes
2310 811398 : uint32_t best_candi = 0;
2311 8113370 : for (CAND_CLASS class_i = CAND_CLASS_0; class_i < CAND_CLASS_TOTAL; class_i++)
2312 : #if REMOVE_MD_STAGE_1
2313 41282500 : for (uint32_t candi = 0; candi < context_ptr->md_stage_1_count[class_i]; candi++)
2314 : #else
2315 : for (uint32_t candi = 0; candi < context_ptr->md_stage_2_count[class_i]; candi++)
2316 : #endif
2317 33980500 : sorted_candidate_index_array[best_candi++] = context_ptr->cand_buff_indices[class_i][candi];
2318 :
2319 : #if REMOVE_MD_STAGE_1
2320 811398 : assert(best_candi == context_ptr->md_stage_1_total_count);
2321 811398 : uint32_t fullReconCandidateCount = context_ptr->md_stage_1_total_count;
2322 : #else
2323 : assert(best_candi == context_ptr->md_stage_2_total_count);
2324 : uint32_t fullReconCandidateCount = context_ptr->md_stage_2_total_count;
2325 : #endif
2326 :
2327 : //sort best: inter, then intra
2328 : uint32_t i, id;
2329 811398 : uint32_t id_inter = 0;
2330 811398 : uint32_t id_intra = fullReconCandidateCount - 1;
2331 34790200 : for (i = 0; i < fullReconCandidateCount; ++i) {
2332 33978800 : id = sorted_candidate_index_array[i];
2333 33978800 : if (buffer_ptr_array[id]->candidate_ptr->type == INTER_MODE) {
2334 28832400 : best_candidate_index_array[id_inter++] = id;
2335 : }
2336 : else {
2337 5146400 : assert(buffer_ptr_array[id]->candidate_ptr->type == INTRA_MODE);
2338 5146400 : best_candidate_index_array[id_intra--] = id;
2339 : }
2340 : }
2341 :
2342 : //sorted best: *(buffer_ptr_array[sorted_candidate_index_array[?]]->fast_cost_ptr)
2343 811398 : sort_array_index_fast_cost_ptr(buffer_ptr_array,
2344 : sorted_candidate_index_array, fullReconCandidateCount);
2345 :
2346 : // tx search
2347 811395 : *ref_fast_cost = *(buffer_ptr_array[sorted_candidate_index_array[0]]->fast_cost_ptr);
2348 811395 : }
2349 :
2350 : #if REMOVE_MD_STAGE_1
2351 811359 : void construct_best_sorted_arrays_md_stage_2(
2352 : #else
2353 : void construct_best_sorted_arrays_md_stage_3(
2354 : #endif
2355 : struct ModeDecisionContext *context_ptr,
2356 : ModeDecisionCandidateBuffer **buffer_ptr_array,
2357 : uint32_t *best_candidate_index_array,
2358 : uint32_t *sorted_candidate_index_array)
2359 : {
2360 :
2361 : //best = union from all classes
2362 811359 : uint32_t best_candi = 0;
2363 8113510 : for (CAND_CLASS class_i = CAND_CLASS_0; class_i < CAND_CLASS_TOTAL; class_i++)
2364 : #if REMOVE_MD_STAGE_1
2365 11417000 : for (uint32_t candi = 0; candi < context_ptr->md_stage_2_count[class_i]; candi++)
2366 : #else
2367 : for (uint32_t candi = 0; candi < context_ptr->md_stage_3_count[class_i]; candi++)
2368 : #endif
2369 4114860 : sorted_candidate_index_array[best_candi++] = context_ptr->cand_buff_indices[class_i][candi];
2370 :
2371 : #if REMOVE_MD_STAGE_1
2372 811359 : assert(best_candi == context_ptr->md_stage_2_total_count);
2373 811359 : uint32_t fullReconCandidateCount = context_ptr->md_stage_2_total_count;
2374 : #else
2375 : assert(best_candi == context_ptr->md_stage_3_total_count);
2376 : uint32_t fullReconCandidateCount = context_ptr->md_stage_3_total_count;
2377 : #endif
2378 : //sort best: inter, then intra
2379 : uint32_t i, id;
2380 811359 : uint32_t id_inter = 0;
2381 811359 : uint32_t id_intra = fullReconCandidateCount - 1;
2382 4926180 : for (i = 0; i < fullReconCandidateCount; ++i) {
2383 4114820 : id = sorted_candidate_index_array[i];
2384 4114820 : if (buffer_ptr_array[id]->candidate_ptr->type == INTER_MODE) {
2385 3242480 : best_candidate_index_array[id_inter++] = id;
2386 : }
2387 : else {
2388 872341 : assert(buffer_ptr_array[id]->candidate_ptr->type == INTRA_MODE);
2389 872341 : best_candidate_index_array[id_intra--] = id;
2390 : }
2391 : }
2392 :
2393 : //sorted best: *(buffer_ptr_array[sorted_candidate_index_array[?]]->fast_cost_ptr)
2394 811359 : sort_array_index_fast_cost_ptr(buffer_ptr_array,
2395 : sorted_candidate_index_array, fullReconCandidateCount);
2396 811415 : }
2397 :
2398 3305790 : void md_stage_0(
2399 :
2400 : PictureControlSet *picture_control_set_ptr,
2401 : ModeDecisionContext *context_ptr,
2402 : ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base,
2403 : ModeDecisionCandidate *fast_candidate_array,
2404 : int32_t fast_candidate_start_index,
2405 : int32_t fast_candidate_end_index,
2406 : EbPictureBufferDesc *input_picture_ptr,
2407 : uint32_t inputOriginIndex,
2408 : uint32_t inputCbOriginIndex,
2409 : uint32_t inputCrOriginIndex,
2410 : CodingUnit *cu_ptr,
2411 : uint32_t cuOriginIndex,
2412 : uint32_t cuChromaOriginIndex,
2413 : uint32_t candidate_buffer_start_index,
2414 : uint32_t maxBuffers,
2415 : EbBool scratch_buffer_pesent_flag,
2416 : EbBool use_ssd)
2417 : {
2418 : int32_t fastLoopCandidateIndex;
2419 : uint64_t lumaFastDistortion;
2420 : uint32_t highestCostIndex;
2421 : uint64_t highestCost;
2422 3305790 : uint64_t bestFirstFastCostSearchCandidateCost = MAX_CU_COST;
2423 3305790 : int32_t bestFirstFastCostSearchCandidateIndex = INVALID_FAST_CANDIDATE_INDEX;
2424 :
2425 :
2426 : // Set MD Staging fast_loop_core settings
2427 : #if REMOVE_MD_STAGE_1
2428 3305790 : context_ptr->md_staging_skip_interpolation_search = (context_ptr->md_staging_mode == MD_STAGING_MODE_1) ? EB_TRUE : picture_control_set_ptr->parent_pcs_ptr->interpolation_search_level >= IT_SEARCH_FAST_LOOP_UV_BLIND ? EB_FALSE : EB_TRUE;
2429 : #else
2430 : context_ptr->md_staging_skip_interpolation_search = (context_ptr->md_staging_mode) ? EB_TRUE : picture_control_set_ptr->parent_pcs_ptr->interpolation_search_level >= IT_SEARCH_FAST_LOOP_UV_BLIND ? EB_FALSE : EB_TRUE;
2431 : #endif
2432 : #if FILTER_INTRA_FLAG
2433 : #if REMOVE_MD_STAGE_1
2434 : #if PAL_CLASS
2435 9772310 : context_ptr->md_staging_skip_inter_chroma_pred = (context_ptr->md_staging_mode == MD_STAGING_MODE_1 &&
2436 3305790 : context_ptr->target_class != CAND_CLASS_0 && context_ptr->target_class != CAND_CLASS_6 && context_ptr->target_class != CAND_CLASS_7) ? EB_TRUE : EB_FALSE;
2437 : #else
2438 : context_ptr->md_staging_skip_inter_chroma_pred = (context_ptr->md_staging_mode == MD_STAGING_MODE_1 && context_ptr->target_class != CAND_CLASS_0 && context_ptr->target_class != CAND_CLASS_6) ? EB_TRUE : EB_FALSE;
2439 : #endif
2440 : #else
2441 : context_ptr->md_staging_skip_inter_chroma_pred = (context_ptr->md_staging_mode && context_ptr->md_stage == MD_STAGE_0 && context_ptr->target_class != CAND_CLASS_0 && context_ptr->target_class != CAND_CLASS_6) ? EB_TRUE : EB_FALSE;
2442 : #endif
2443 : #else
2444 : context_ptr->md_staging_skip_inter_chroma_pred = (context_ptr->md_staging_mode && context_ptr->md_stage == MD_STAGE_0 && context_ptr->target_class != CAND_CLASS_0) ? EB_TRUE : EB_FALSE;
2445 : #endif
2446 : #if REMOVE_MD_STAGE_1
2447 3305790 : context_ptr->md_staging_use_bilinear = (context_ptr->md_staging_mode == MD_STAGING_MODE_1) ? EB_TRUE : EB_FALSE;
2448 : #else
2449 : context_ptr->md_staging_use_bilinear = (context_ptr->md_staging_mode) ? EB_TRUE : EB_FALSE;
2450 : #endif
2451 : // 1st fast loop: src-to-src
2452 3305790 : fastLoopCandidateIndex = fast_candidate_end_index;
2453 655418000 : while (fastLoopCandidateIndex >= fast_candidate_start_index)
2454 : {
2455 652112000 : if (fast_candidate_array[fastLoopCandidateIndex].cand_class == context_ptr->target_class) {
2456 : // Set the Candidate Buffer
2457 111535000 : ModeDecisionCandidateBuffer *candidate_buffer = candidate_buffer_ptr_array_base[candidate_buffer_start_index];
2458 111535000 : ModeDecisionCandidate *candidate_ptr = candidate_buffer->candidate_ptr = &fast_candidate_array[fastLoopCandidateIndex];
2459 : // Initialize tx_depth
2460 111535000 : candidate_buffer->candidate_ptr->tx_depth = 0;
2461 : // Only check (src - src) candidates (Tier0 candidates)
2462 111535000 : if (candidate_ptr->distortion_ready) {
2463 : // Distortion
2464 0 : lumaFastDistortion = candidate_ptr->me_distortion;
2465 :
2466 : // Fast Cost
2467 0 : *(candidate_buffer->fast_cost_ptr) = Av1ProductFastCostFuncTable[candidate_ptr->type](
2468 : cu_ptr,
2469 0 : candidate_buffer->candidate_ptr,
2470 0 : cu_ptr->qp,
2471 : lumaFastDistortion,
2472 : 0,
2473 0 : context_ptr->fast_lambda,
2474 : 0,
2475 : picture_control_set_ptr,
2476 0 : &(context_ptr->md_local_cu_unit[context_ptr->blk_geom->blkidx_mds].ed_ref_mv_stack[candidate_ptr->ref_frame_type][0]),
2477 : context_ptr->blk_geom,
2478 0 : context_ptr->cu_origin_y >> MI_SIZE_LOG2,
2479 0 : context_ptr->cu_origin_x >> MI_SIZE_LOG2,
2480 : 1,
2481 0 : context_ptr->intra_luma_left_mode,
2482 0 : context_ptr->intra_luma_top_mode);
2483 :
2484 : // Keep track of the candidate index of the best (src - src) candidate
2485 0 : if (*(candidate_buffer->fast_cost_ptr) <= bestFirstFastCostSearchCandidateCost) {
2486 0 : bestFirstFastCostSearchCandidateIndex = fastLoopCandidateIndex;
2487 0 : bestFirstFastCostSearchCandidateCost = *(candidate_buffer->fast_cost_ptr);
2488 : }
2489 :
2490 : // Initialize Fast Cost - to do not interact with the second Fast-Cost Search
2491 0 : *(candidate_buffer->fast_cost_ptr) = MAX_CU_COST;
2492 : }
2493 : }
2494 652112000 : --fastLoopCandidateIndex;
2495 : }
2496 :
2497 : // 2nd fast loop: src-to-recon
2498 3305790 : highestCostIndex = candidate_buffer_start_index;
2499 3305790 : fastLoopCandidateIndex = fast_candidate_end_index;
2500 655014000 : while (fastLoopCandidateIndex >= fast_candidate_start_index)
2501 : {
2502 651790000 : if (fast_candidate_array[fastLoopCandidateIndex].cand_class == context_ptr->target_class) {
2503 111222000 : ModeDecisionCandidateBuffer *candidate_buffer = candidate_buffer_ptr_array_base[highestCostIndex];
2504 111222000 : ModeDecisionCandidate *candidate_ptr = candidate_buffer->candidate_ptr = &fast_candidate_array[fastLoopCandidateIndex];
2505 : // Initialize tx_depth
2506 111222000 : candidate_buffer->candidate_ptr->tx_depth = 0;
2507 111222000 : if (!candidate_ptr->distortion_ready || fastLoopCandidateIndex == bestFirstFastCostSearchCandidateIndex) {
2508 :
2509 : // Prediction
2510 111222000 : fast_loop_core(
2511 : candidate_buffer,
2512 : picture_control_set_ptr,
2513 : context_ptr,
2514 : input_picture_ptr,
2515 : inputOriginIndex,
2516 : inputCbOriginIndex,
2517 : inputCrOriginIndex,
2518 : cu_ptr,
2519 : cuOriginIndex,
2520 : cuChromaOriginIndex,
2521 : use_ssd);
2522 :
2523 : }
2524 :
2525 : // Find the buffer with the highest cost
2526 111140000 : if (fastLoopCandidateIndex || scratch_buffer_pesent_flag)
2527 : {
2528 : // maxCost is volatile to prevent the compiler from loading 0xFFFFFFFFFFFFFF
2529 : // as a const at the early-out. Loading a large constant on intel x64 processors
2530 : // clogs the i-cache/intstruction decode. This still reloads the variable from
2531 : // the stack each pass, so a better solution would be to register the variable,
2532 : // but this might require asm.
2533 110676000 : volatile uint64_t maxCost = MAX_CU_COST;
2534 110676000 : const uint64_t *fast_cost_array = context_ptr->fast_cost_array;
2535 110676000 : const uint32_t bufferIndexStart = candidate_buffer_start_index;
2536 110676000 : const uint32_t bufferIndexEnd = bufferIndexStart + maxBuffers;
2537 : uint32_t bufferIndex;
2538 :
2539 110676000 : highestCostIndex = bufferIndexStart;
2540 110676000 : bufferIndex = bufferIndexStart + 1;
2541 :
2542 : do {
2543 1325930000 : highestCost = fast_cost_array[highestCostIndex];
2544 1325930000 : if (highestCost == maxCost)
2545 29419900 : break;
2546 :
2547 1296510000 : if (fast_cost_array[bufferIndex] > highestCost)
2548 247814000 : highestCostIndex = bufferIndex;
2549 1296510000 : } while (++bufferIndex < bufferIndexEnd);
2550 : }
2551 : }
2552 651708000 : --fastLoopCandidateIndex;
2553 : }
2554 :
2555 : // Set the cost of the scratch canidate to max to get discarded @ the sorting phase
2556 3223960 : *(candidate_buffer_ptr_array_base[highestCostIndex]->fast_cost_ptr) = (scratch_buffer_pesent_flag) ?
2557 3223960 : MAX_CU_COST :
2558 1320610 : *(candidate_buffer_ptr_array_base[highestCostIndex]->fast_cost_ptr);
2559 3223960 : }
2560 : #if !REMOVE_MD_STAGE_1
2561 : void md_stage_1(
2562 : PictureControlSet *picture_control_set_ptr,
2563 : ModeDecisionContext *context_ptr,
2564 : ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base,
2565 : uint32_t num_of_candidates,
2566 : EbPictureBufferDesc *input_picture_ptr,
2567 : uint32_t inputOriginIndex,
2568 : uint32_t inputCbOriginIndex,
2569 : uint32_t inputCrOriginIndex,
2570 : CodingUnit *cu_ptr,
2571 : uint32_t cuOriginIndex,
2572 : uint32_t cuChromaOriginIndex,
2573 : EbBool use_ssd)
2574 : {
2575 :
2576 : // Set MD Staging fast_loop_core settings
2577 : context_ptr->md_staging_skip_interpolation_search = (context_ptr->md_staging_mode == MD_STAGING_MODE_3) ? EB_TRUE : picture_control_set_ptr->parent_pcs_ptr->interpolation_search_level >= IT_SEARCH_FAST_LOOP_UV_BLIND ? EB_FALSE : EB_TRUE;
2578 : context_ptr->md_staging_skip_inter_chroma_pred = EB_FALSE;
2579 : context_ptr->md_staging_use_bilinear = EB_FALSE;
2580 :
2581 : for (uint32_t cand_idx = 0; cand_idx < num_of_candidates; ++cand_idx)
2582 : {
2583 :
2584 : uint32_t candidateIndex = context_ptr->cand_buff_indices[context_ptr->target_class][cand_idx];
2585 : ModeDecisionCandidateBuffer *candidate_buffer = candidate_buffer_ptr_array_base[candidateIndex];
2586 : ModeDecisionCandidate *candidate_ptr = candidate_buffer->candidate_ptr;
2587 :
2588 : // Initialize tx_depth
2589 : candidate_buffer->candidate_ptr->tx_depth = 0;
2590 :
2591 : if (!candidate_ptr->distortion_ready) {
2592 :
2593 : fast_loop_core(
2594 : candidate_buffer,
2595 : picture_control_set_ptr,
2596 : context_ptr,
2597 : input_picture_ptr,
2598 : inputOriginIndex,
2599 : inputCbOriginIndex,
2600 : inputCrOriginIndex,
2601 : cu_ptr,
2602 : cuOriginIndex,
2603 : cuChromaOriginIndex,
2604 : use_ssd);
2605 : }
2606 : }
2607 : }
2608 : #endif
2609 3078240 : void predictive_me_full_pel_search(
2610 : PictureControlSet *picture_control_set_ptr,
2611 : ModeDecisionContext *context_ptr,
2612 : EbPictureBufferDesc *input_picture_ptr,
2613 : uint32_t inputOriginIndex,
2614 : EbBool use_ssd,
2615 : uint8_t list_idx,
2616 : int8_t ref_idx,
2617 : int16_t mvx,
2618 : int16_t mvy,
2619 : int16_t search_position_start_x,
2620 : int16_t search_position_end_x,
2621 : int16_t search_position_start_y,
2622 : int16_t search_position_end_y,
2623 : int16_t search_step,
2624 : int16_t *best_mvx,
2625 : int16_t *best_mvy,
2626 : uint32_t *best_distortion)
2627 : {
2628 : uint32_t distortion;
2629 3078240 : ModeDecisionCandidateBuffer *candidate_buffer = &(context_ptr->candidate_buffer_ptr_array[0][0]);
2630 3078240 : candidate_buffer->candidate_ptr = &(context_ptr->fast_candidate_array[0]);
2631 :
2632 3078240 : EbReferenceObject *refObj = picture_control_set_ptr->ref_pic_ptr_array[list_idx][ref_idx]->object_ptr;
2633 6156480 : EbPictureBufferDesc *ref_pic = context_ptr->hbd_mode_decision ?
2634 3078240 : refObj->reference_picture16bit : refObj->reference_picture;
2635 24600300 : for (int32_t refinement_pos_x = search_position_start_x; refinement_pos_x <= search_position_end_x; ++refinement_pos_x) {
2636 128933000 : for (int32_t refinement_pos_y = search_position_start_y; refinement_pos_y <= search_position_end_y; ++refinement_pos_y) {
2637 :
2638 107411000 : uint32_t ref_origin_index = ref_pic->origin_x + (context_ptr->cu_origin_x + (mvx >> 3) + refinement_pos_x) + (context_ptr->cu_origin_y + (mvy >> 3) + ref_pic->origin_y + refinement_pos_y) * ref_pic->stride_y;
2639 107411000 : if (use_ssd) {
2640 214823000 : EbSpatialFullDistType spatial_full_dist_type_fun = context_ptr->hbd_mode_decision ?
2641 107411000 : full_distortion_kernel16_bits : spatial_full_distortion_kernel;
2642 :
2643 107394000 : distortion = (uint32_t) spatial_full_dist_type_fun(
2644 : input_picture_ptr->buffer_y,
2645 : inputOriginIndex,
2646 107411000 : input_picture_ptr->stride_y,
2647 : ref_pic->buffer_y,
2648 : ref_origin_index,
2649 107411000 : ref_pic->stride_y,
2650 107411000 : context_ptr->blk_geom->bwidth,
2651 107411000 : context_ptr->blk_geom->bheight);
2652 : }
2653 : else {
2654 0 : assert((context_ptr->blk_geom->bwidth >> 3) < 17);
2655 :
2656 0 : if (context_ptr->hbd_mode_decision) {
2657 0 : distortion = sad_16b_kernel(
2658 0 : ((uint16_t *)input_picture_ptr->buffer_y) + inputOriginIndex,
2659 0 : input_picture_ptr->stride_y,
2660 0 : ((uint16_t *)ref_pic->buffer_y) + ref_origin_index,
2661 0 : ref_pic->stride_y,
2662 0 : context_ptr->blk_geom->bheight,
2663 0 : context_ptr->blk_geom->bwidth);
2664 : } else {
2665 0 : distortion = nxm_sad_kernel_sub_sampled(
2666 0 : input_picture_ptr->buffer_y + inputOriginIndex,
2667 0 : input_picture_ptr->stride_y,
2668 0 : ref_pic->buffer_y + ref_origin_index,
2669 0 : ref_pic->stride_y,
2670 0 : context_ptr->blk_geom->bheight,
2671 0 : context_ptr->blk_geom->bwidth);
2672 : }
2673 : }
2674 :
2675 107398000 : if (distortion < *best_distortion) {
2676 19822500 : *best_mvx = mvx + (refinement_pos_x * search_step);
2677 19822500 : *best_mvy = mvy + (refinement_pos_y * search_step);
2678 19822500 : *best_distortion = distortion;
2679 : }
2680 : }
2681 : }
2682 3064960 : }
2683 :
2684 8170360 : void predictive_me_sub_pel_search(
2685 : PictureControlSet *picture_control_set_ptr,
2686 : ModeDecisionContext *context_ptr,
2687 : EbPictureBufferDesc *input_picture_ptr,
2688 : uint32_t inputOriginIndex,
2689 : uint32_t cuOriginIndex,
2690 : EbBool use_ssd,
2691 : uint8_t list_idx,
2692 : int8_t ref_idx,
2693 : int16_t mvx,
2694 : int16_t mvy,
2695 : int16_t search_position_start_x,
2696 : int16_t search_position_end_x,
2697 : int16_t search_position_start_y,
2698 : int16_t search_position_end_y,
2699 : int16_t search_step,
2700 : int16_t *best_mvx,
2701 : int16_t *best_mvy,
2702 : uint32_t *best_distortion,
2703 : uint8_t search_pattern)
2704 : {
2705 : uint32_t distortion;
2706 8170360 : ModeDecisionCandidateBuffer *candidate_buffer = &(context_ptr->candidate_buffer_ptr_array[0][0]);
2707 8170360 : candidate_buffer->candidate_ptr = &(context_ptr->fast_candidate_array[0]);
2708 :
2709 32680700 : for (int32_t refinement_pos_x = search_position_start_x; refinement_pos_x <= search_position_end_x; ++refinement_pos_x) {
2710 97895600 : for (int32_t refinement_pos_y = search_position_start_y; refinement_pos_y <= search_position_end_y; ++refinement_pos_y) {
2711 :
2712 73385300 : if (refinement_pos_x == 0 && refinement_pos_y == 0)
2713 8169810 : continue;
2714 :
2715 65215500 : if (search_pattern == 1 && refinement_pos_x != 0 && refinement_pos_y != 0)
2716 0 : continue;
2717 :
2718 65215500 : if (search_pattern == 2 && refinement_pos_y != 0)
2719 0 : continue;
2720 :
2721 65215500 : if (search_pattern == 3 && refinement_pos_x != 0)
2722 0 : continue;
2723 :
2724 65215500 : ModeDecisionCandidate *candidate_ptr = candidate_buffer->candidate_ptr;
2725 65215500 : EbPictureBufferDesc *prediction_ptr = candidate_buffer->prediction_ptr;
2726 :
2727 65215500 : candidate_ptr->type = INTER_MODE;
2728 65215500 : candidate_ptr->distortion_ready = 0;
2729 65215500 : candidate_ptr->use_intrabc = 0;
2730 65215500 : candidate_ptr->merge_flag = EB_FALSE;
2731 65215500 : candidate_ptr->prediction_direction[0] = (EbPredDirection)list_idx;
2732 65215500 : candidate_ptr->inter_mode = NEWMV;
2733 65215500 : candidate_ptr->pred_mode = NEWMV;
2734 65215500 : candidate_ptr->motion_mode = SIMPLE_TRANSLATION;
2735 : #if II_COMP_FLAG
2736 65215500 : candidate_ptr->is_interintra_used = 0;
2737 : #endif
2738 65215500 : candidate_ptr->is_compound = 0;
2739 65215500 : candidate_ptr->is_new_mv = 1;
2740 65215500 : candidate_ptr->is_zero_mv = 0;
2741 65215500 : candidate_ptr->drl_index = 0;
2742 65215500 : candidate_ptr->ref_mv_index = 0;
2743 65215500 : candidate_ptr->pred_mv_weight = 0;
2744 65215500 : candidate_ptr->ref_frame_type = svt_get_ref_frame_type(list_idx, ref_idx);
2745 65260600 : candidate_ptr->transform_type[PLANE_TYPE_Y] = DCT_DCT;
2746 65260600 : candidate_ptr->transform_type[PLANE_TYPE_UV] = DCT_DCT;
2747 65260600 : candidate_ptr->motion_vector_xl0 = list_idx == 0 ? mvx + (refinement_pos_x * search_step) : 0;
2748 65260600 : candidate_ptr->motion_vector_yl0 = list_idx == 0 ? mvy + (refinement_pos_y * search_step) : 0;
2749 65260600 : candidate_ptr->motion_vector_xl1 = list_idx == 1 ? mvx + (refinement_pos_x * search_step) : 0;
2750 65260600 : candidate_ptr->motion_vector_yl1 = list_idx == 1 ? mvy + (refinement_pos_y * search_step) : 0;
2751 65260600 : candidate_ptr->ref_frame_index_l0 = list_idx == 0 ? ref_idx : -1;
2752 65260600 : candidate_ptr->ref_frame_index_l1 = list_idx == 1 ? ref_idx : -1;
2753 65260600 : candidate_ptr->interp_filters = 0;
2754 :
2755 : // Prediction
2756 65260600 : context_ptr->md_staging_skip_interpolation_search = EB_TRUE;
2757 65260600 : context_ptr->md_staging_skip_inter_chroma_pred = EB_TRUE;
2758 65260600 : ProductPredictionFunTable[INTER_MODE](
2759 : context_ptr,
2760 : picture_control_set_ptr,
2761 : candidate_buffer);
2762 :
2763 : // Distortion
2764 65218400 : if (use_ssd) {
2765 130448000 : EbSpatialFullDistType spatial_full_dist_type_fun = context_ptr->hbd_mode_decision ?
2766 65224000 : full_distortion_kernel16_bits : spatial_full_distortion_kernel;
2767 :
2768 65248000 : distortion = (uint32_t) spatial_full_dist_type_fun(
2769 : input_picture_ptr->buffer_y,
2770 : inputOriginIndex,
2771 65224000 : input_picture_ptr->stride_y,
2772 : prediction_ptr->buffer_y,
2773 : cuOriginIndex,
2774 65224000 : prediction_ptr->stride_y,
2775 65224000 : context_ptr->blk_geom->bwidth,
2776 65224000 : context_ptr->blk_geom->bheight);
2777 : }
2778 : else {
2779 0 : assert((context_ptr->blk_geom->bwidth >> 3) < 17);
2780 :
2781 0 : if (context_ptr->hbd_mode_decision) {
2782 0 : distortion = sad_16b_kernel(
2783 0 : ((uint16_t *)input_picture_ptr->buffer_y) + inputOriginIndex,
2784 0 : input_picture_ptr->stride_y,
2785 0 : ((uint16_t *)prediction_ptr->buffer_y) + cuOriginIndex,
2786 0 : prediction_ptr->stride_y,
2787 0 : context_ptr->blk_geom->bheight,
2788 0 : context_ptr->blk_geom->bwidth);
2789 : } else {
2790 0 : distortion = nxm_sad_kernel_sub_sampled(
2791 0 : input_picture_ptr->buffer_y + inputOriginIndex,
2792 0 : input_picture_ptr->stride_y,
2793 0 : prediction_ptr->buffer_y + cuOriginIndex,
2794 0 : prediction_ptr->stride_y,
2795 0 : context_ptr->blk_geom->bheight,
2796 0 : context_ptr->blk_geom->bwidth);
2797 : }
2798 : }
2799 65228800 : if (distortion < *best_distortion) {
2800 8199010 : *best_mvx = mvx + (refinement_pos_x * search_step);
2801 8199010 : *best_mvy = mvy + (refinement_pos_y * search_step);
2802 8199010 : *best_distortion = distortion;
2803 : }
2804 : }
2805 : }
2806 8183660 : }
2807 :
2808 : void av1_set_ref_frame(MvReferenceFrame *rf, int8_t ref_frame_type);
2809 : uint8_t GetMaxDrlIndex(uint8_t refmvCnt, PredictionMode mode);
2810 :
2811 675242 : void predictive_me_search(
2812 : PictureControlSet *picture_control_set_ptr,
2813 : ModeDecisionContext *context_ptr,
2814 : EbPictureBufferDesc *input_picture_ptr,
2815 : uint32_t inputOriginIndex,
2816 : uint32_t cuOriginIndex) {
2817 :
2818 675242 : const SequenceControlSet *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
2819 :
2820 675242 : EbBool use_ssd = EB_TRUE;
2821 :
2822 : // Reset valid_refined_mv
2823 675242 : memset(context_ptr->valid_refined_mv, 0, 8); // [2][4]
2824 :
2825 8316710 : for (uint32_t refIt = 0; refIt < picture_control_set_ptr->parent_pcs_ptr->tot_ref_frame_types; ++refIt) {
2826 7641380 : MvReferenceFrame ref_pair = picture_control_set_ptr->parent_pcs_ptr->ref_frame_type_arr[refIt];
2827 :
2828 7641380 : MacroBlockD *xd = context_ptr->cu_ptr->av1xd;
2829 : uint8_t drli, maxDrlIndex;
2830 : IntMv nearestmv[2], nearmv[2], ref_mv[2];
2831 :
2832 : MvReferenceFrame rf[2];
2833 7641380 : av1_set_ref_frame(rf, ref_pair);
2834 :
2835 : // Reset search variable(s)
2836 7641310 : uint32_t best_mvp_distortion = (int32_t)~0;
2837 : uint32_t mvp_distortion;
2838 :
2839 7641310 : int16_t best_search_mvx = (int16_t)~0;
2840 7641310 : int16_t best_search_mvy = (int16_t)~0;
2841 7641310 : uint32_t best_search_distortion = (int32_t)~0;
2842 :
2843 : // Step 0: derive the MVP list; 1 nearest and up to 3 near
2844 : int16_t mvp_x_array[PREDICTIVE_ME_MAX_MVP_CANIDATES];
2845 : int16_t mvp_y_array[PREDICTIVE_ME_MAX_MVP_CANIDATES];
2846 7641310 : int8_t mvp_count = 0;
2847 7641310 : if (rf[1] == NONE_FRAME)
2848 : {
2849 3078130 : MvReferenceFrame frame_type = rf[0];
2850 3078130 : uint8_t list_idx = get_list_idx(rf[0]);
2851 3078140 : uint8_t ref_idx = get_ref_frame_idx(rf[0]);
2852 : // Get the ME MV
2853 3078320 : const MeLcuResults *me_results = picture_control_set_ptr->parent_pcs_ptr->me_results[context_ptr->me_sb_addr];
2854 : int16_t me_mv_x;
2855 : int16_t me_mv_y;
2856 3078320 : if (list_idx == 0) {
2857 1884620 : me_mv_x = (me_results->me_mv_array[context_ptr->me_block_offset][ref_idx].x_mv) << 1;
2858 1884620 : me_mv_y = (me_results->me_mv_array[context_ptr->me_block_offset][ref_idx].y_mv) << 1;
2859 : }
2860 : else {
2861 1193700 : me_mv_x = (me_results->me_mv_array[context_ptr->me_block_offset][((sequence_control_set_ptr->mrp_mode == 0) ? 4 : 2) + ref_idx].x_mv) << 1;
2862 1193700 : me_mv_y = (me_results->me_mv_array[context_ptr->me_block_offset][((sequence_control_set_ptr->mrp_mode == 0) ? 4 : 2) + ref_idx].y_mv) << 1;
2863 : }
2864 : // Round-up to the closest integer the ME MV
2865 3078320 : me_mv_x = (me_mv_x + 4)&~0x07;
2866 3078320 : me_mv_y = (me_mv_y + 4)&~0x07;
2867 :
2868 : uint32_t pa_me_distortion;
2869 3078320 : EbReferenceObject *refObj = picture_control_set_ptr->ref_pic_ptr_array[list_idx][ref_idx]->object_ptr;
2870 6156640 : EbPictureBufferDesc *ref_pic = context_ptr->hbd_mode_decision ?
2871 3078320 : refObj->reference_picture16bit : refObj->reference_picture;
2872 :
2873 3078320 : uint32_t ref_origin_index = ref_pic->origin_x + (context_ptr->cu_origin_x + (me_mv_x >> 3)) + (context_ptr->cu_origin_y + (me_mv_y >> 3) + ref_pic->origin_y) * ref_pic->stride_y;
2874 3078320 : if (use_ssd) {
2875 6156470 : EbSpatialFullDistType spatial_full_dist_type_fun = context_ptr->hbd_mode_decision ?
2876 3078240 : full_distortion_kernel16_bits : spatial_full_distortion_kernel;
2877 :
2878 3078150 : pa_me_distortion = (uint32_t) spatial_full_dist_type_fun(
2879 : input_picture_ptr->buffer_y,
2880 : inputOriginIndex,
2881 3078240 : input_picture_ptr->stride_y,
2882 : ref_pic->buffer_y,
2883 : ref_origin_index,
2884 3078240 : ref_pic->stride_y,
2885 3078240 : context_ptr->blk_geom->bwidth,
2886 3078240 : context_ptr->blk_geom->bheight);
2887 : }
2888 : else {
2889 86 : assert((context_ptr->blk_geom->bwidth >> 3) < 17);
2890 :
2891 86 : if (context_ptr->hbd_mode_decision) {
2892 0 : pa_me_distortion = sad_16b_kernel(
2893 0 : ((uint16_t *)input_picture_ptr->buffer_y) + inputOriginIndex,
2894 0 : input_picture_ptr->stride_y,
2895 0 : ((uint16_t *)ref_pic->buffer_y) + ref_origin_index,
2896 0 : ref_pic->stride_y,
2897 0 : context_ptr->blk_geom->bheight,
2898 0 : context_ptr->blk_geom->bwidth);
2899 : } else {
2900 86 : pa_me_distortion = nxm_sad_kernel_sub_sampled(
2901 86 : input_picture_ptr->buffer_y + inputOriginIndex,
2902 86 : input_picture_ptr->stride_y,
2903 86 : ref_pic->buffer_y + ref_origin_index,
2904 86 : ref_pic->stride_y,
2905 86 : context_ptr->blk_geom->bheight,
2906 86 : context_ptr->blk_geom->bwidth);
2907 : }
2908 : }
2909 :
2910 3078150 : if (pa_me_distortion != 0 || context_ptr->predictive_me_level >= 5) {
2911 :
2912 : //NEAREST
2913 3078100 : mvp_x_array[mvp_count] = (context_ptr->cu_ptr->ref_mvs[frame_type][0].as_mv.col + 4)&~0x07;
2914 3078100 : mvp_y_array[mvp_count] = (context_ptr->cu_ptr->ref_mvs[frame_type][0].as_mv.row + 4)&~0x07;
2915 :
2916 3078100 : mvp_count++;
2917 :
2918 : //NEAR
2919 3078100 : maxDrlIndex = GetMaxDrlIndex(xd->ref_mv_count[frame_type], NEARMV);
2920 :
2921 8220700 : for (drli = 0; drli < maxDrlIndex; drli++) {
2922 5142520 : get_av1_mv_pred_drl(
2923 : context_ptr,
2924 : context_ptr->cu_ptr,
2925 : frame_type,
2926 : 0,
2927 : NEARMV,
2928 : drli,
2929 : nearestmv,
2930 : nearmv,
2931 : ref_mv);
2932 :
2933 5142600 : if (((nearmv[0].as_mv.col + 4)&~0x07) != mvp_x_array[0] && ((nearmv[0].as_mv.row + 4)&~0x07) != mvp_y_array[0]) {
2934 1942380 : mvp_x_array[mvp_count] = (nearmv[0].as_mv.col + 4)&~0x07;
2935 1942380 : mvp_y_array[mvp_count] = (nearmv[0].as_mv.row + 4)&~0x07;
2936 1942380 : mvp_count++;
2937 : }
2938 :
2939 : }
2940 : // Step 1: derive the best MVP in term of distortion
2941 3078180 : int16_t best_mvp_x = 0;
2942 3078180 : int16_t best_mvp_y = 0;
2943 :
2944 8098340 : for (int8_t mvp_index = 0; mvp_index < mvp_count; mvp_index++) {
2945 :
2946 : // MVP Distortion
2947 5020070 : EbReferenceObject *refObj = picture_control_set_ptr->ref_pic_ptr_array[list_idx][ref_idx]->object_ptr;
2948 10040100 : EbPictureBufferDesc *ref_pic = context_ptr->hbd_mode_decision ?
2949 5020070 : refObj->reference_picture16bit : refObj->reference_picture;
2950 :
2951 5020070 : uint32_t ref_origin_index = ref_pic->origin_x + (context_ptr->cu_origin_x + (mvp_x_array[mvp_index] >> 3)) + (context_ptr->cu_origin_y + (mvp_y_array[mvp_index] >> 3) + ref_pic->origin_y) * ref_pic->stride_y;
2952 5020070 : if (use_ssd) {
2953 10040300 : EbSpatialFullDistType spatial_full_dist_type_fun = context_ptr->hbd_mode_decision ?
2954 5020160 : full_distortion_kernel16_bits : spatial_full_distortion_kernel;
2955 :
2956 5020180 : mvp_distortion = (uint32_t) spatial_full_dist_type_fun(
2957 : input_picture_ptr->buffer_y,
2958 : inputOriginIndex,
2959 5020160 : input_picture_ptr->stride_y,
2960 : ref_pic->buffer_y,
2961 : ref_origin_index,
2962 5020160 : ref_pic->stride_y,
2963 5020160 : context_ptr->blk_geom->bwidth,
2964 5020160 : context_ptr->blk_geom->bheight);
2965 : }
2966 : else {
2967 0 : assert((context_ptr->blk_geom->bwidth >> 3) < 17);
2968 :
2969 0 : if (context_ptr->hbd_mode_decision) {
2970 0 : mvp_distortion = sad_16b_kernel(
2971 0 : ((uint16_t *)input_picture_ptr->buffer_y) + inputOriginIndex,
2972 0 : input_picture_ptr->stride_y,
2973 0 : ((uint16_t *)ref_pic->buffer_y) + ref_origin_index,
2974 0 : ref_pic->stride_y,
2975 0 : context_ptr->blk_geom->bheight,
2976 0 : context_ptr->blk_geom->bwidth);
2977 : } else {
2978 0 : mvp_distortion = nxm_sad_kernel_sub_sampled(
2979 0 : input_picture_ptr->buffer_y + inputOriginIndex,
2980 0 : input_picture_ptr->stride_y,
2981 0 : ref_pic->buffer_y + ref_origin_index,
2982 0 : ref_pic->stride_y,
2983 0 : context_ptr->blk_geom->bheight,
2984 0 : context_ptr->blk_geom->bwidth);
2985 : }
2986 : }
2987 :
2988 5020170 : if (mvp_distortion < best_mvp_distortion) {
2989 3498400 : best_mvp_distortion = mvp_distortion;
2990 3498400 : best_mvp_x = mvp_x_array[mvp_index];
2991 3498400 : best_mvp_y = mvp_y_array[mvp_index];
2992 : }
2993 : }
2994 :
2995 : // Step 2: perform full pel search around the best MVP
2996 3078270 : best_mvp_x = (best_mvp_x + 4)&~0x07;
2997 3078270 : best_mvp_y = (best_mvp_y + 4)&~0x07;
2998 :
2999 3078270 : predictive_me_full_pel_search(
3000 : picture_control_set_ptr,
3001 : context_ptr,
3002 : input_picture_ptr,
3003 : inputOriginIndex,
3004 : use_ssd,
3005 : list_idx,
3006 : ref_idx,
3007 : best_mvp_x,
3008 : best_mvp_y,
3009 : -(FULL_PEL_REF_WINDOW_WIDTH >> 1),
3010 : +(FULL_PEL_REF_WINDOW_WIDTH >> 1),
3011 : -(FULL_PEL_REF_WINDOW_HEIGHT >> 1),
3012 : +(FULL_PEL_REF_WINDOW_HEIGHT >> 1),
3013 : 8,
3014 : &best_search_mvx,
3015 : &best_search_mvy,
3016 : &best_search_distortion);
3017 :
3018 : EbBool exit_predictive_me_sub_pel;
3019 :
3020 3078180 : if (pa_me_distortion == 0)
3021 0 : exit_predictive_me_sub_pel = EB_TRUE;
3022 3078180 : else if (best_search_distortion <= pa_me_distortion)
3023 2608370 : exit_predictive_me_sub_pel = EB_FALSE;
3024 : else {
3025 469811 : exit_predictive_me_sub_pel = ((((best_search_distortion - pa_me_distortion) * 100) / pa_me_distortion) < PREDICTIVE_ME_DEVIATION_TH) ?
3026 469811 : EB_FALSE :
3027 : EB_TRUE;
3028 : }
3029 :
3030 3078180 : if (exit_predictive_me_sub_pel == EB_FALSE || context_ptr->predictive_me_level >= 5) {
3031 :
3032 2723640 : if (context_ptr->predictive_me_level >= 2) {
3033 :
3034 : uint8_t search_pattern;
3035 : // 0: all possible position(s): horizontal, vertical, diagonal
3036 : // 1: horizontal, vertical
3037 : // 2: horizontal only
3038 : // 3: vertical only
3039 :
3040 : // Step 3: perform half pel search around the best full pel position
3041 2723760 : search_pattern = (context_ptr->predictive_me_level >= 4) ? 0 : 1;
3042 :
3043 2723760 : predictive_me_sub_pel_search(
3044 : picture_control_set_ptr,
3045 : context_ptr,
3046 : input_picture_ptr,
3047 : inputOriginIndex,
3048 : cuOriginIndex,
3049 : use_ssd,
3050 : list_idx,
3051 : ref_idx,
3052 : best_search_mvx,
3053 : best_search_mvy,
3054 : -(HALF_PEL_REF_WINDOW >> 1),
3055 : +(HALF_PEL_REF_WINDOW >> 1),
3056 : -(HALF_PEL_REF_WINDOW >> 1),
3057 : +(HALF_PEL_REF_WINDOW >> 1),
3058 : 4,
3059 : &best_search_mvx,
3060 : &best_search_mvy,
3061 : &best_search_distortion,
3062 : search_pattern);
3063 :
3064 2723650 : if (context_ptr->predictive_me_level == 3) {
3065 0 : if ((best_search_mvx & 0x07) != 0 || (best_search_mvy & 0x07) != 0) {
3066 :
3067 0 : if ((best_search_mvx & 0x07) == 0)
3068 0 : search_pattern = 2;
3069 : else // if(best_search_mvy & 0x07 == 0)
3070 0 : search_pattern = 3;
3071 :
3072 0 : predictive_me_sub_pel_search(
3073 : picture_control_set_ptr,
3074 : context_ptr,
3075 : input_picture_ptr,
3076 : inputOriginIndex,
3077 : cuOriginIndex,
3078 : use_ssd,
3079 : list_idx,
3080 : ref_idx,
3081 : best_search_mvx,
3082 : best_search_mvy,
3083 : -(HALF_PEL_REF_WINDOW >> 1),
3084 : +(HALF_PEL_REF_WINDOW >> 1),
3085 : -(HALF_PEL_REF_WINDOW >> 1),
3086 : +(HALF_PEL_REF_WINDOW >> 1),
3087 : 4,
3088 : &best_search_mvx,
3089 : &best_search_mvy,
3090 : &best_search_distortion,
3091 : search_pattern);
3092 : }
3093 : }
3094 :
3095 : // Step 4: perform quarter pel search around the best half pel position
3096 2723650 : search_pattern = (context_ptr->predictive_me_level >= 4) ? 0 : 1;
3097 2723650 : predictive_me_sub_pel_search(
3098 : picture_control_set_ptr,
3099 : context_ptr,
3100 : input_picture_ptr,
3101 : inputOriginIndex,
3102 : cuOriginIndex,
3103 : use_ssd,
3104 : list_idx,
3105 : ref_idx,
3106 : best_search_mvx,
3107 : best_search_mvy,
3108 : -(QUARTER_PEL_REF_WINDOW >> 1),
3109 : +(QUARTER_PEL_REF_WINDOW >> 1),
3110 : -(QUARTER_PEL_REF_WINDOW >> 1),
3111 : +(QUARTER_PEL_REF_WINDOW >> 1),
3112 : 2,
3113 : &best_search_mvx,
3114 : &best_search_mvy,
3115 : &best_search_distortion,
3116 : search_pattern);
3117 :
3118 2723690 : if (context_ptr->predictive_me_level == 3) {
3119 0 : if ((best_search_mvx & 0x03) != 0 || (best_search_mvy & 0x03) != 0) {
3120 :
3121 0 : if ((best_search_mvx & 0x03) == 0)
3122 0 : search_pattern = 2;
3123 : else // if(best_search_mvy & 0x03 == 0)
3124 0 : search_pattern = 3;
3125 :
3126 0 : predictive_me_sub_pel_search(
3127 : picture_control_set_ptr,
3128 : context_ptr,
3129 : input_picture_ptr,
3130 : inputOriginIndex,
3131 : cuOriginIndex,
3132 : use_ssd,
3133 : list_idx,
3134 : ref_idx,
3135 : best_search_mvx,
3136 : best_search_mvy,
3137 : -(QUARTER_PEL_REF_WINDOW >> 1),
3138 : +(QUARTER_PEL_REF_WINDOW >> 1),
3139 : -(QUARTER_PEL_REF_WINDOW >> 1),
3140 : +(QUARTER_PEL_REF_WINDOW >> 1),
3141 : 2,
3142 : &best_search_mvx,
3143 : &best_search_mvy,
3144 : &best_search_distortion,
3145 : search_pattern);
3146 : }
3147 : }
3148 : }
3149 : #if EIGHT_PEL_PREDICTIVE_ME
3150 : // Step 5: perform eigh pel search around the best quarter pel position
3151 2723570 : if (picture_control_set_ptr->parent_pcs_ptr->frm_hdr.allow_high_precision_mv) {
3152 2723710 : uint8_t search_pattern = 0;
3153 2723710 : predictive_me_sub_pel_search(
3154 : picture_control_set_ptr,
3155 : context_ptr,
3156 : input_picture_ptr,
3157 : inputOriginIndex,
3158 : cuOriginIndex,
3159 : use_ssd,
3160 : list_idx,
3161 : ref_idx,
3162 : best_search_mvx,
3163 : best_search_mvy,
3164 : #if MDC_ADAPTIVE_LEVEL
3165 : -(EIGHT_PEL_REF_WINDOW >> 1),
3166 : +(EIGHT_PEL_REF_WINDOW >> 1),
3167 : -(EIGHT_PEL_REF_WINDOW >> 1),
3168 : +(EIGHT_PEL_REF_WINDOW >> 1),
3169 : #else
3170 : -(QUARTER_PEL_REF_WINDOW >> 1),
3171 : +(QUARTER_PEL_REF_WINDOW >> 1),
3172 : -(QUARTER_PEL_REF_WINDOW >> 1),
3173 : +(QUARTER_PEL_REF_WINDOW >> 1),
3174 : #endif
3175 : 1,
3176 : &best_search_mvx,
3177 : &best_search_mvy,
3178 : &best_search_distortion,
3179 : search_pattern);
3180 : }
3181 : #endif
3182 2723700 : context_ptr->best_spatial_pred_mv[list_idx][ref_idx][0] = best_search_mvx;
3183 2723700 : context_ptr->best_spatial_pred_mv[list_idx][ref_idx][1] = best_search_mvy;
3184 2723700 : context_ptr->valid_refined_mv[list_idx][ref_idx] = 1;
3185 : }
3186 : }
3187 : }
3188 : }
3189 675325 : }
3190 9266730 : void AV1CostCalcCfl(
3191 : PictureControlSet *picture_control_set_ptr,
3192 : ModeDecisionCandidateBuffer *candidate_buffer,
3193 : LargestCodingUnit *sb_ptr,
3194 : ModeDecisionContext *context_ptr,
3195 : uint32_t component_mask,
3196 : EbPictureBufferDesc *input_picture_ptr,
3197 : uint32_t inputCbOriginIndex,
3198 : uint32_t cuChromaOriginIndex,
3199 : uint64_t full_distortion[DIST_CALC_TOTAL],
3200 : uint64_t *coeffBits,
3201 : EbBool check_dc)
3202 : {
3203 9266730 : ModeDecisionCandidate *candidate_ptr = candidate_buffer->candidate_ptr;
3204 : uint32_t count_non_zero_coeffs[3][MAX_NUM_OF_TU_PER_CU];
3205 : uint64_t cbFullDistortion[DIST_CALC_TOTAL];
3206 : uint64_t crFullDistortion[DIST_CALC_TOTAL];
3207 9266730 : uint64_t cb_coeff_bits = 0;
3208 9266730 : uint64_t cr_coeff_bits = 0;
3209 9266730 : uint32_t chroma_width = context_ptr->blk_geom->bwidth_uv;
3210 9266730 : uint32_t chroma_height = context_ptr->blk_geom->bheight_uv;
3211 : // FullLoop and TU search
3212 : int32_t alpha_q3;
3213 9266730 : uint16_t cb_qp = context_ptr->qp;
3214 9266730 : uint16_t cr_qp = context_ptr->qp;
3215 :
3216 9266730 : full_distortion[DIST_CALC_RESIDUAL] = 0;
3217 9266730 : full_distortion[DIST_CALC_PREDICTION] = 0;
3218 9266730 : *coeffBits = 0;
3219 :
3220 : // Loop over alphas and find the best
3221 9266730 : if (component_mask == COMPONENT_CHROMA_CB || component_mask == COMPONENT_CHROMA || component_mask == COMPONENT_ALL) {
3222 5078560 : cbFullDistortion[DIST_CALC_RESIDUAL] = 0;
3223 5078560 : crFullDistortion[DIST_CALC_RESIDUAL] = 0;
3224 5078560 : cbFullDistortion[DIST_CALC_PREDICTION] = 0;
3225 5078560 : crFullDistortion[DIST_CALC_PREDICTION] = 0;
3226 5078560 : cb_coeff_bits = 0;
3227 5078560 : cr_coeff_bits = 0;
3228 5078560 : alpha_q3 = (check_dc) ? 0:
3229 4556090 : cfl_idx_to_alpha(candidate_ptr->cfl_alpha_idx, candidate_ptr->cfl_alpha_signs, CFL_PRED_U); // once for U, once for V
3230 5077980 : assert(chroma_width * CFL_BUF_LINE + chroma_height <=
3231 : CFL_BUF_SQUARE);
3232 :
3233 5077980 : if (!context_ptr->hbd_mode_decision) {
3234 5078030 : eb_cfl_predict_lbd(
3235 5078030 : context_ptr->pred_buf_q3,
3236 5078030 : &(candidate_buffer->prediction_ptr->buffer_cb[cuChromaOriginIndex]),
3237 5078030 : candidate_buffer->prediction_ptr->stride_cb,
3238 5078030 : &(candidate_buffer->cfl_temp_prediction_ptr->buffer_cb[cuChromaOriginIndex]),
3239 5078030 : candidate_buffer->cfl_temp_prediction_ptr->stride_cb,
3240 : alpha_q3,
3241 : 8,
3242 : chroma_width,
3243 : chroma_height);
3244 : } else {
3245 0 : eb_cfl_predict_hbd(
3246 0 : context_ptr->pred_buf_q3,
3247 0 : ((uint16_t*)candidate_buffer->prediction_ptr->buffer_cb) + cuChromaOriginIndex,
3248 0 : candidate_buffer->prediction_ptr->stride_cb,
3249 0 : ((uint16_t*)candidate_buffer->cfl_temp_prediction_ptr->buffer_cb) + cuChromaOriginIndex,
3250 0 : candidate_buffer->cfl_temp_prediction_ptr->stride_cb,
3251 : alpha_q3,
3252 : 10,
3253 : chroma_width,
3254 : chroma_height);
3255 : }
3256 :
3257 : // Cb Residual
3258 5077570 : residual_kernel(
3259 : input_picture_ptr->buffer_cb,
3260 : inputCbOriginIndex,
3261 5077570 : input_picture_ptr->stride_cb,
3262 5077570 : candidate_buffer->cfl_temp_prediction_ptr->buffer_cb,
3263 : cuChromaOriginIndex,
3264 5077570 : candidate_buffer->cfl_temp_prediction_ptr->stride_cb,
3265 5077570 : (int16_t*)candidate_buffer->residual_ptr->buffer_cb,
3266 : cuChromaOriginIndex,
3267 5077570 : candidate_buffer->residual_ptr->stride_cb,
3268 5077570 : context_ptr->hbd_mode_decision,
3269 : chroma_width,
3270 : chroma_height);
3271 :
3272 5077400 : full_loop_r(
3273 : sb_ptr,
3274 : candidate_buffer,
3275 : context_ptr,
3276 : input_picture_ptr,
3277 : picture_control_set_ptr,
3278 : PICTURE_BUFFER_DESC_Cb_FLAG,
3279 : cb_qp,
3280 : cr_qp,
3281 : &(*count_non_zero_coeffs[1]),
3282 : &(*count_non_zero_coeffs[2]));
3283 :
3284 : // Create new function
3285 5077880 : cu_full_distortion_fast_tu_mode_r(
3286 : sb_ptr,
3287 : candidate_buffer,
3288 : context_ptr,
3289 : candidate_ptr,
3290 : picture_control_set_ptr,
3291 : input_picture_ptr,
3292 : cbFullDistortion,
3293 : crFullDistortion,
3294 : count_non_zero_coeffs,
3295 : COMPONENT_CHROMA_CB,
3296 : &cb_coeff_bits,
3297 : &cr_coeff_bits,
3298 : 0);
3299 :
3300 5077660 : full_distortion[DIST_CALC_RESIDUAL] += cbFullDistortion[DIST_CALC_RESIDUAL];
3301 5077660 : full_distortion[DIST_CALC_PREDICTION] += cbFullDistortion[DIST_CALC_PREDICTION];
3302 5077660 : *coeffBits += cb_coeff_bits;
3303 : }
3304 9265820 : if (component_mask == COMPONENT_CHROMA_CR || component_mask == COMPONENT_CHROMA || component_mask == COMPONENT_ALL) {
3305 4714410 : cbFullDistortion[DIST_CALC_RESIDUAL] = 0;
3306 4714410 : crFullDistortion[DIST_CALC_RESIDUAL] = 0;
3307 4714410 : cbFullDistortion[DIST_CALC_PREDICTION] = 0;
3308 4714410 : crFullDistortion[DIST_CALC_PREDICTION] = 0;
3309 :
3310 4714410 : cb_coeff_bits = 0;
3311 4714410 : cr_coeff_bits = 0;
3312 4714410 : alpha_q3 = (check_dc) ? 0 :
3313 4191950 : cfl_idx_to_alpha(candidate_ptr->cfl_alpha_idx, candidate_ptr->cfl_alpha_signs, CFL_PRED_V); // once for U, once for V
3314 4713810 : assert(chroma_width * CFL_BUF_LINE + chroma_height <=
3315 : CFL_BUF_SQUARE);
3316 :
3317 4713810 : if (!context_ptr->hbd_mode_decision) {
3318 4713850 : eb_cfl_predict_lbd(
3319 4713850 : context_ptr->pred_buf_q3,
3320 4713850 : &(candidate_buffer->prediction_ptr->buffer_cr[cuChromaOriginIndex]),
3321 4713850 : candidate_buffer->prediction_ptr->stride_cr,
3322 4713850 : &(candidate_buffer->cfl_temp_prediction_ptr->buffer_cr[cuChromaOriginIndex]),
3323 4713850 : candidate_buffer->cfl_temp_prediction_ptr->stride_cr,
3324 : alpha_q3,
3325 : 8,
3326 : chroma_width,
3327 : chroma_height);
3328 : } else {
3329 0 : eb_cfl_predict_hbd(
3330 0 : context_ptr->pred_buf_q3,
3331 0 : ((uint16_t*)candidate_buffer->prediction_ptr->buffer_cr) + cuChromaOriginIndex,
3332 0 : candidate_buffer->prediction_ptr->stride_cr,
3333 0 : ((uint16_t*)candidate_buffer->cfl_temp_prediction_ptr->buffer_cr) + cuChromaOriginIndex,
3334 0 : candidate_buffer->cfl_temp_prediction_ptr->stride_cr,
3335 : alpha_q3,
3336 : 10,
3337 : chroma_width,
3338 : chroma_height);
3339 : }
3340 :
3341 : // Cr Residual
3342 4713500 : residual_kernel(
3343 : input_picture_ptr->buffer_cr,
3344 : inputCbOriginIndex,
3345 4713500 : input_picture_ptr->stride_cr,
3346 4713500 : candidate_buffer->cfl_temp_prediction_ptr->buffer_cr,
3347 : cuChromaOriginIndex,
3348 4713500 : candidate_buffer->cfl_temp_prediction_ptr->stride_cr,
3349 4713500 : (int16_t*)candidate_buffer->residual_ptr->buffer_cr,
3350 : cuChromaOriginIndex,
3351 4713500 : candidate_buffer->residual_ptr->stride_cr,
3352 4713500 : context_ptr->hbd_mode_decision,
3353 : chroma_width,
3354 : chroma_height);
3355 :
3356 4713320 : full_loop_r(
3357 : sb_ptr,
3358 : candidate_buffer,
3359 : context_ptr,
3360 : input_picture_ptr,
3361 : picture_control_set_ptr,
3362 : PICTURE_BUFFER_DESC_Cr_FLAG,
3363 : cb_qp,
3364 : cr_qp,
3365 : &(*count_non_zero_coeffs[1]),
3366 : &(*count_non_zero_coeffs[2]));
3367 4713300 : candidate_ptr->v_has_coeff = *count_non_zero_coeffs[2] ? EB_TRUE : EB_FALSE;
3368 :
3369 : // Create new function
3370 4713300 : cu_full_distortion_fast_tu_mode_r(
3371 : sb_ptr,
3372 : candidate_buffer,
3373 : context_ptr,
3374 : candidate_ptr,
3375 : picture_control_set_ptr,
3376 : input_picture_ptr,
3377 : cbFullDistortion,
3378 : crFullDistortion,
3379 : count_non_zero_coeffs,
3380 : COMPONENT_CHROMA_CR,
3381 : &cb_coeff_bits,
3382 : &cr_coeff_bits,
3383 : 0);
3384 :
3385 4713280 : full_distortion[DIST_CALC_RESIDUAL] += crFullDistortion[DIST_CALC_RESIDUAL];
3386 4713280 : full_distortion[DIST_CALC_PREDICTION] += crFullDistortion[DIST_CALC_PREDICTION];
3387 4713280 : *coeffBits += cr_coeff_bits;
3388 : }
3389 9264700 : }
3390 :
/*
 * Combines the sign 'a' of the given plane with the sign 'b' of the other
 * plane into a joint-sign index:
 *   plane == CFL_PRED_U : a * CFL_SIGNS + b - 1
 *   otherwise           : b * CFL_SIGNS + a - 1
 * Every argument is parenthesized so that expression arguments (e.g. "x - 1")
 * expand with the intended precedence.
 */
#define PLANE_SIGN_TO_JOINT_SIGN(plane, a, b) \
    ((plane) == CFL_PRED_U ? (a) * CFL_SIGNS + (b) - 1 : (b) * CFL_SIGNS + (a) - 1)
3393 : /*************************Pick the best alpha for cfl mode or Choose DC******************************************************/
// Chooses between CFL and DC chroma prediction for the candidate.
//
// The routine searches, per chroma plane and per sign, for the alpha magnitude
// with the lowest RD cost (each evaluation goes through AV1CostCalcCfl), keeps
// the best joint sign across both planes, and then compares that cost with the
// cost of the alpha = 0 ("DC") evaluation of both planes.
//
// On return the candidate's intra_chroma_mode is UV_DC_PRED or UV_CFL_PRED and
// its cfl_alpha_idx / cfl_alpha_signs hold the selected values (both 0 for DC).
// Note that cfl_alpha_idx / cfl_alpha_signs are overwritten repeatedly during
// the search because AV1CostCalcCfl reads them from the candidate.
3394 522466 : void cfl_rd_pick_alpha(
3395 : PictureControlSet *picture_control_set_ptr,
3396 : ModeDecisionCandidateBuffer *candidate_buffer,
3397 : LargestCodingUnit *sb_ptr,
3398 : ModeDecisionContext *context_ptr,
3399 : EbPictureBufferDesc *input_picture_ptr,
3400 : uint32_t inputCbOriginIndex,
3401 : uint32_t cuChromaOriginIndex)
3402 : {
3403 522466 : int64_t best_rd = INT64_MAX;
3404 : uint64_t full_distortion[DIST_CALC_TOTAL];
3405 : uint64_t coeffBits;
3406 :
// RD cost of signalling UV_CFL_PRED as the chroma mode (rate only, zero distortion).
3407 522466 : const int64_t mode_rd =
3408 522466 : RDCOST(context_ptr->full_lambda,
3409 : (uint64_t)candidate_buffer->candidate_ptr->md_rate_estimation_ptr->intra_uv_mode_fac_bits[CFL_ALLOWED][candidate_buffer->candidate_ptr->intra_luma_mode][UV_CFL_PRED], 0);
3410 :
// Per joint sign and plane: best RD cost found so far and the alpha index (c) that produced it.
3411 : int64_t best_rd_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES];
3412 : int32_t best_c[CFL_JOINT_SIGNS][CFL_PRED_PLANES];
3413 :
// Pass 1: initialise the tables and record, per plane, the cost of a zero alpha.
3414 1567350 : for (int32_t plane = 0; plane < CFL_PRED_PLANES; plane++) {
3415 1044900 : coeffBits = 0;
3416 1044900 : full_distortion[DIST_CALC_RESIDUAL] = 0;
3417 9403550 : for (int32_t joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
3418 8358660 : best_rd_uv[joint_sign][plane] = INT64_MAX;
3419 8358660 : best_c[joint_sign][plane] = 0;
3420 : }
3421 : // Collect RD stats for an alpha value of zero in this plane.
3422 : // Skip i == CFL_SIGN_ZERO as (0, 0) is invalid.
3423 3134660 : for (int32_t i = CFL_SIGN_NEG; i < CFL_SIGNS; i++) {
3424 2089780 : const int32_t joint_sign = PLANE_SIGN_TO_JOINT_SIGN(plane, CFL_SIGN_ZERO, i);
// The cost evaluation runs only on the first iteration; later iterations reuse
// coeffBits / full_distortion and differ only in alpha_rate.
3425 2089780 : if (i == CFL_SIGN_NEG) {
3426 1044870 : candidate_buffer->candidate_ptr->cfl_alpha_idx = 0;
3427 1044870 : candidate_buffer->candidate_ptr->cfl_alpha_signs = joint_sign;
3428 :
3429 1044870 : AV1CostCalcCfl(
3430 : picture_control_set_ptr,
3431 : candidate_buffer,
3432 : sb_ptr,
3433 : context_ptr,
3434 : (plane == 0) ? COMPONENT_CHROMA_CB : COMPONENT_CHROMA_CR,
3435 : input_picture_ptr,
3436 : inputCbOriginIndex,
3437 : cuChromaOriginIndex,
3438 : full_distortion,
3439 : &coeffBits,
3440 : 0);
3441 :
// NOTE(review): coeffBits is uint64_t compared against INT64_MAX; whether
// AV1CostCalcCfl can ever produce that value is not evident here - confirm.
3442 1044860 : if (coeffBits == INT64_MAX) break;
3443 : }
3444 :
3445 2089770 : const int32_t alpha_rate = candidate_buffer->candidate_ptr->md_rate_estimation_ptr->cfl_alpha_fac_bits[joint_sign][plane][0];
3446 :
3447 2089770 : best_rd_uv[joint_sign][plane] =
3448 2089770 : RDCOST(context_ptr->full_lambda, coeffBits + alpha_rate, full_distortion[DIST_CALC_RESIDUAL]);
3449 : }
3450 : }
3451 :
3452 522454 : int32_t best_joint_sign = -1;
3453 :
// Pass 2: for each plane and each non-zero sign, try increasing alpha indices c.
3454 1567560 : for (int32_t plane = 0; plane < CFL_PRED_PLANES; plane++) {
3455 3134560 : for (int32_t pn_sign = CFL_SIGN_NEG; pn_sign < CFL_SIGNS; pn_sign++) {
// 'progress' grows by 2 for every c that improved at least one table entry;
// once c > 2 the search stops as soon as progress falls behind c.
3456 2089460 : int32_t progress = 0;
3457 9788830 : for (int32_t c = 0; c < CFL_ALPHABET_SIZE; c++) {
3458 9788440 : int32_t flag = 0;
3459 9788440 : if (c > 2 && progress < c) break;
3460 7699150 : coeffBits = 0;
3461 7699150 : full_distortion[DIST_CALC_RESIDUAL] = 0;
3462 30804000 : for (int32_t i = 0; i < CFL_SIGNS; i++) {
3463 23104600 : const int32_t joint_sign = PLANE_SIGN_TO_JOINT_SIGN(plane, pn_sign, i);
// As in pass 1, the cost is evaluated once (i == 0) and reused for the other
// values of i, which only change the joint sign and therefore alpha_rate.
3464 23104600 : if (i == 0) {
3465 7699390 : candidate_buffer->candidate_ptr->cfl_alpha_idx = (c << CFL_ALPHABET_SIZE_LOG2) + c;
3466 7699390 : candidate_buffer->candidate_ptr->cfl_alpha_signs = joint_sign;
3467 :
3468 7699390 : AV1CostCalcCfl(
3469 : picture_control_set_ptr,
3470 : candidate_buffer,
3471 : sb_ptr,
3472 : context_ptr,
3473 : (plane == 0) ? COMPONENT_CHROMA_CB : COMPONENT_CHROMA_CR,
3474 : input_picture_ptr,
3475 : inputCbOriginIndex,
3476 : cuChromaOriginIndex,
3477 : full_distortion,
3478 : &coeffBits,
3479 : 0);
3480 :
3481 7699620 : if (coeffBits == INT64_MAX) break;
3482 : }
3483 :
3484 23104900 : const int32_t alpha_rate = candidate_buffer->candidate_ptr->md_rate_estimation_ptr->cfl_alpha_fac_bits[joint_sign][plane][c];
3485 :
3486 23104900 : int64_t this_rd =
3487 23104900 : RDCOST(context_ptr->full_lambda, coeffBits + alpha_rate, full_distortion[DIST_CALC_RESIDUAL]);
// Keep the per-plane best for this joint sign.
3488 23104900 : if (this_rd >= best_rd_uv[joint_sign][plane]) continue;
3489 7589330 : best_rd_uv[joint_sign][plane] = this_rd;
3490 7589330 : best_c[joint_sign][plane] = c;
3491 :
3492 7589330 : flag = 2;
// A combined cost needs a valid entry for the other plane under the same joint sign.
3493 7589330 : if (best_rd_uv[joint_sign][!plane] == INT64_MAX) continue;
3494 4903790 : this_rd += mode_rd + best_rd_uv[joint_sign][!plane];
3495 4903790 : if (this_rd >= best_rd) continue;
3496 1637620 : best_rd = this_rd;
3497 1637620 : best_joint_sign = joint_sign;
3498 : }
3499 7699380 : progress += flag;
3500 : }
3501 : }
3502 : }
3503 :
3504 : // Compare with DC Chroma
3505 522681 : coeffBits = 0;
3506 522681 : full_distortion[DIST_CALC_RESIDUAL] = 0;
3507 :
3508 522681 : candidate_buffer->candidate_ptr->cfl_alpha_idx = 0;
3509 522681 : candidate_buffer->candidate_ptr->cfl_alpha_signs = 0;
3510 :
// RD cost of signalling UV_DC_PRED as the chroma mode (rate only, zero distortion).
3511 522681 : const int64_t dc_mode_rd =
3512 522681 : RDCOST(context_ptr->full_lambda,
3513 : candidate_buffer->candidate_ptr->md_rate_estimation_ptr->intra_uv_mode_fac_bits[CFL_ALLOWED][candidate_buffer->candidate_ptr->intra_luma_mode][UV_DC_PRED], 0);
3514 :
// Both planes, with check_dc = 1 so that alpha is forced to zero.
3515 522681 : AV1CostCalcCfl(
3516 : picture_control_set_ptr,
3517 : candidate_buffer,
3518 : sb_ptr,
3519 : context_ptr,
3520 : COMPONENT_CHROMA,
3521 : input_picture_ptr,
3522 : inputCbOriginIndex,
3523 : cuChromaOriginIndex,
3524 : full_distortion,
3525 : &coeffBits,
3526 : 1);
3527 :
3528 522451 : int64_t dc_rd =
3529 522451 : RDCOST(context_ptr->full_lambda, coeffBits, full_distortion[DIST_CALC_RESIDUAL]);
3530 :
3531 522451 : dc_rd += dc_mode_rd;
// Ties go to DC.
3532 522451 : if (dc_rd <= best_rd) {
3533 233340 : candidate_buffer->candidate_ptr->intra_chroma_mode = UV_DC_PRED;
3534 233340 : candidate_buffer->candidate_ptr->cfl_alpha_idx = 0;
3535 233340 : candidate_buffer->candidate_ptr->cfl_alpha_signs = 0;
3536 : }
3537 : else {
3538 289111 : candidate_buffer->candidate_ptr->intra_chroma_mode = UV_CFL_PRED;
3539 289111 : int32_t ind = 0;
// Pack the per-plane best alpha indices for the winning joint sign; fall back
// to index 0 / joint sign 0 when no joint sign was selected.
3540 289111 : if (best_joint_sign >= 0) {
3541 289126 : const int32_t u = best_c[best_joint_sign][CFL_PRED_U];
3542 289126 : const int32_t v = best_c[best_joint_sign][CFL_PRED_V];
3543 289126 : ind = (u << CFL_ALPHABET_SIZE_LOG2) + v;
3544 : }
3545 : else
3546 0 : best_joint_sign = 0;
3547 289111 : candidate_buffer->candidate_ptr->cfl_alpha_idx = ind;
3548 289111 : candidate_buffer->candidate_ptr->cfl_alpha_signs = best_joint_sign;
3549 : }
3550 522451 : }
3551 :
3552 : // If mode is CFL:
3553 : // 1: recon the Luma
3554 : // 2: Form the pred_buf_q3
3555 : // 3: Loop over alphas and find the best or choose DC
3556 : // 4: Recalculate the residual for chroma
3557 749219 : static void CflPrediction(
3558 : PictureControlSet *picture_control_set_ptr,
3559 : ModeDecisionCandidateBuffer *candidate_buffer,
3560 : LargestCodingUnit *sb_ptr,
3561 : ModeDecisionContext *context_ptr,
3562 : EbPictureBufferDesc *input_picture_ptr,
3563 : uint32_t inputCbOriginIndex,
3564 : uint32_t cuChromaOriginIndex)
3565 : {
3566 749219 : if (context_ptr->blk_geom->has_uv) {
3567 : // 1: recon the Luma
3568 519774 : AV1PerformInverseTransformReconLuma(
3569 : picture_control_set_ptr,
3570 : context_ptr,
3571 : candidate_buffer);
3572 :
3573 519774 : uint32_t recLumaOffset = ((context_ptr->blk_geom->origin_y >> 3) << 3) * candidate_buffer->recon_ptr->stride_y +
3574 519774 : ((context_ptr->blk_geom->origin_x >> 3) << 3);
3575 : // 2: Form the pred_buf_q3
3576 519774 : uint32_t chroma_width = context_ptr->blk_geom->bwidth_uv;
3577 519774 : uint32_t chroma_height = context_ptr->blk_geom->bheight_uv;
3578 :
3579 : // Down sample Luma
3580 519774 : if (!context_ptr->hbd_mode_decision) {
3581 2598920 : cfl_luma_subsampling_420_lbd_c(
3582 519783 : &(context_ptr->cfl_temp_luma_recon[recLumaOffset]),
3583 519783 : candidate_buffer->recon_ptr->stride_y,
3584 519783 : context_ptr->pred_buf_q3,
3585 519783 : context_ptr->blk_geom->bwidth_uv == context_ptr->blk_geom->bwidth ? (context_ptr->blk_geom->bwidth_uv << 1) : context_ptr->blk_geom->bwidth,
3586 519783 : context_ptr->blk_geom->bheight_uv == context_ptr->blk_geom->bheight ? (context_ptr->blk_geom->bheight_uv << 1) : context_ptr->blk_geom->bheight);
3587 : } else {
3588 0 : cfl_luma_subsampling_420_hbd_c(
3589 0 : context_ptr->cfl_temp_luma_recon16bit + recLumaOffset,
3590 0 : candidate_buffer->recon_ptr->stride_y,
3591 0 : context_ptr->pred_buf_q3,
3592 0 : context_ptr->blk_geom->bwidth_uv == context_ptr->blk_geom->bwidth ? (context_ptr->blk_geom->bwidth_uv << 1) : context_ptr->blk_geom->bwidth,
3593 0 : context_ptr->blk_geom->bheight_uv == context_ptr->blk_geom->bheight ? (context_ptr->blk_geom->bheight_uv << 1) : context_ptr->blk_geom->bheight);
3594 : }
3595 519759 : int32_t round_offset = chroma_width * chroma_height / 2;
3596 :
3597 1039520 : eb_subtract_average(
3598 519764 : context_ptr->pred_buf_q3,
3599 : chroma_width,
3600 : chroma_height,
3601 : round_offset,
3602 519759 : LOG2F(chroma_width) + LOG2F(chroma_height));
3603 :
3604 : // 3: Loop over alphas and find the best or choose DC
3605 519781 : cfl_rd_pick_alpha(
3606 : picture_control_set_ptr,
3607 : candidate_buffer,
3608 : sb_ptr,
3609 : context_ptr,
3610 : input_picture_ptr,
3611 : inputCbOriginIndex,
3612 : cuChromaOriginIndex);
3613 :
3614 519783 : if (candidate_buffer->candidate_ptr->intra_chroma_mode == UV_CFL_PRED) {
3615 : // 4: Recalculate the prediction and the residual
3616 : int32_t alpha_q3_cb =
3617 288122 : cfl_idx_to_alpha(candidate_buffer->candidate_ptr->cfl_alpha_idx, candidate_buffer->candidate_ptr->cfl_alpha_signs, CFL_PRED_U);
3618 : int32_t alpha_q3_cr =
3619 288121 : cfl_idx_to_alpha(candidate_buffer->candidate_ptr->cfl_alpha_idx, candidate_buffer->candidate_ptr->cfl_alpha_signs, CFL_PRED_V);
3620 :
3621 288119 : assert(chroma_height * CFL_BUF_LINE + chroma_width <=
3622 : CFL_BUF_SQUARE);
3623 :
3624 288119 : if (!context_ptr->hbd_mode_decision) {
3625 288119 : eb_cfl_predict_lbd(
3626 288119 : context_ptr->pred_buf_q3,
3627 288119 : &(candidate_buffer->prediction_ptr->buffer_cb[cuChromaOriginIndex]),
3628 288119 : candidate_buffer->prediction_ptr->stride_cb,
3629 288119 : &(candidate_buffer->prediction_ptr->buffer_cb[cuChromaOriginIndex]),
3630 288119 : candidate_buffer->prediction_ptr->stride_cb,
3631 : alpha_q3_cb,
3632 : 8,
3633 : chroma_width,
3634 : chroma_height);
3635 :
3636 288127 : eb_cfl_predict_lbd(
3637 288127 : context_ptr->pred_buf_q3,
3638 288127 : &(candidate_buffer->prediction_ptr->buffer_cr[cuChromaOriginIndex]),
3639 288127 : candidate_buffer->prediction_ptr->stride_cr,
3640 288127 : &(candidate_buffer->prediction_ptr->buffer_cr[cuChromaOriginIndex]),
3641 288127 : candidate_buffer->prediction_ptr->stride_cr,
3642 : alpha_q3_cr,
3643 : 8,
3644 : chroma_width,
3645 : chroma_height);
3646 : } else {
3647 0 : eb_cfl_predict_hbd(
3648 0 : context_ptr->pred_buf_q3,
3649 0 : ((uint16_t*)candidate_buffer->prediction_ptr->buffer_cb) + cuChromaOriginIndex,
3650 0 : candidate_buffer->prediction_ptr->stride_cb,
3651 0 : ((uint16_t*)candidate_buffer->prediction_ptr->buffer_cb) + cuChromaOriginIndex,
3652 0 : candidate_buffer->prediction_ptr->stride_cb,
3653 : alpha_q3_cb,
3654 : 10,
3655 : chroma_width,
3656 : chroma_height);
3657 :
3658 0 : eb_cfl_predict_hbd(
3659 0 : context_ptr->pred_buf_q3,
3660 0 : ((uint16_t*)candidate_buffer->prediction_ptr->buffer_cr) + cuChromaOriginIndex,
3661 0 : candidate_buffer->prediction_ptr->stride_cr,
3662 0 : ((uint16_t*)candidate_buffer->prediction_ptr->buffer_cr) + cuChromaOriginIndex,
3663 0 : candidate_buffer->prediction_ptr->stride_cr,
3664 : alpha_q3_cr,
3665 : 10,
3666 : chroma_width,
3667 : chroma_height);
3668 : }
3669 :
3670 : // Cb Residual
3671 288121 : residual_kernel(
3672 : input_picture_ptr->buffer_cb,
3673 : inputCbOriginIndex,
3674 288121 : input_picture_ptr->stride_cb,
3675 288121 : candidate_buffer->prediction_ptr->buffer_cb,
3676 : cuChromaOriginIndex,
3677 288121 : candidate_buffer->prediction_ptr->stride_cb,
3678 288121 : (int16_t*)candidate_buffer->residual_ptr->buffer_cb,
3679 : cuChromaOriginIndex,
3680 288121 : candidate_buffer->residual_ptr->stride_cb,
3681 288121 : context_ptr->hbd_mode_decision,
3682 288121 : context_ptr->blk_geom->bwidth_uv,
3683 288121 : context_ptr->blk_geom->bheight_uv);
3684 :
3685 : // Cr Residual
3686 288124 : residual_kernel(
3687 : input_picture_ptr->buffer_cr,
3688 : inputCbOriginIndex,
3689 288124 : input_picture_ptr->stride_cr,
3690 288124 : candidate_buffer->prediction_ptr->buffer_cr,
3691 : cuChromaOriginIndex,
3692 288124 : candidate_buffer->prediction_ptr->stride_cr,
3693 288124 : (int16_t*)candidate_buffer->residual_ptr->buffer_cr,
3694 : cuChromaOriginIndex,
3695 288124 : candidate_buffer->residual_ptr->stride_cr,
3696 288124 : context_ptr->hbd_mode_decision,
3697 288124 : context_ptr->blk_geom->bwidth_uv,
3698 288124 : context_ptr->blk_geom->bheight_uv);
3699 : }
3700 : else {
3701 : // Alphas = 0, Preds are the same as DC. Switch to DC mode
3702 231661 : candidate_buffer->candidate_ptr->intra_chroma_mode = UV_DC_PRED;
3703 : }
3704 : }
3705 749234 : }
/*
 * Decides whether the transform-type search can be skipped for a candidate.
 *
 * Returns 1 when sq_size >= 128, or when cu_cost is at least
 * floor(ref_fast_cost * weight / 100), i.e. the candidate's fast cost is
 * substantially larger than the reference (best) fast cost; returns 0
 * otherwise.
 *
 * The threshold is evaluated without forming the 64-bit product
 * ref_fast_cost * weight directly: that product wraps around for large
 * operands and would yield a bogus, too-small threshold. If the exact
 * threshold does not fit in 64 bits, no cu_cost can reach it and the cost
 * test is false.
 *
 * sq_size       : square size associated with the block
 * ref_fast_cost : reference fast cost
 * cu_cost       : fast cost of the current candidate
 * weight        : percentage applied to ref_fast_cost
 */
uint8_t get_skip_tx_search_flag(
    int32_t  sq_size,
    uint64_t ref_fast_cost,
    uint64_t cu_cost,
    uint64_t weight)
{
    /* The size rule overrides the cost comparison. */
    if (sq_size >= 128)
        return 1;

    /* ref_fast_cost = 100 * quotient + remainder, hence
       floor(ref_fast_cost * weight / 100)
           = quotient * weight + floor(remainder * weight / 100). */
    const uint64_t quotient  = ref_fast_cost / 100;
    const uint64_t remainder = ref_fast_cost % 100;

    if (weight != 0 && quotient > UINT64_MAX / weight)
        return 0; /* threshold exceeds the 64-bit range */

    const uint64_t whole_part = quotient * weight;

    /* floor(remainder * weight / 100), split again so that the intermediate
       products stay below 2^64 (remainder < 100). */
    const uint64_t frac_part =
        remainder * (weight / 100) + (remainder * (weight % 100)) / 100;

    if (frac_part > UINT64_MAX - whole_part)
        return 0; /* threshold exceeds the 64-bit range */

    return (cu_cost >= whole_part + frac_part) ? 1 : 0;
}
3718 :
3719 236348000 : static INLINE TxType av1_get_tx_type(
3720 : BlockSize sb_type,
3721 : int32_t is_inter,
3722 : PredictionMode pred_mode,
3723 : UvPredictionMode pred_mode_uv,
3724 : PlaneType plane_type,
3725 : const MacroBlockD *xd, int32_t blk_row,
3726 : int32_t blk_col, TxSize tx_size,
3727 : int32_t reduced_tx_set)
3728 : {
3729 : UNUSED(sb_type);
3730 : UNUSED(*xd);
3731 : UNUSED(blk_row);
3732 : UNUSED(blk_col);
3733 :
3734 : // block_size sb_type = BLOCK_8X8;
3735 :
3736 : MbModeInfo mbmi;
3737 236348000 : mbmi.block_mi.mode = pred_mode;
3738 236348000 : mbmi.block_mi.uv_mode = pred_mode_uv;
3739 :
3740 : // const MbModeInfo *const mbmi = xd->mi[0];
3741 : // const struct MacroblockdPlane *const pd = &xd->plane[plane_type];
3742 : const TxSetType tx_set_type =
3743 236348000 : /*av1_*/get_ext_tx_set_type(tx_size, is_inter, reduced_tx_set);
3744 :
3745 236550000 : TxType tx_type = DCT_DCT;
3746 236550000 : if ( /*xd->lossless[mbmi->segment_id] ||*/ txsize_sqr_up_map[tx_size] > TX_32X32)
3747 0 : tx_type = DCT_DCT;
3748 : else {
3749 236550000 : if (plane_type == PLANE_TYPE_Y) {
3750 : //const int32_t txk_type_idx =
3751 : // av1_get_txk_type_index(/*mbmi->*/sb_type, blk_row, blk_col);
3752 : //tx_type = mbmi->txk_type[txk_type_idx];
3753 : }
3754 236584000 : else if (is_inter /*is_inter_block(mbmi)*/) {
3755 : // scale back to y plane's coordinate
3756 : //blk_row <<= pd->subsampling_y;
3757 : //blk_col <<= pd->subsampling_x;
3758 : //const int32_t txk_type_idx =
3759 : // av1_get_txk_type_index(mbmi->sb_type, blk_row, blk_col);
3760 : //tx_type = mbmi->txk_type[txk_type_idx];
3761 : }
3762 : else {
3763 : // In intra mode, uv planes don't share the same prediction mode as y
3764 : // plane, so the tx_type should not be shared
3765 236598000 : tx_type = intra_mode_to_tx_type(&mbmi.block_mi, PLANE_TYPE_UV);
3766 : }
3767 : }
3768 : ASSERT(tx_type < TX_TYPES);
3769 236779000 : if (!av1_ext_tx_used[tx_set_type][tx_type]) return DCT_DCT;
3770 224583000 : return tx_type;
3771 : }
3772 :
3773 522356 : void check_best_indepedant_cfl(
3774 : PictureControlSet *picture_control_set_ptr,
3775 : EbPictureBufferDesc *input_picture_ptr,
3776 : ModeDecisionContext *context_ptr,
3777 : uint32_t inputCbOriginIndex,
3778 : uint32_t cuChromaOriginIndex,
3779 : ModeDecisionCandidateBuffer *candidate_buffer,
3780 : uint8_t cb_qp,
3781 : uint8_t cr_qp,
3782 : uint64_t *cbFullDistortion,
3783 : uint64_t *crFullDistortion,
3784 : uint64_t *cb_coeff_bits,
3785 : uint64_t *cr_coeff_bits)
3786 : {
3787 :
3788 : #if FILTER_INTRA_FLAG
3789 522356 : if (candidate_buffer->candidate_ptr->filter_intra_mode != FILTER_INTRA_MODES)
3790 175796 : assert(candidate_buffer->candidate_ptr->intra_luma_mode == DC_PRED);
3791 : #endif
3792 522356 : FrameHeader *frm_hdr = &picture_control_set_ptr->parent_pcs_ptr->frm_hdr;
3793 : // cfl cost
3794 522356 : uint64_t chromaRate = 0;
3795 522356 : if (candidate_buffer->candidate_ptr->intra_chroma_mode == UV_CFL_PRED) {
3796 287185 : chromaRate += candidate_buffer->candidate_ptr->md_rate_estimation_ptr->cfl_alpha_fac_bits[candidate_buffer->candidate_ptr->cfl_alpha_signs][CFL_PRED_U][CFL_IDX_U(candidate_buffer->candidate_ptr->cfl_alpha_idx)] +
3797 287185 : candidate_buffer->candidate_ptr->md_rate_estimation_ptr->cfl_alpha_fac_bits[candidate_buffer->candidate_ptr->cfl_alpha_signs][CFL_PRED_V][CFL_IDX_V(candidate_buffer->candidate_ptr->cfl_alpha_idx)];
3798 :
3799 287185 : chromaRate += (uint64_t)candidate_buffer->candidate_ptr->md_rate_estimation_ptr->intra_uv_mode_fac_bits[CFL_ALLOWED][candidate_buffer->candidate_ptr->intra_luma_mode][UV_CFL_PRED];
3800 287185 : chromaRate -= (uint64_t)candidate_buffer->candidate_ptr->md_rate_estimation_ptr->intra_uv_mode_fac_bits[CFL_ALLOWED][candidate_buffer->candidate_ptr->intra_luma_mode][UV_DC_PRED];
3801 : }
3802 : else
3803 235171 : chromaRate = (uint64_t)candidate_buffer->candidate_ptr->md_rate_estimation_ptr->intra_uv_mode_fac_bits[CFL_ALLOWED][candidate_buffer->candidate_ptr->intra_luma_mode][UV_DC_PRED];
3804 522356 : int coeff_rate = (int)(*cb_coeff_bits + *cr_coeff_bits);
3805 522356 : int distortion = (int)(cbFullDistortion[DIST_CALC_RESIDUAL] + crFullDistortion[DIST_CALC_RESIDUAL]);
3806 522356 : int rate = (int)(coeff_rate + chromaRate + candidate_buffer->candidate_ptr->fast_luma_rate);
3807 522356 : uint64_t cfl_uv_cost = RDCOST(context_ptr->full_lambda, rate, distortion);
3808 :
3809 : // cfl vs. best independant
3810 522356 : if (context_ptr->best_uv_cost[candidate_buffer->candidate_ptr->intra_luma_mode][3 + candidate_buffer->candidate_ptr->angle_delta[PLANE_TYPE_Y]] < cfl_uv_cost) {
3811 : // Update the current candidate
3812 197813 : candidate_buffer->candidate_ptr->intra_chroma_mode = context_ptr->best_uv_mode[candidate_buffer->candidate_ptr->intra_luma_mode][MAX_ANGLE_DELTA + candidate_buffer->candidate_ptr->angle_delta[PLANE_TYPE_Y]];
3813 197813 : candidate_buffer->candidate_ptr->angle_delta[PLANE_TYPE_UV] = context_ptr->best_uv_angle[candidate_buffer->candidate_ptr->intra_luma_mode][MAX_ANGLE_DELTA + candidate_buffer->candidate_ptr->angle_delta[PLANE_TYPE_Y]];
3814 197813 : candidate_buffer->candidate_ptr->is_directional_chroma_mode_flag = (uint8_t)av1_is_directional_mode((PredictionMode)(context_ptr->best_uv_mode[candidate_buffer->candidate_ptr->intra_luma_mode][MAX_ANGLE_DELTA + candidate_buffer->candidate_ptr->angle_delta[PLANE_TYPE_Y]]));
3815 :
3816 : // check if candidate_buffer->candidate_ptr->fast_luma_rate = context_ptr->fast_luma_rate[candidate_buffer->candidate_ptr->intra_luma_mode];
3817 197813 : candidate_buffer->candidate_ptr->fast_chroma_rate = context_ptr->fast_chroma_rate[candidate_buffer->candidate_ptr->intra_luma_mode][MAX_ANGLE_DELTA + candidate_buffer->candidate_ptr->angle_delta[PLANE_TYPE_Y]];
3818 :
3819 395625 : candidate_buffer->candidate_ptr->transform_type_uv =
3820 197813 : av1_get_tx_type(
3821 197813 : context_ptr->blk_geom->bsize,
3822 : 0,
3823 : (PredictionMode)NULL,
3824 197813 : (UvPredictionMode)context_ptr->best_uv_mode[candidate_buffer->candidate_ptr->intra_luma_mode][3 + candidate_buffer->candidate_ptr->angle_delta[PLANE_TYPE_Y]],
3825 : PLANE_TYPE_UV,
3826 : 0,
3827 : 0,
3828 : 0,
3829 197813 : context_ptr->blk_geom->txsize_uv[0][0],
3830 197813 : frm_hdr->reduced_tx_set);
3831 :
3832 : // Start uv search path
3833 197812 : context_ptr->uv_search_path = EB_TRUE;
3834 :
3835 197812 : memset(candidate_buffer->candidate_ptr->eob[1], 0, sizeof(uint16_t));
3836 197812 : memset(candidate_buffer->candidate_ptr->eob[2], 0, sizeof(uint16_t));
3837 197812 : candidate_buffer->candidate_ptr->u_has_coeff = 0;
3838 197812 : candidate_buffer->candidate_ptr->v_has_coeff = 0;
3839 197812 : cbFullDistortion[DIST_CALC_RESIDUAL] = 0;
3840 197812 : crFullDistortion[DIST_CALC_RESIDUAL] = 0;
3841 197812 : cbFullDistortion[DIST_CALC_PREDICTION] = 0;
3842 197812 : crFullDistortion[DIST_CALC_PREDICTION] = 0;
3843 :
3844 197812 : *cb_coeff_bits = 0;
3845 197812 : *cr_coeff_bits = 0;
3846 :
3847 : uint32_t count_non_zero_coeffs[3][MAX_NUM_OF_TU_PER_CU];
3848 197812 : context_ptr->md_staging_skip_inter_chroma_pred = EB_FALSE;
3849 197812 : ProductPredictionFunTable[candidate_buffer->candidate_ptr->type](
3850 : context_ptr,
3851 : picture_control_set_ptr,
3852 : candidate_buffer);
3853 :
3854 : // Cb Residual
3855 197812 : residual_kernel(
3856 : input_picture_ptr->buffer_cb,
3857 : inputCbOriginIndex,
3858 197812 : input_picture_ptr->stride_cb,
3859 197812 : candidate_buffer->prediction_ptr->buffer_cb,
3860 : cuChromaOriginIndex,
3861 197812 : candidate_buffer->prediction_ptr->stride_cb,
3862 197812 : (int16_t*)candidate_buffer->residual_ptr->buffer_cb,
3863 : cuChromaOriginIndex,
3864 197812 : candidate_buffer->residual_ptr->stride_cb,
3865 197812 : context_ptr->hbd_mode_decision,
3866 197812 : context_ptr->blk_geom->bwidth_uv,
3867 197812 : context_ptr->blk_geom->bheight_uv);
3868 :
3869 : // Cr Residual
3870 197812 : residual_kernel(
3871 : input_picture_ptr->buffer_cr,
3872 : inputCbOriginIndex,
3873 197812 : input_picture_ptr->stride_cr,
3874 197812 : candidate_buffer->prediction_ptr->buffer_cr,
3875 : cuChromaOriginIndex,
3876 197812 : candidate_buffer->prediction_ptr->stride_cr,
3877 197812 : (int16_t*)candidate_buffer->residual_ptr->buffer_cr,
3878 : cuChromaOriginIndex,
3879 197812 : candidate_buffer->residual_ptr->stride_cr,
3880 197812 : context_ptr->hbd_mode_decision,
3881 197812 : context_ptr->blk_geom->bwidth_uv,
3882 197812 : context_ptr->blk_geom->bheight_uv);
3883 :
3884 197811 : full_loop_r(
3885 : context_ptr->sb_ptr,
3886 : candidate_buffer,
3887 : context_ptr,
3888 : input_picture_ptr,
3889 : picture_control_set_ptr,
3890 : PICTURE_BUFFER_DESC_CHROMA_MASK,
3891 : cb_qp,
3892 : cr_qp,
3893 : &(*count_non_zero_coeffs[1]),
3894 : &(*count_non_zero_coeffs[2]));
3895 :
3896 197811 : cu_full_distortion_fast_tu_mode_r(
3897 : context_ptr->sb_ptr,
3898 : candidate_buffer,
3899 : context_ptr,
3900 : candidate_buffer->candidate_ptr,
3901 : picture_control_set_ptr,
3902 : input_picture_ptr,
3903 : cbFullDistortion,
3904 : crFullDistortion,
3905 : count_non_zero_coeffs,
3906 : COMPONENT_CHROMA,
3907 : cb_coeff_bits,
3908 : cr_coeff_bits,
3909 : 1);
3910 :
3911 : // End uv search path
3912 197813 : context_ptr->uv_search_path = EB_FALSE;
3913 : }
3914 522356 : }
3915 :
3916 : // double check the usage of tx_search_luma_recon_neighbor_array16bit
3917 1899240 : EbErrorType av1_intra_luma_prediction(
3918 : ModeDecisionContext *md_context_ptr,
3919 : PictureControlSet *picture_control_set_ptr,
3920 : ModeDecisionCandidateBuffer *candidate_buffer_ptr)
3921 : {
3922 1899240 : EbErrorType return_error = EB_ErrorNone;
3923 :
3924 1899240 : uint16_t txb_origin_x = md_context_ptr->cu_origin_x + md_context_ptr->blk_geom->tx_boff_x[md_context_ptr->tx_depth][md_context_ptr->txb_itr];
3925 1899240 : uint16_t txb_origin_y = md_context_ptr->cu_origin_y + md_context_ptr->blk_geom->tx_boff_y[md_context_ptr->tx_depth][md_context_ptr->txb_itr];
3926 :
3927 1899240 : uint8_t tx_width = md_context_ptr->blk_geom->tx_width[md_context_ptr->tx_depth][md_context_ptr->txb_itr];
3928 1899240 : uint8_t tx_height = md_context_ptr->blk_geom->tx_height[md_context_ptr->tx_depth][md_context_ptr->txb_itr];
3929 :
3930 1899240 : uint32_t modeTypeLeftNeighborIndex = get_neighbor_array_unit_left_index(
3931 : md_context_ptr->mode_type_neighbor_array,
3932 : txb_origin_y);
3933 1899250 : uint32_t modeTypeTopNeighborIndex = get_neighbor_array_unit_top_index(
3934 : md_context_ptr->mode_type_neighbor_array,
3935 : txb_origin_x);
3936 1899260 : uint32_t intraLumaModeLeftNeighborIndex = get_neighbor_array_unit_left_index(
3937 : md_context_ptr->intra_luma_mode_neighbor_array,
3938 : txb_origin_y);
3939 1899240 : uint32_t intraLumaModeTopNeighborIndex = get_neighbor_array_unit_top_index(
3940 : md_context_ptr->intra_luma_mode_neighbor_array,
3941 : txb_origin_x);
3942 :
3943 326767 : md_context_ptr->intra_luma_left_mode = (uint32_t)(
3944 1899260 : (md_context_ptr->mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] != INTRA_MODE) ? DC_PRED/*EB_INTRA_DC*/ :
3945 1572490 : (uint32_t)md_context_ptr->intra_luma_mode_neighbor_array->left_array[intraLumaModeLeftNeighborIndex]);
3946 :
3947 344277 : md_context_ptr->intra_luma_top_mode = (uint32_t)(
3948 1899260 : (md_context_ptr->mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] != INTRA_MODE) ? DC_PRED/*EB_INTRA_DC*/ :
3949 1554980 : (uint32_t)md_context_ptr->intra_luma_mode_neighbor_array->top_array[intraLumaModeTopNeighborIndex]); // use DC. This seems like we could use a LCU-width
3950 :
3951 1899260 : TxSize tx_size = md_context_ptr->blk_geom->txsize[md_context_ptr->tx_depth][md_context_ptr->txb_itr];
3952 :
3953 : PredictionMode mode;
3954 1899260 : if (!md_context_ptr->hbd_mode_decision) {
3955 : uint8_t topNeighArray[64 * 2 + 1];
3956 : uint8_t leftNeighArray[64 * 2 + 1];
3957 :
3958 1899240 : if (txb_origin_y != 0)
3959 1826950 : memcpy(topNeighArray + 1, md_context_ptr->tx_search_luma_recon_neighbor_array->top_array + txb_origin_x, tx_width * 2);
3960 1899240 : if (txb_origin_x != 0)
3961 1828220 : memcpy(leftNeighArray + 1, md_context_ptr->tx_search_luma_recon_neighbor_array->left_array + txb_origin_y, tx_height * 2);
3962 1899240 : if (txb_origin_y != 0 && txb_origin_x != 0)
3963 1757970 : topNeighArray[0] = leftNeighArray[0] = md_context_ptr->tx_search_luma_recon_neighbor_array->top_left_array[MAX_PICTURE_HEIGHT_SIZE + txb_origin_x - txb_origin_y];
3964 :
3965 1899240 : mode = candidate_buffer_ptr->candidate_ptr->pred_mode;
3966 1899240 : eb_av1_predict_intra_block(
3967 1899240 : &md_context_ptr->sb_ptr->tile_info,
3968 : !ED_STAGE,
3969 : md_context_ptr->blk_geom,
3970 1899240 : picture_control_set_ptr->parent_pcs_ptr->av1_cm, //const Av1Common *cm,
3971 1899240 : md_context_ptr->blk_geom->bwidth,
3972 1899240 : md_context_ptr->blk_geom->bheight,
3973 : tx_size,
3974 : mode, //PredictionMode mode,
3975 1899240 : candidate_buffer_ptr->candidate_ptr->angle_delta[PLANE_TYPE_Y],
3976 : #if PAL_SUP
3977 1899240 : candidate_buffer_ptr->candidate_ptr->palette_info.pmi.palette_size[0]>0,
3978 1899240 : &candidate_buffer_ptr->candidate_ptr->palette_info , //ATB MD
3979 : #else
3980 : 0, //int32_t use_palette,
3981 : #endif
3982 : #if FILTER_INTRA_FLAG
3983 1899240 : candidate_buffer_ptr->candidate_ptr->filter_intra_mode,
3984 : #else
3985 : FILTER_INTRA_MODES, //CHKN FilterIntraMode filter_intra_mode,
3986 : #endif
3987 : topNeighArray + 1,
3988 : leftNeighArray + 1,
3989 : candidate_buffer_ptr->prediction_ptr, //uint8_t *dst,
3990 1899240 : md_context_ptr->blk_geom->tx_boff_x[md_context_ptr->tx_depth][md_context_ptr->txb_itr] >> 2, //int32_t col_off,
3991 1899240 : md_context_ptr->blk_geom->tx_boff_y[md_context_ptr->tx_depth][md_context_ptr->txb_itr] >> 2, //int32_t row_off,
3992 : PLANE_TYPE_Y, //int32_t plane,
3993 1899240 : md_context_ptr->blk_geom->bsize,
3994 1899240 : md_context_ptr->cu_origin_x,
3995 1899240 : md_context_ptr->cu_origin_y,
3996 1899240 : md_context_ptr->cu_origin_x,
3997 1899240 : md_context_ptr->cu_origin_y,
3998 1899240 : md_context_ptr->blk_geom->tx_org_x[md_context_ptr->tx_depth][md_context_ptr->txb_itr], //uint32_t cuOrgX used only for prediction Ptr
3999 1899240 : md_context_ptr->blk_geom->tx_org_y[md_context_ptr->tx_depth][md_context_ptr->txb_itr] //uint32_t cuOrgY used only for prediction Ptr
4000 : );
4001 : } else {
4002 : uint16_t topNeighArray[64 * 2 + 1];
4003 : uint16_t leftNeighArray[64 * 2 + 1];
4004 :
4005 12 : if (txb_origin_y != 0)
4006 0 : memcpy(topNeighArray + 1, (uint16_t*)(md_context_ptr->tx_search_luma_recon_neighbor_array16bit->top_array) + txb_origin_x, sizeof(uint16_t) * tx_width * 2);
4007 12 : if (txb_origin_x != 0)
4008 0 : memcpy(leftNeighArray + 1, (uint16_t*)(md_context_ptr->tx_search_luma_recon_neighbor_array16bit->left_array) + txb_origin_y, sizeof(uint16_t) * tx_height * 2);
4009 12 : if (txb_origin_y != 0 && txb_origin_x != 0)
4010 0 : topNeighArray[0] = leftNeighArray[0] = ((uint16_t*)(md_context_ptr->tx_search_luma_recon_neighbor_array16bit->top_left_array) + MAX_PICTURE_HEIGHT_SIZE + txb_origin_x - txb_origin_y)[0];
4011 :
4012 12 : mode = candidate_buffer_ptr->candidate_ptr->pred_mode;
4013 12 : eb_av1_predict_intra_block_16bit(
4014 12 : &md_context_ptr->sb_ptr->tile_info,
4015 : !ED_STAGE,
4016 : md_context_ptr->blk_geom,
4017 12 : picture_control_set_ptr->parent_pcs_ptr->av1_cm,
4018 12 : md_context_ptr->blk_geom->bwidth,
4019 12 : md_context_ptr->blk_geom->bheight,
4020 : tx_size,
4021 : mode,
4022 12 : candidate_buffer_ptr->candidate_ptr->angle_delta[PLANE_TYPE_Y],
4023 : #if PAL_SUP
4024 12 : candidate_buffer_ptr->candidate_ptr->palette_info.pmi.palette_size[0] > 0,
4025 12 : &candidate_buffer_ptr->candidate_ptr->palette_info, //ATB MD
4026 : #else
4027 : 0,
4028 : #endif
4029 : FILTER_INTRA_MODES,
4030 : topNeighArray + 1,
4031 : leftNeighArray + 1,
4032 : candidate_buffer_ptr->prediction_ptr,
4033 12 : md_context_ptr->blk_geom->tx_boff_x[md_context_ptr->tx_depth][md_context_ptr->txb_itr] >> 2,
4034 12 : md_context_ptr->blk_geom->tx_boff_y[md_context_ptr->tx_depth][md_context_ptr->txb_itr] >> 2, //int32_t row_off,
4035 : PLANE_TYPE_Y,
4036 12 : md_context_ptr->blk_geom->bsize,
4037 12 : md_context_ptr->cu_origin_x,
4038 12 : md_context_ptr->cu_origin_y,
4039 12 : md_context_ptr->cu_origin_x,
4040 12 : md_context_ptr->cu_origin_y,
4041 12 : md_context_ptr->blk_geom->tx_org_x[md_context_ptr->tx_depth][md_context_ptr->txb_itr], //uint32_t cuOrgX used only for prediction Ptr
4042 12 : md_context_ptr->blk_geom->tx_org_y[md_context_ptr->tx_depth][md_context_ptr->txb_itr] //uint32_t cuOrgY used only for prediction Ptr
4043 : );
4044 : }
4045 :
4046 1899440 : return return_error;
4047 : }
4048 :
4049 1471100 : static void tx_search_update_recon_sample_neighbor_array(
4050 : NeighborArrayUnit *lumaReconSampleNeighborArray,
4051 : EbPictureBufferDesc *recon_buffer,
4052 : uint32_t tu_origin_x,
4053 : uint32_t tu_origin_y,
4054 : uint32_t input_origin_x,
4055 : uint32_t input_origin_y,
4056 : uint32_t width,
4057 : uint32_t height,
4058 : EbBool hbd)
4059 : {
4060 1471100 : if (hbd) {
4061 0 : neighbor_array_unit16bit_sample_write(
4062 : lumaReconSampleNeighborArray,
4063 0 : (uint16_t*)recon_buffer->buffer_y,
4064 0 : recon_buffer->stride_y,
4065 0 : recon_buffer->origin_x + tu_origin_x,
4066 0 : recon_buffer->origin_y + tu_origin_y,
4067 : input_origin_x,
4068 : input_origin_y,
4069 : width,
4070 : height,
4071 : NEIGHBOR_ARRAY_UNIT_FULL_MASK);
4072 : } else {
4073 1471100 : neighbor_array_unit_sample_write(
4074 : lumaReconSampleNeighborArray,
4075 : recon_buffer->buffer_y,
4076 1471100 : recon_buffer->stride_y,
4077 1471100 : recon_buffer->origin_x + tu_origin_x,
4078 1471100 : recon_buffer->origin_y + tu_origin_y,
4079 : input_origin_x,
4080 : input_origin_y,
4081 : width,
4082 : height,
4083 : NEIGHBOR_ARRAY_UNIT_FULL_MASK);
4084 : }
4085 :
4086 1471070 : return;
4087 : }
4088 :
4089 35501400 : uint8_t get_end_tx_depth(BlockSize bsize, uint8_t btype) {
4090 35501400 : uint8_t tx_depth = 0;
4091 35501400 : if (bsize == BLOCK_64X64 ||
4092 34084600 : bsize == BLOCK_32X32 ||
4093 30808900 : bsize == BLOCK_16X16 ||
4094 30401400 : bsize == BLOCK_64X32 ||
4095 30021700 : bsize == BLOCK_32X64 ||
4096 28515900 : bsize == BLOCK_16X32 ||
4097 27030300 : bsize == BLOCK_32X16 ||
4098 23413500 : bsize == BLOCK_16X8 ||
4099 : bsize == BLOCK_8X16)
4100 16976900 : tx_depth = (btype == INTRA_MODE) ? 1 : 1;
4101 18524600 : else if (bsize == BLOCK_8X8 ||
4102 11314500 : bsize == BLOCK_64X16 ||
4103 10704700 : bsize == BLOCK_16X64 ||
4104 8800200 : bsize == BLOCK_32X8 ||
4105 6847650 : bsize == BLOCK_8X32 ||
4106 5089960 : bsize == BLOCK_16X4 ||
4107 : bsize == BLOCK_4X16)
4108 15157900 : tx_depth = (btype == INTRA_MODE) ? 1 : 1;
4109 :
4110 35501400 : return tx_depth;
4111 : }
4112 :
4113 : #if ENHANCE_ATB
// Per-transform-size table of transform types, indexed [tx size][tx type].
// tx_type_search() skips a transform type whose entry is zero whenever
// tx_search_reduced_set is non-zero. The table is not filled in this part of
// the file -- presumably populated elsewhere; verify against the initializer.
uint8_t allowed_tx_set_a[TX_SIZES_ALL][TX_TYPES];
4115 :
4116 1627310 : void tx_initialize_neighbor_arrays(
4117 : PictureControlSet *picture_control_set_ptr,
4118 : ModeDecisionContext *context_ptr,
4119 : EbBool is_inter) {
4120 :
4121 : // Set recon neighbor array to be used @ intra compensation
4122 1627310 : if (!is_inter)
4123 856733 : context_ptr->tx_search_luma_recon_neighbor_array =
4124 856733 : (context_ptr->tx_depth) ?
4125 856733 : picture_control_set_ptr->md_tx_depth_1_luma_recon_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX] :
4126 : picture_control_set_ptr->md_luma_recon_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
4127 :
4128 : // Set luma dc sign level coeff
4129 1627310 : context_ptr->full_loop_luma_dc_sign_level_coeff_neighbor_array =
4130 1627310 : (context_ptr->tx_depth == 1) ?
4131 1627310 : picture_control_set_ptr->md_tx_depth_1_luma_dc_sign_level_coeff_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX] :
4132 : picture_control_set_ptr->md_luma_dc_sign_level_coeff_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
4133 1627310 : }
4134 :
4135 3547460 : void tx_update_neighbor_arrays(
4136 : PictureControlSet *picture_control_set_ptr,
4137 : ModeDecisionContext *context_ptr,
4138 : ModeDecisionCandidateBuffer *candidate_buffer,
4139 : EbBool is_inter) {
4140 :
4141 3547460 : if (context_ptr->tx_depth) {
4142 :
4143 2733960 : if (!is_inter)
4144 1471120 : tx_search_update_recon_sample_neighbor_array(
4145 : context_ptr->tx_search_luma_recon_neighbor_array,
4146 : candidate_buffer->recon_ptr,
4147 1471120 : context_ptr->blk_geom->tx_org_x[context_ptr->tx_depth][context_ptr->txb_itr],
4148 1471120 : context_ptr->blk_geom->tx_org_y[context_ptr->tx_depth][context_ptr->txb_itr],
4149 1471120 : context_ptr->sb_origin_x + context_ptr->blk_geom->tx_org_x[context_ptr->tx_depth][context_ptr->txb_itr],
4150 1471120 : context_ptr->sb_origin_y + context_ptr->blk_geom->tx_org_y[context_ptr->tx_depth][context_ptr->txb_itr],
4151 1471120 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
4152 1471120 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr],
4153 1471120 : context_ptr->hbd_mode_decision);
4154 :
4155 2733910 : int8_t dc_sign_level_coeff = candidate_buffer->candidate_ptr->quantized_dc[0][context_ptr->txb_itr];
4156 2733910 : neighbor_array_unit_mode_write(
4157 : picture_control_set_ptr->md_tx_depth_1_luma_dc_sign_level_coeff_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX],
4158 : (uint8_t*)&dc_sign_level_coeff,
4159 2733910 : context_ptr->sb_origin_x + context_ptr->blk_geom->tx_org_x[context_ptr->tx_depth][context_ptr->txb_itr],
4160 2733910 : context_ptr->sb_origin_y + context_ptr->blk_geom->tx_org_y[context_ptr->tx_depth][context_ptr->txb_itr],
4161 2733910 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
4162 2733910 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr],
4163 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
4164 : }
4165 3547270 : }
4166 :
4167 :
4168 813703 : void tx_reset_neighbor_arrays(
4169 : PictureControlSet *picture_control_set_ptr,
4170 : ModeDecisionContext *context_ptr,
4171 : EbBool is_inter,
4172 : uint8_t end_tx_depth) {
4173 :
4174 813703 : if (end_tx_depth) {
4175 813703 : if (!is_inter)
4176 428399 : copy_neigh_arr(
4177 : picture_control_set_ptr->md_luma_recon_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX],
4178 : picture_control_set_ptr->md_tx_depth_1_luma_recon_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX],
4179 428399 : context_ptr->sb_origin_x + context_ptr->blk_geom->origin_x,
4180 428399 : context_ptr->sb_origin_y + context_ptr->blk_geom->origin_y,
4181 428399 : context_ptr->blk_geom->bwidth,
4182 428399 : context_ptr->blk_geom->bheight,
4183 : NEIGHBOR_ARRAY_UNIT_FULL_MASK);
4184 :
4185 813690 : copy_neigh_arr(
4186 : picture_control_set_ptr->md_luma_dc_sign_level_coeff_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX],
4187 : picture_control_set_ptr->md_tx_depth_1_luma_dc_sign_level_coeff_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX],
4188 813690 : context_ptr->sb_origin_x + context_ptr->blk_geom->origin_x,
4189 813690 : context_ptr->sb_origin_y + context_ptr->blk_geom->origin_y,
4190 813690 : context_ptr->blk_geom->bwidth,
4191 813690 : context_ptr->blk_geom->bheight,
4192 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
4193 : }
4194 813690 : }
4195 :
4196 2647620 : void tx_type_search(
4197 : SequenceControlSet *sequence_control_set_ptr,
4198 : PictureControlSet *picture_control_set_ptr,
4199 : ModeDecisionContext *context_ptr,
4200 : ModeDecisionCandidateBuffer *candidate_buffer,
4201 : uint32_t qp)
4202 : {
4203 2647620 : EbPictureBufferDesc *input_picture_ptr = picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
4204 5295240 : int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
4205 2647620 : picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
4206 :
4207 2647620 : TxType txk_start = DCT_DCT;
4208 2647620 : TxType txk_end = TX_TYPES;
4209 2647620 : uint64_t best_cost_tx_search = (uint64_t)~0;
4210 : int32_t tx_type;
4211 2647620 : TxSize txSize = context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr];
4212 2647620 : int32_t is_inter = (candidate_buffer->candidate_ptr->type == INTER_MODE || candidate_buffer->candidate_ptr->use_intrabc) ? EB_TRUE : EB_FALSE;
4213 2647620 : const TxSetType tx_set_type = get_ext_tx_set_type(txSize, is_inter, picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set);
4214 2647770 : uint8_t txb_origin_x = (uint8_t)context_ptr->blk_geom->tx_org_x[context_ptr->tx_depth][context_ptr->txb_itr];
4215 2647770 : uint8_t txb_origin_y = (uint8_t)context_ptr->blk_geom->tx_org_y[context_ptr->tx_depth][context_ptr->txb_itr];
4216 2647770 : uint32_t tu_origin_index = txb_origin_x + (txb_origin_y * candidate_buffer->residual_ptr->stride_y);
4217 2647770 : uint32_t input_tu_origin_index = (context_ptr->sb_origin_x + txb_origin_x + input_picture_ptr->origin_x) + ((context_ptr->sb_origin_y + txb_origin_y + input_picture_ptr->origin_y) * input_picture_ptr->stride_y);
4218 :
4219 :
4220 2647770 : context_ptr->luma_txb_skip_context = 0;
4221 2647770 : context_ptr->luma_dc_sign_context = 0;
4222 2647770 : get_txb_ctx(
4223 : sequence_control_set_ptr,
4224 : COMPONENT_LUMA,
4225 : context_ptr->full_loop_luma_dc_sign_level_coeff_neighbor_array,
4226 2647770 : context_ptr->sb_origin_x + txb_origin_x,
4227 2647770 : context_ptr->sb_origin_y + txb_origin_y,
4228 2647770 : context_ptr->blk_geom->bsize,
4229 2647770 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
4230 : &context_ptr->luma_txb_skip_context,
4231 : &context_ptr->luma_dc_sign_context);
4232 :
4233 2647680 : if (picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set == 2)
4234 0 : txk_end = 2;
4235 :
4236 2647680 : TxType best_tx_type = DCT_DCT;
4237 44979500 : for (tx_type = txk_start; tx_type < txk_end; ++tx_type) {
4238 :
4239 : uint64_t tuFullDistortion[3][DIST_CALC_TOTAL];
4240 42319100 : uint64_t y_tu_coeff_bits = 0;
4241 : uint32_t y_count_non_zero_coeffs;
4242 :
4243 : //context_ptr->three_quad_energy = 0;
4244 42319100 : if (tx_type != DCT_DCT) {
4245 39650400 : if (is_inter) {
4246 11219900 : TxSize max_tx_size = context_ptr->blk_geom->txsize[0][0];
4247 11219900 : const TxSetType tx_set_type = get_ext_tx_set_type(max_tx_size, is_inter, picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set);
4248 11219600 : int32_t eset = get_ext_tx_set(max_tx_size, is_inter, picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set);
4249 : // eset == 0 should correspond to a set with only DCT_DCT and there
4250 : // is no need to send the tx_type
4251 36557900 : if (eset <= 0) continue;
4252 10321700 : else if (av1_ext_tx_used[tx_set_type][tx_type] == 0) continue;
4253 7513480 : else if (context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr] > 32 || context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr] > 32) continue;
4254 : }
4255 :
4256 35944000 : int32_t eset = get_ext_tx_set(context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr], is_inter, picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set);
4257 : // eset == 0 should correspond to a set with only DCT_DCT and there
4258 : // is no need to send the tx_type
4259 35945400 : if (eset <= 0) continue;
4260 34269600 : else if (av1_ext_tx_used[tx_set_type][tx_type] == 0) continue;
4261 17868300 : else if (context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr] > 32 || context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr] > 32) continue;
4262 : }
4263 :
4264 20537000 : if (picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set)
4265 0 : if (!allowed_tx_set_a[context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr]][tx_type]) continue;
4266 :
4267 : // For Inter blocks, transform type of chroma follows luma transfrom type
4268 20537000 : if (is_inter)
4269 8261160 : candidate_buffer->candidate_ptr->transform_type_uv = (context_ptr->txb_itr == 0) ? candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr] : candidate_buffer->candidate_ptr->transform_type_uv;
4270 :
4271 : // Y: T Q iQ
4272 20537000 : av1_estimate_transform(
4273 20537000 : &(((int16_t*)candidate_buffer->residual_ptr->buffer_y)[tu_origin_index]),
4274 20537000 : candidate_buffer->residual_ptr->stride_y,
4275 20537000 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[context_ptr->txb_1d_offset]),
4276 : NOT_USED_VALUE,
4277 20537000 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
4278 : &context_ptr->three_quad_energy,
4279 : context_ptr->transform_inner_array_ptr,
4280 20537000 : context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
4281 : tx_type,
4282 : PLANE_TYPE_Y,
4283 : DEFAULT_SHAPE);
4284 :
4285 20550200 : av1_quantize_inv_quantize(
4286 : picture_control_set_ptr,
4287 : context_ptr,
4288 20550200 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[context_ptr->txb_1d_offset]),
4289 : NOT_USED_VALUE,
4290 20550200 : &(((int32_t*)candidate_buffer->residual_quant_coeff_ptr->buffer_y)[context_ptr->txb_1d_offset]),
4291 20550200 : &(((int32_t*)candidate_buffer->recon_coeff_ptr->buffer_y)[context_ptr->txb_1d_offset]),
4292 : qp,
4293 : seg_qp,
4294 20550200 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
4295 20550200 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr],
4296 20550200 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
4297 20550200 : &candidate_buffer->candidate_ptr->eob[0][context_ptr->txb_itr],
4298 : &y_count_non_zero_coeffs,
4299 : COMPONENT_LUMA,
4300 20550200 : context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
4301 : tx_type,
4302 : candidate_buffer,
4303 20550200 : context_ptr->luma_txb_skip_context,
4304 20550200 : context_ptr->luma_dc_sign_context,
4305 20550200 : candidate_buffer->candidate_ptr->pred_mode,
4306 : EB_FALSE,
4307 : EB_FALSE);
4308 :
4309 20547100 : candidate_buffer->candidate_ptr->quantized_dc[0][context_ptr->txb_itr] = (((int32_t*)candidate_buffer->residual_quant_coeff_ptr->buffer_y)[context_ptr->txb_1d_offset]);
4310 20547100 : uint32_t y_has_coeff = y_count_non_zero_coeffs > 0;
4311 :
4312 : // tx_type not equal to DCT_DCT and no coeff is not an acceptable option in AV1.
4313 20547100 : if (y_has_coeff == 0 && tx_type != DCT_DCT)
4314 4450620 : continue;
4315 :
4316 :
4317 16096500 : if (y_has_coeff)
4318 15548700 : inv_transform_recon_wrapper(
4319 15548700 : candidate_buffer->prediction_ptr->buffer_y,
4320 : tu_origin_index,
4321 15548700 : candidate_buffer->prediction_ptr->stride_y,
4322 15548700 : candidate_buffer->recon_ptr->buffer_y,
4323 : tu_origin_index,
4324 15548700 : candidate_buffer->recon_ptr->stride_y,
4325 15548700 : (int32_t*)candidate_buffer->recon_coeff_ptr->buffer_y,
4326 : context_ptr->txb_1d_offset,
4327 15548700 : context_ptr->hbd_mode_decision,
4328 15548700 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
4329 : tx_type,
4330 : PLANE_TYPE_Y,
4331 15548700 : (uint16_t)candidate_buffer->candidate_ptr->eob[0][context_ptr->txb_itr]);
4332 : else
4333 547773 : picture_copy(
4334 : candidate_buffer->prediction_ptr,
4335 : tu_origin_index,
4336 : 0,
4337 : candidate_buffer->recon_ptr,
4338 : tu_origin_index,
4339 : 0,
4340 547773 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
4341 547773 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr],
4342 : 0,
4343 : 0,
4344 : PICTURE_BUFFER_DESC_Y_FLAG,
4345 547773 : context_ptr->hbd_mode_decision);
4346 :
4347 32212700 : EbSpatialFullDistType spatial_full_dist_type_fun = context_ptr->hbd_mode_decision ?
4348 16106300 : full_distortion_kernel16_bits : spatial_full_distortion_kernel;
4349 :
4350 32206300 : tuFullDistortion[0][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun(
4351 : input_picture_ptr->buffer_y,
4352 : input_tu_origin_index,
4353 16106300 : input_picture_ptr->stride_y,
4354 16106300 : candidate_buffer->prediction_ptr->buffer_y,
4355 : tu_origin_index,
4356 16106300 : candidate_buffer->prediction_ptr->stride_y,
4357 16106300 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
4358 16106300 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr]);
4359 :
4360 32199100 : tuFullDistortion[0][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun(
4361 : input_picture_ptr->buffer_y,
4362 : input_tu_origin_index,
4363 16099900 : input_picture_ptr->stride_y,
4364 16099900 : candidate_buffer->recon_ptr->buffer_y,
4365 : tu_origin_index,
4366 16099900 : candidate_buffer->recon_ptr->stride_y,
4367 16099900 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
4368 16099900 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr]);
4369 :
4370 16099200 : tuFullDistortion[0][DIST_CALC_PREDICTION] <<= 4;
4371 16099200 : tuFullDistortion[0][DIST_CALC_RESIDUAL] <<= 4;
4372 :
4373 : //LUMA-ONLY
4374 16099200 : av1_tu_estimate_coeff_bits(
4375 : context_ptr,
4376 : 0, //allow_update_cdf,
4377 : NULL,//FRAME_CONTEXT *ec_ctx,
4378 : picture_control_set_ptr,
4379 : candidate_buffer,
4380 : context_ptr->txb_1d_offset,
4381 : 0,
4382 : context_ptr->coeff_est_entropy_coder_ptr,
4383 : candidate_buffer->residual_quant_coeff_ptr,
4384 : y_count_non_zero_coeffs,
4385 : 0,
4386 : 0,
4387 : &y_tu_coeff_bits,
4388 : &y_tu_coeff_bits,
4389 : &y_tu_coeff_bits,
4390 16099200 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
4391 16099200 : context_ptr->blk_geom->txsize_uv[context_ptr->tx_depth][context_ptr->txb_itr],
4392 : tx_type,
4393 16099200 : candidate_buffer->candidate_ptr->transform_type_uv,
4394 : COMPONENT_LUMA);
4395 :
4396 16095700 : uint64_t cost = RDCOST(context_ptr->full_lambda, y_tu_coeff_bits, tuFullDistortion[0][DIST_CALC_RESIDUAL]);
4397 16095700 : if (cost < best_cost_tx_search) {
4398 5495560 : best_cost_tx_search = cost;
4399 5495560 : best_tx_type = tx_type;
4400 : }
4401 : }
4402 :
4403 : // Best Tx Type Pass
4404 2660460 : candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr] = best_tx_type;
4405 :
4406 : // For Inter blocks, transform type of chroma follows luma transfrom type
4407 2660460 : if (is_inter)
4408 748392 : candidate_buffer->candidate_ptr->transform_type_uv = (context_ptr->txb_itr == 0) ? candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr] : candidate_buffer->candidate_ptr->transform_type_uv;
4409 :
4410 2660460 : }
4411 :
// Returns nonzero when bsize compares greater than BLOCK_4X4.
// The callers in this file use the result to decide whether any
// transform-size bits are costed for the block.
4412 14044100 : static INLINE int block_signals_txsize(BlockSize bsize) {
4413 14044100 : return bsize > BLOCK_4X4;
4414 : }
4415 :
// Returns nonzero when the block is treated as inter for transform-size
// purposes: either is_intrabc_block() reports it as an intra block copy
// block, or its first reference frame is greater than INTRA_FRAME.
4416 19252000 : static INLINE int is_inter_block(const BlockModeInfo *bloc_mi) {
4417 19252000 : return is_intrabc_block(bloc_mi) || bloc_mi->ref_frame[0] > INTRA_FRAME;
4418 : }
4419 :
// Returns the largest transform size for a block of size bsize.
//   bsize : block size used to index max_txsize_rect_lookup.
//   plane : 0 returns the looked-up size as is (luma); any other value
//           returns av1_get_adjusted_tx_size() of it (chroma).
// The lossless special case (TX_4X4) is commented out, so it is not applied.
4420 3135220 : static INLINE int get_vartx_max_txsize(/*const MbModeInfo *xd,*/ BlockSize bsize,
4421 : int plane) {
4422 : /* if (xd->lossless[xd->mi[0]->segment_id]) return TX_4X4;*/
4423 3135220 : const TxSize max_txsize = max_txsize_rect_lookup[bsize];
4424 3135220 : if (plane == 0) return max_txsize; // luma
4425 0 : return av1_get_adjusted_tx_size(max_txsize); // chroma
4426 : }
4427 :
// Returns the block width expressed in units of tx_size_wide_log2[0]
// (the first entry of the transform-width log2 table).
// Starts from block_size_wide[bsize]; when xd->mb_to_right_edge is negative
// that (negative) value, shifted right by 3 plus the plane's horizontal
// subsampling, is added so the width shrinks.
// NOTE(review): the ">> 3" suggests mb_to_right_edge is stored in 1/8-pixel
// units and is negative when the block crosses the right frame edge - confirm.
4428 4219450 : static INLINE int max_block_wide(const MacroBlockD *xd, BlockSize bsize,
4429 : int plane) {
4430 4219450 : int max_blocks_wide = block_size_wide[bsize];
4431 4219450 : const struct macroblockd_plane *const pd = &xd->plane[plane];
4432 :
4433 4219450 : if (xd->mb_to_right_edge < 0)
4434 0 : max_blocks_wide += xd->mb_to_right_edge >> (3 + pd->subsampling_x);
4435 :
4436 : // Scale the width in the transform block unit.
4437 4219450 : return max_blocks_wide >> tx_size_wide_log2[0];
4438 : }
4439 :
// Returns the block height expressed in units of tx_size_high_log2[0]
// (the first entry of the transform-height log2 table).
// Starts from block_size_high[bsize]; when xd->mb_to_bottom_edge is negative
// that (negative) value, shifted right by 3 plus the plane's vertical
// subsampling, is added so the height shrinks.
// NOTE(review): the ">> 3" suggests mb_to_bottom_edge is stored in 1/8-pixel
// units and is negative when the block crosses the bottom frame edge - confirm.
4440 4219400 : static INLINE int max_block_high(const MacroBlockD *xd, BlockSize bsize,
4441 : int plane) {
4442 4219400 : int max_blocks_high = block_size_high[bsize];
4443 4219400 : const struct macroblockd_plane *const pd = &xd->plane[plane];
4444 :
4445 4219400 : if (xd->mb_to_bottom_edge < 0)
4446 10987 : max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y);
4447 :
4448 : // Scale the height in the transform block unit.
4449 4219400 : return max_blocks_high >> tx_size_high_log2[0];
4450 : }
4451 :
// Records a transform decision in the above/left transform contexts.
//   above_ctx, left_ctx : first context entries covered by the transform block.
//   tx_size             : its pixel width/height are the values written.
//   txb_size            : determines how many entries are written, through
//                         txsize_to_bsize and mi_size_wide / mi_size_high.
// Writes tx_size_high[tx_size] into bh entries of left_ctx and
// tx_size_wide[tx_size] into bw entries of above_ctx.
4452 3755710 : static INLINE void txfm_partition_update(TXFM_CONTEXT *above_ctx,
4453 : TXFM_CONTEXT *left_ctx,
4454 : TxSize tx_size, TxSize txb_size) {
4455 3755710 : BlockSize bsize = txsize_to_bsize[txb_size];
4456 3755710 : int bh = mi_size_high[bsize];
4457 3755710 : int bw = mi_size_wide[bsize];
4458 3755710 : uint8_t txw = tx_size_wide[tx_size];
4459 3755710 : uint8_t txh = tx_size_high[tx_size];
4460 : int i;
4461 15887500 : for (i = 0; i < bh; ++i) left_ctx[i] = txh;
4462 15857800 : for (i = 0; i < bw; ++i) above_ctx[i] = txw;
4463 3755710 : }
4464 :
// Maps a dimension in pixels to a square transform size.
// 128 and 64 both map to TX_64X64, 32 to TX_32X32, 16 to TX_16X16 and
// 8 to TX_8X8. Every other value falls through to TX_4X4.
// The "break" statements after each "return" are never reached.
4465 3999000 : static INLINE TxSize get_sqr_tx_size(int tx_dim) {
4466 3999000 : switch (tx_dim) {
4467 217315 : case 128:
4468 217315 : case 64: return TX_64X64; break;
4469 807066 : case 32: return TX_32X32; break;
4470 1847190 : case 16: return TX_16X16; break;
4471 1127840 : case 8: return TX_8X8; break;
4472 0 : default: return TX_4X4;
4473 : }
4474 : }
// Computes the context index used to look up the cost of the transform
// partition (split / no split) flag.
//   above_ctx, left_ctx : context entries at the transform block position;
//                         only the first entry of each is read.
//   bsize               : block size; its larger dimension selects max_tx_size.
//   tx_size             : transform size being considered.
// Returns 0 for tx_size <= TX_4X4, otherwise category * 3 + above + left,
// where above/left are 1 when the stored context value is smaller than the
// width/height of tx_size.
4475 3998980 : static INLINE int txfm_partition_context(TXFM_CONTEXT *above_ctx,
4476 : TXFM_CONTEXT *left_ctx,
4477 : BlockSize bsize, TxSize tx_size) {
4478 3998980 : const uint8_t txw = tx_size_wide[tx_size];
4479 3998980 : const uint8_t txh = tx_size_high[tx_size];
4480 3998980 : const int above = *above_ctx < txw;
4481 3998980 : const int left = *left_ctx < txh;
    // Sentinel value; the assert below checks that it was replaced.
4482 3998980 : int category = TXFM_PARTITION_CONTEXTS;
4483 :
4484 : // dummy return, not used by others.
4485 3998980 : if (tx_size <= TX_4X4) return 0;
4486 :
4487 : TxSize max_tx_size =
4488 3998980 : get_sqr_tx_size(AOMMAX(block_size_wide[bsize], block_size_high[bsize]));
4489 :
    // The category combines two things: whether tx_size's square-up size
    // differs from max_tx_size (only counted when max_tx_size > TX_8X8),
    // and an offset of two categories per max_tx_size step below the largest.
4490 3999060 : if (max_tx_size >= TX_8X8) {
4491 3999290 : category =
4492 3999290 : (txsize_sqr_up_map[tx_size] != max_tx_size && max_tx_size > TX_8X8) +
4493 3999290 : (TX_SIZES - 1 - max_tx_size) * 2;
4494 : }
4495 3999060 : assert(category != TXFM_PARTITION_CONTEXTS);
4496 3999060 : return category * 3 + above + left;
4497 : }
4498 :
// Recursively accumulates the bits needed to signal the transform partition
// of an inter block, and updates xd->above_txfm_context and
// xd->left_txfm_context as it goes.
//   xd       : supplies the edge offsets and the above/left transform contexts.
//   mbmi     : supplies the block size and the chosen tx_depth.
//   tx_size  : transform size examined at this recursion level.
//   depth    : current recursion depth (0 at the top-level call).
//   blk_row, blk_col : position inside the block, in the units returned by
//                      max_block_high / max_block_wide.
//   md_rate_estimation_ptr : provides txfm_partition_fac_bits[ctx][0 or 1].
// Returns the accumulated bits. Returns 0 when the position lies outside the
// visible block area or when depth has reached MAX_VARTX_DEPTH.
4499 4219420 : static uint64_t cost_tx_size_vartx(MacroBlockD *xd, const MbModeInfo *mbmi,
4500 : TxSize tx_size, int depth, int blk_row,
4501 : int blk_col, MdRateEstimationContext *md_rate_estimation_ptr) {
4502 4219420 : uint64_t bits = 0;
4503 4219420 : const int max_blocks_high = max_block_high(xd, mbmi->block_mi.sb_type, 0);
4504 4219460 : const int max_blocks_wide = max_block_wide(xd, mbmi->block_mi.sb_type, 0);
4505 :
4506 4219420 : if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return bits;
4507 :
    // At maximum depth no flag is costed; only the contexts are updated.
4508 4219420 : if (depth == MAX_VARTX_DEPTH) {
4509 :
4510 220466 : txfm_partition_update(xd->above_txfm_context + blk_col,
4511 220466 : xd->left_txfm_context + blk_row, tx_size, tx_size);
4512 :
4513 220465 : return bits;
4514 : }
4515 :
4516 3998960 : const int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
4517 3998960 : xd->left_txfm_context + blk_row,
4518 3998960 : mbmi->block_mi.sb_type, tx_size);
4519 :
    // Nonzero when tx_size is exactly the size selected by mbmi->tx_depth for
    // this block size. In that case the "no split" cost (index 0) is added.
4520 3999110 : const int write_txfm_partition = (tx_size == tx_depth_to_tx_size[mbmi->tx_depth][mbmi->block_mi.sb_type]);
4521 :
4522 3999110 : if (write_txfm_partition) {
4523 3336480 : bits += md_rate_estimation_ptr->txfm_partition_fac_bits[ctx][0];
4524 :
4525 3336480 : txfm_partition_update(xd->above_txfm_context + blk_col,
4526 3336480 : xd->left_txfm_context + blk_row, tx_size, tx_size);
4527 :
4528 : }
4529 : else {
    // Split: add the "split" cost (index 1), then either stop at TX_4X4 or
    // recurse into each sub-transform.
4530 662635 : const TxSize sub_txs = sub_tx_size_map[tx_size];
4531 662635 : const int bsw = tx_size_wide_unit[sub_txs];
4532 662635 : const int bsh = tx_size_high_unit[sub_txs];
4533 :
4534 662635 : bits += md_rate_estimation_ptr->txfm_partition_fac_bits[ctx][1];
4535 662635 : if (sub_txs == TX_4X4) {
4536 :
4537 198793 : txfm_partition_update(xd->above_txfm_context + blk_col,
4538 198793 : xd->left_txfm_context + blk_row, sub_txs, tx_size);
4539 :
4540 198791 : return bits;
4541 : }
4542 :
    // bsw/bsh are the loop steps; the assert guards against a zero step.
4543 463842 : assert(bsw > 0 && bsh > 0);
4544 1203220 : for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh)
4545 1823930 : for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
4546 1084550 : int offsetr = blk_row + row;
4547 1084550 : int offsetc = blk_col + col;
4548 1084550 : bits += cost_tx_size_vartx(xd, mbmi, sub_txs, depth + 1, offsetr, offsetc, md_rate_estimation_ptr);
4549 : }
4550 : }
4551 3800310 : return bits;
4552 : }
4553 :
// Fills the first len entries of txfm_ctx with the value txs.
4554 12113300 : static INLINE void set_txfm_ctx(TXFM_CONTEXT *txfm_ctx, uint8_t txs, int len) {
4555 : int i;
4556 49466400 : for (i = 0; i < len; ++i) txfm_ctx[i] = txs;
4557 12113300 : }
4558 :
// Writes one value into n8_w entries of xd->above_txfm_context and one value
// into n8_h entries of xd->left_txfm_context.
//   tx_size    : its pixel width/height are the values written when skip is 0.
//   n8_w, n8_h : number of context entries to fill (above / left).
//   skip       : when nonzero, n8_w * MI_SIZE and n8_h * MI_SIZE are written
//                instead of the transform dimensions.
// Both values are held in uint8_t before being stored.
4559 6056740 : static INLINE void set_txfm_ctxs(TxSize tx_size, int n8_w, int n8_h, int skip,
4560 : const MacroBlockD *xd) {
4561 6056740 : uint8_t bw = tx_size_wide[tx_size];
4562 6056740 : uint8_t bh = tx_size_high[tx_size];
4563 :
4564 6056740 : if (skip) {
4565 673069 : bw = n8_w * MI_SIZE;
4566 673069 : bh = n8_h * MI_SIZE;
4567 : }
4568 :
4569 6056740 : set_txfm_ctx(xd->above_txfm_context, bw, n8_w);
4570 6057680 : set_txfm_ctx(xd->left_txfm_context, bh, n8_h);
4571 6058500 : }
4572 :
// Returns how many sub_tx_size_map steps lead from the largest transform size
// of bsize (max_txsize_rect_lookup[bsize]) down to tx_size.
// The loop only ends when tx_size is reached; the assert (debug builds only)
// checks that the step count stays within MAX_TX_DEPTH.
4573 4852790 : static INLINE int tx_size_to_depth(TxSize tx_size, BlockSize bsize) {
4574 4852790 : TxSize ctx_size = max_txsize_rect_lookup[bsize];
4575 4852790 : int depth = 0;
4576 5714890 : while (tx_size != ctx_size) {
4577 862099 : depth++;
4578 862099 : ctx_size = sub_tx_size_map[ctx_size];
4579 862099 : assert(depth <= MAX_TX_DEPTH);
4580 : }
4581 4852790 : return depth;
4582 : }
4583 :
4584 : #define BLOCK_SIZES_ALL 22
4585 :
4586 : // Returns a context number for the given MB prediction signal
4587 : // The mode info data structure has a one element border above and to the
4588 : // left of the entries corresponding to real blocks.
4589 : // The prediction flags in these dummy entries are initialized to 0.
// Computes the context (0, 1 or 2) used to cost the transform size of the
// current block from its above and left neighbours.
// For each side the starting flag is "stored transform context >= the block's
// maximum transform width/height". When that neighbour is available and is an
// inter block, the flag is replaced by "neighbour block width/height >= the
// maximum transform width/height".
// Returns above + left when both neighbours are available, the single
// available flag when only one is, and 0 when neither is.
4590 4853850 : static INLINE int get_tx_size_context(const MacroBlockD *xd) {
4591 4853850 : const ModeInfo *mi = xd->mi[0];
4592 4853850 : const MbModeInfo *mbmi = &mi->mbmi;
4593 4853850 : const MbModeInfo *const above_mbmi = xd->above_mbmi;
4594 4853850 : const MbModeInfo *const left_mbmi = xd->left_mbmi;
4595 4853850 : const TxSize max_tx_size = max_txsize_rect_lookup[mbmi->block_mi.sb_type];
4596 4853850 : const int max_tx_wide = tx_size_wide[max_tx_size];
4597 4853850 : const int max_tx_high = tx_size_high[max_tx_size];
4598 4853850 : const int has_above = xd->up_available;
4599 4853850 : const int has_left = xd->left_available;
4600 :
4601 4853850 : int above = xd->above_txfm_context[0] >= max_tx_wide;
4602 4853850 : int left = xd->left_txfm_context[0] >= max_tx_high;
4603 :
    // above_mbmi / left_mbmi are dereferenced only when the matching
    // availability flag is set.
4604 4853850 : if (has_above)
4605 4625990 : if (is_inter_block(&above_mbmi->block_mi))
4606 950897 : above = block_size_wide[above_mbmi->block_mi.sb_type] >= max_tx_wide;
4607 :
4608 4853520 : if (has_left)
4609 4668580 : if (is_inter_block(&left_mbmi->block_mi))
4610 943619 : left = block_size_high[left_mbmi->block_mi.sb_type] >= max_tx_high;
4611 :
4612 4852740 : if (has_above && has_left)
4613 4446440 : return (above + left);
4614 406300 : else if (has_above)
4615 178721 : return above;
4616 227579 : else if (has_left)
4617 221691 : return left;
4618 : else
4619 5888 : return 0;
4620 : }
4621 :
// Returns the bits needed to signal the transform size stored in
// xd->mi[0]->mbmi.tx_size (this is the non-inter path of tx_size_bits()).
// When block_signals_txsize() is false for the block size the result is 0.
// Otherwise it is tx_size_fac_bits[category][context][depth], with:
//   category : bsize_to_tx_size_cat(bsize)
//   context  : get_tx_size_context(xd)
//   depth    : tx_size_to_depth(tx_size, bsize)
4622 4854510 : static uint64_t cost_selected_tx_size(
4623 : const MacroBlockD *xd,
4624 : MdRateEstimationContext *md_rate_estimation_ptr) {
4625 4854510 : const ModeInfo *const mi = xd->mi[0];
4626 4854510 : const MbModeInfo *const mbmi = &mi->mbmi;
4627 4854510 : const BlockSize bsize = mbmi->block_mi.sb_type;
4628 4854510 : uint64_t bits = 0;
4629 :
4630 4854510 : if (block_signals_txsize(bsize)) {
4631 4853720 : const TxSize tx_size = mbmi->tx_size;
4632 4853720 : const int tx_size_ctx = get_tx_size_context(xd);
4633 4852680 : const int depth = tx_size_to_depth(tx_size, bsize);
4634 4852900 : const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize);
4635 4853100 : bits += md_rate_estimation_ptr->tx_size_fac_bits[tx_size_cat][tx_size_ctx][depth];
4636 : }
4637 :
4638 4853080 : return bits;
4639 : }
4640 :
// Returns the bits needed to signal the transform size / transform partition
// of one block, and updates the transform contexts reachable through xd.
//   md_rate_estimation_ptr : cost tables.
//   xd      : block descriptor; its above/left transform contexts are written.
//   mbmi    : mode info (tx_size, tx_depth, block type) of the block.
//   tx_mode : frame-level transform mode; bits are costed only for
//             TX_MODE_SELECT.
//   bsize   : block size.
//   skip    : nonzero when the block has no coefficients.
// Three paths:
//   - TX_MODE_SELECT, block larger than 4x4, not (inter and skip), inter:
//     the partition is costed recursively per max-size transform unit.
//   - same conditions, non-inter: the selected size is costed and the contexts
//     are set with skip = 0.
//   - otherwise: no bits; the contexts are set, using the skip variant only
//     when the block is inter and skipped.
4641 9193970 : static uint64_t tx_size_bits(
4642 : MdRateEstimationContext *md_rate_estimation_ptr,
4643 : MacroBlockD *xd,
4644 : const MbModeInfo *mbmi,
4645 : TxMode tx_mode,
4646 : BlockSize bsize,
4647 : uint8_t skip) {
4648 :
4649 9193970 : uint64_t bits = 0;
4650 :
    // is_inter_block() already returns nonzero for intrabc blocks, so the
    // second operand below never changes the result.
4651 9193970 : int is_inter_tx = is_inter_block(&mbmi->block_mi) || is_intrabc_block(&mbmi->block_mi);
4652 9193600 : if (tx_mode == TX_MODE_SELECT && block_signals_txsize(bsize) &&
4653 3768360 : !(is_inter_tx && skip) /*&& !xd->lossless[segment_id]*/) {
4654 7989780 : if (is_inter_tx) { // This implies skip flag is 0.
4655 3135250 : const TxSize max_tx_size = get_vartx_max_txsize(/*xd,*/ bsize, 0);
4656 3135230 : const int txbh = tx_size_high_unit[max_tx_size];
4657 3135230 : const int txbw = tx_size_wide_unit[max_tx_size];
4658 3135230 : const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
4659 3135230 : const int height = block_size_high[bsize] >> tx_size_high_log2[0];
4660 : int idx, idy;
4661 6270280 : for (idy = 0; idy < height; idy += txbh)
4662 6270270 : for (idx = 0; idx < width; idx += txbw)
4663 3135210 : bits += cost_tx_size_vartx(xd, mbmi, max_tx_size, 0, idy, idx, md_rate_estimation_ptr);
4664 : }
4665 : else {
4666 4854540 : bits += cost_selected_tx_size(xd, md_rate_estimation_ptr);
4667 4853170 : set_txfm_ctxs(mbmi->tx_size, xd->n8_w, xd->n8_h, 0, xd);
4668 : }
4669 : }
4670 : else {
4671 1975990 : set_txfm_ctxs(mbmi->tx_size, xd->n8_w, xd->n8_h,
4672 772352 : skip && is_inter_block(&mbmi->block_mi), xd);
4673 : }
4674 9193150 : return bits;
4675 : }
4676 :
// Forward declaration; the definition is not in this part of the file.
// estimate_tx_size_bits() calls it before reading xd->mi[0].
4677 : void set_mi_row_col(
4678 : PictureControlSet *picture_control_set_ptr,
4679 : MacroBlockD *xd,
4680 : TileInfo * tile,
4681 : int mi_row,
4682 : int bh,
4683 : int mi_col,
4684 : int bw,
4685 : uint32_t mi_stride,
4686 : int mi_rows,
4687 : int mi_cols);
4688 :
// Estimates the bits needed to signal the transform size / partition of a
// coding unit at a given transform depth.
//   pcsPtr             : picture control set (frame tx_mode, mi dimensions, mi_stride).
//   context_ptr        : provides the scratch above/left transform context buffers.
//   candidate_ptr      : supplies use_intrabc and ref_frame_type.
//   skip_flag          : forwarded to tx_size_bits() as its skip argument.
//   cu_origin_x/y      : block origin in pixels.
//   cu_ptr             : supplies the MacroBlockD (av1xd) that is set up here.
//   blk_geom           : block geometry (bsize, bwidth, bheight, txsize table).
//   txfm_context_array : neighbour array from which the contexts are copied.
//   tx_depth           : transform depth being evaluated.
//   md_rate_estimation_ptr : cost tables.
// Returns the bits computed by tx_size_bits().
// Side effects: calls set_mi_row_col() on xd, overwrites tx_size, sb_type,
// use_intrabc, ref_frame[0] and tx_depth in xd->mi[0]->mbmi, and repoints
// xd->above_txfm_context / xd->left_txfm_context at the buffers in context_ptr.
4689 9193640 : uint64_t estimate_tx_size_bits(
4690 : PictureControlSet *pcsPtr,
4691 : ModeDecisionContext *context_ptr,
4692 : ModeDecisionCandidate *candidate_ptr,
4693 : EbBool skip_flag,
4694 : uint32_t cu_origin_x,
4695 : uint32_t cu_origin_y,
4696 : CodingUnit *cu_ptr,
4697 : const BlockGeom *blk_geom,
4698 : NeighborArrayUnit *txfm_context_array,
4699 : uint8_t tx_depth,
4700 : MdRateEstimationContext *md_rate_estimation_ptr) {
4701 9193640 : uint32_t txfm_context_left_index = get_neighbor_array_unit_left_index(
4702 : txfm_context_array,
4703 : cu_origin_y);
4704 9192980 : uint32_t txfm_context_above_index = get_neighbor_array_unit_top_index(
4705 : txfm_context_array,
4706 : cu_origin_x);
4707 :
4708 9193120 : TxMode tx_mode = pcsPtr->parent_pcs_ptr->frm_hdr.tx_mode;
4709 9193120 : Av1Common *cm = pcsPtr->parent_pcs_ptr->av1_cm;
4710 9193120 : MacroBlockD *xd = cu_ptr->av1xd;
4711 9193120 : TileInfo * tile = &xd->tile;
4712 9193120 : int32_t mi_row = cu_origin_y >> MI_SIZE_LOG2;
4713 9193120 : int32_t mi_col = cu_origin_x >> MI_SIZE_LOG2;
4714 9193120 : BlockSize bsize = blk_geom->bsize;
4715 9193120 : const int32_t bw = mi_size_wide[bsize];
4716 9193120 : const int32_t bh = mi_size_high[bsize];
4717 9193120 : uint32_t mi_stride = pcsPtr->mi_stride;
4718 :
4719 9193120 : set_mi_row_col(
4720 : pcsPtr,
4721 : xd,
4722 : tile,
4723 : mi_row,
4724 : bh,
4725 : mi_col,
4726 : bw,
4727 : mi_stride,
4728 : cm->mi_rows,
4729 : cm->mi_cols);
4730 :
4731 9193810 : MbModeInfo * mbmi = &xd->mi[0]->mbmi;
4732 :
    // Copy the neighbour contexts into the scratch buffers of context_ptr.
    // The costing code below writes through xd->above/left_txfm_context, so
    // those writes land in the copies and not in txfm_context_array.
4733 9193810 : memcpy(context_ptr->above_txfm_context, &(txfm_context_array->top_array[txfm_context_above_index]), (blk_geom->bwidth >> MI_SIZE_LOG2) * sizeof(TXFM_CONTEXT));
4734 9193810 : memcpy(context_ptr->left_txfm_context, &(txfm_context_array->left_array[txfm_context_left_index]), (blk_geom->bheight >> MI_SIZE_LOG2) * sizeof(TXFM_CONTEXT));
4735 :
4736 9193810 : xd->above_txfm_context = context_ptr->above_txfm_context;
4737 9193810 : xd->left_txfm_context = context_ptr->left_txfm_context;
4738 :
    // Describe the candidate under evaluation in the mode-info entry that
    // tx_size_bits() reads.
4739 9193810 : mbmi->tx_size = blk_geom->txsize[tx_depth][0];
4740 9193810 : mbmi->block_mi.sb_type = blk_geom->bsize;
4741 9193810 : mbmi->block_mi.use_intrabc = candidate_ptr->use_intrabc;
4742 9193810 : mbmi->block_mi.ref_frame[0] = candidate_ptr->ref_frame_type;
4743 9193810 : mbmi->tx_depth = tx_depth;
4744 :
4745 9193810 : uint64_t bits = tx_size_bits(
4746 : md_rate_estimation_ptr,
4747 : xd,
4748 : mbmi,
4749 : tx_mode,
4750 : bsize,
4751 : skip_flag);
4752 :
4753 9193060 : return bits;
4754 : }
4755 :
// Convenience wrapper around estimate_tx_size_bits() that takes the block
// origin, coding unit, geometry, neighbour array and rate tables from
// context_ptr.
//   candidateBuffer : its candidate_ptr is forwarded.
//   tx_depth        : transform depth being evaluated.
//   block_has_coeff : the skip flag passed on is 1 when this is zero, else 0.
// Returns the estimated transform-size bits.
// Note: the local variable tx_size_bits hides the file-scope function of the
// same name inside this body.
4756 9194160 : uint64_t get_tx_size_bits(
4757 : ModeDecisionCandidateBuffer *candidateBuffer,
4758 : ModeDecisionContext *context_ptr,
4759 : PictureControlSet *picture_control_set_ptr,
4760 : uint8_t tx_depth,
4761 : EbBool block_has_coeff) {
4762 :
4763 9194160 : uint64_t tx_size_bits = 0;
4764 :
4765 9194160 : tx_size_bits = estimate_tx_size_bits(
4766 : picture_control_set_ptr,
4767 : context_ptr,
4768 : candidateBuffer->candidate_ptr,
4769 : block_has_coeff ? 0 : 1,
4770 9194160 : context_ptr->cu_origin_x,
4771 9194160 : context_ptr->cu_origin_y,
4772 : context_ptr->cu_ptr,
4773 : context_ptr->blk_geom,
4774 : context_ptr->txfm_context_array,
4775 : tx_depth,
4776 : context_ptr->md_rate_estimation_ptr);
4777 :
4778 9193020 : return tx_size_bits;
4779 : }
4780 :
// Evaluates every transform depth from 0 to end_tx_depth for the luma plane
// of one candidate and keeps the depth with the lowest rate-distortion cost.
//   candidate_buffer : candidate under test; used directly for depth 0 and
//                      receives the depth-1 results if depth 1 wins.
//   context_ptr      : mode-decision context; tx_depth, txb_itr, txb_1d_offset
//                      and three_quad_energy are modified here, and its
//                      scratch_candidate_buffer is used for depths above 0.
//   picture_control_set_ptr : picture-level state (input picture, tx settings).
//   ref_fast_cost    : reference cost passed to get_skip_tx_search_flag().
//   end_tx_depth     : last transform depth to evaluate (inclusive).
//   qp               : passed to tx_type_search() only; product_full_loop()
//                      receives context_ptr->cu_ptr->qp instead.
//   y_count_non_zero_coeffs : out, per transform block non-zero coefficient
//                      counts of the best depth.
//   y_coeff_bits     : out, luma coefficient bits of the best depth.
//   y_full_distortion: out, residual and prediction distortion of the best depth.
// NOTE(review): this function uses residual_kernel8bit and copies the
// prediction byte-wise, so it looks like an 8-bit-only path - confirm that
// callers never reach it in high-bit-depth mode decision.
4781 813702 : void tx_partitioning_path(
4782 : ModeDecisionCandidateBuffer *candidate_buffer,
4783 : ModeDecisionContext *context_ptr,
4784 : PictureControlSet *picture_control_set_ptr,
4785 : uint64_t ref_fast_cost,
4786 : uint8_t end_tx_depth,
4787 : uint32_t qp,
4788 : uint32_t *y_count_non_zero_coeffs,
4789 : uint64_t *y_coeff_bits,
4790 : uint64_t *y_full_distortion)
4791 : {
4792 813702 : EbPictureBufferDesc *input_picture_ptr = picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
4793 813702 : SequenceControlSet *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
    // Intra block copy candidates are handled like inter candidates here.
4794 813702 : int32_t is_inter = (candidate_buffer->candidate_ptr->type == INTER_MODE || candidate_buffer->candidate_ptr->use_intrabc) ? EB_TRUE : EB_FALSE;
4795 :
4796 :
4797 813702 : uint8_t best_tx_depth = 0;
4798 813702 : uint64_t best_cost_search = (uint64_t)~0;
4799 :
4800 : // Fill the scratch buffer
4801 813702 : memcpy(context_ptr->scratch_candidate_buffer->candidate_ptr, candidate_buffer->candidate_ptr, sizeof(ModeDecisionCandidate));
4802 :
    // For inter candidates the prediction and residual are not recomputed in
    // the transform loop below, so the scratch buffer gets a copy of them.
4803 813702 : if (is_inter) {
4804 :
4805 385315 : uint32_t block_index = context_ptr->blk_geom->origin_x + (context_ptr->blk_geom->origin_y * MAX_SB_SIZE);
4806 :
4807 : // Copy pred
4808 : {
4809 385315 : EbByte src = &(candidate_buffer->prediction_ptr->buffer_y[block_index]);
4810 385315 : EbByte dst = &(context_ptr->scratch_candidate_buffer->prediction_ptr->buffer_y[block_index]);
4811 6196280 : for (int i = 0; i < context_ptr->blk_geom->bheight; i++) {
4812 5810960 : memcpy(dst, src, context_ptr->blk_geom->bwidth);
4813 5810960 : src += candidate_buffer->prediction_ptr->stride_y;
4814 5810960 : dst += context_ptr->scratch_candidate_buffer->prediction_ptr->stride_y;
4815 : }
4816 : }
4817 :
4818 : // Copy residual
4819 : {
4820 385315 : int16_t* src = &(((int16_t*)candidate_buffer->residual_ptr->buffer_y)[block_index]);
4821 385315 : int16_t* dst = &(((int16_t*)context_ptr->scratch_candidate_buffer->residual_ptr->buffer_y)[block_index]);
4822 :
4823 6196270 : for (int i = 0; i < context_ptr->blk_geom->bheight; i++) {
    // "<< 1": each residual sample is an int16_t, i.e. two bytes.
4824 5810950 : memcpy(dst, src, context_ptr->blk_geom->bwidth << 1);
4825 5810950 : src += candidate_buffer->residual_ptr->stride_y;
4826 5810950 : dst += context_ptr->scratch_candidate_buffer->residual_ptr->stride_y;
4827 : }
4828 : }
4829 : }
4830 :
4831 :
    // Decide whether the transform-type search is skipped:
    //   md_staging_tx_search == 0 : always skipped.
    //   md_staging_tx_search == 1 : decided by get_skip_tx_search_flag() when
    //                               tx_search_level is TX_SEARCH_FULL_LOOP,
    //                               otherwise skipped.
    //   any other value           : searched only when tx_search_level is
    //                               TX_SEARCH_FULL_LOOP.
4832 : uint8_t tx_search_skip_flag;
4833 813702 : if (context_ptr->md_staging_tx_search == 0)
4834 0 : tx_search_skip_flag = EB_TRUE;
4835 813702 : else if (context_ptr->md_staging_tx_search == 1)
4836 770634 : tx_search_skip_flag = picture_control_set_ptr->parent_pcs_ptr->tx_search_level == TX_SEARCH_FULL_LOOP ? get_skip_tx_search_flag(
4837 385316 : context_ptr->blk_geom->sq_size,
4838 : ref_fast_cost,
4839 385316 : *candidate_buffer->fast_cost_ptr,
4840 385316 : picture_control_set_ptr->parent_pcs_ptr->tx_weight) : EB_TRUE;
4841 : else
4842 428387 : tx_search_skip_flag = picture_control_set_ptr->parent_pcs_ptr->tx_search_level == TX_SEARCH_FULL_LOOP ? EB_FALSE : EB_TRUE;
4843 :
4844 :
4845 813705 : tx_reset_neighbor_arrays(
4846 : picture_control_set_ptr,
4847 : context_ptr,
4848 : is_inter,
4849 : end_tx_depth);
4850 :
4851 : // Transform Depth Loop
4852 2441060 : for (context_ptr->tx_depth = 0; context_ptr->tx_depth <= end_tx_depth; context_ptr->tx_depth++) {
4853 :
    // Depth 0 works in the caller's buffer; deeper depths use the scratch
    // buffer so the depth-0 results stay intact.
4854 1627390 : ModeDecisionCandidateBuffer *tx_candidate_buffer = (context_ptr->tx_depth == 0) ? candidate_buffer : context_ptr->scratch_candidate_buffer;
4855 :
4856 1627390 : tx_candidate_buffer->candidate_ptr->tx_depth = context_ptr->tx_depth;
4857 :
4858 1627390 : tx_initialize_neighbor_arrays(
4859 : picture_control_set_ptr,
4860 : context_ptr,
4861 : is_inter);
4862 :
4863 : // Initialize TU Split
    // tx_y_count_non_zero_coeffs is not initialised here; the entries read
    // after product_full_loop() are expected to have been written by it.
4864 : uint32_t tx_y_count_non_zero_coeffs[MAX_NUM_OF_TU_PER_CU];
4865 1627300 : uint64_t tx_y_coeff_bits = 0;
4866 1627300 : uint64_t tx_y_full_distortion[DIST_CALC_TOTAL] = { 0 };
4867 :
4868 1627300 : context_ptr->txb_1d_offset = 0;
4869 1627300 : context_ptr->three_quad_energy = 0;
4870 1627300 : tx_candidate_buffer->candidate_ptr->y_has_coeff = 0;
4871 :
4872 1627300 : uint16_t txb_count = context_ptr->blk_geom->txb_count[context_ptr->tx_depth];
4873 :
4874 1627300 : uint32_t block_has_coeff = EB_FALSE;
4875 5174530 : for (context_ptr->txb_itr = 0; context_ptr->txb_itr < txb_count; context_ptr->txb_itr++) {
4876 3547150 : uint16_t tx_org_x = context_ptr->blk_geom->tx_org_x[context_ptr->tx_depth][context_ptr->txb_itr];
4877 3547150 : uint16_t tx_org_y = context_ptr->blk_geom->tx_org_y[context_ptr->tx_depth][context_ptr->txb_itr];
4878 3547150 : uint32_t tu_origin_index = tx_org_x + (tx_org_y * tx_candidate_buffer->residual_ptr->stride_y);
4879 3547150 : uint32_t input_tu_origin_index = (context_ptr->sb_origin_x + tx_org_x + input_picture_ptr->origin_x) + ((context_ptr->sb_origin_y + tx_org_y + input_picture_ptr->origin_y) * input_picture_ptr->stride_y);
4880 :
4881 : // Y Prediction
4882 :
    // Non-inter candidates are predicted and their residual recomputed for
    // every transform block.
4883 3547150 : if (!is_inter) {
4884 1899240 : av1_intra_luma_prediction(
4885 : context_ptr,
4886 : picture_control_set_ptr,
4887 : tx_candidate_buffer);
4888 :
4889 : // Y Residual
4890 1899430 : residual_kernel8bit(
4891 1899430 : &(input_picture_ptr->buffer_y[input_tu_origin_index]),
4892 1899430 : input_picture_ptr->stride_y,
4893 1899430 : &(tx_candidate_buffer->prediction_ptr->buffer_y[tu_origin_index]),
4894 1899430 : tx_candidate_buffer->prediction_ptr->stride_y,
4895 1899430 : &(((int16_t*)tx_candidate_buffer->residual_ptr->buffer_y)[tu_origin_index]),
4896 1899430 : tx_candidate_buffer->residual_ptr->stride_y,
4897 1899430 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
4898 1899430 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr]);
4899 : }
4900 :
4901 3547420 : if (!tx_search_skip_flag) {
4902 :
4903 2647610 : tx_type_search(
4904 : sequence_control_set_ptr,
4905 : picture_control_set_ptr,
4906 : context_ptr,
4907 : tx_candidate_buffer,
4908 : qp);
4909 : }
4910 :
    // The same accumulators are passed for every transform block of this
    // depth; they are read after the loop as totals for the depth.
4911 3547320 : product_full_loop(
4912 : tx_candidate_buffer,
4913 : context_ptr,
4914 : picture_control_set_ptr,
4915 : input_picture_ptr,
4916 3547320 : context_ptr->cu_ptr->qp,
4917 : &(tx_y_count_non_zero_coeffs[0]),
4918 : &tx_y_coeff_bits,
4919 : &tx_y_full_distortion[0]);
4920 :
4921 3547480 : uint32_t y_has_coeff = tx_y_count_non_zero_coeffs[context_ptr->txb_itr] > 0;
4922 :
4923 3547480 : tx_update_neighbor_arrays(
4924 : picture_control_set_ptr,
4925 : context_ptr,
4926 : tx_candidate_buffer,
4927 : is_inter);
4928 :
4929 3547230 : if (y_has_coeff)
4930 2823320 : block_has_coeff = EB_TRUE;
4931 :
4932 : } // Transform Loop
4933 :
    // Transform-size bits are added only when the frame uses TX_MODE_SELECT.
    // The local name tx_size_bits hides the file-scope function of the same
    // name inside this loop body.
4934 1627380 : uint64_t tx_size_bits = 0;
4935 1627380 : if (picture_control_set_ptr->parent_pcs_ptr->frm_hdr.tx_mode == TX_MODE_SELECT)
4936 1627340 : tx_size_bits = get_tx_size_bits(
4937 : tx_candidate_buffer,
4938 : context_ptr,
4939 : picture_control_set_ptr,
4940 1627340 : context_ptr->tx_depth,
4941 : block_has_coeff);
4942 :
4943 1627370 : uint64_t cost = RDCOST(context_ptr->full_lambda, (tx_y_coeff_bits + tx_size_bits), tx_y_full_distortion[DIST_CALC_RESIDUAL]);
4944 :
    // Strict "<": on equal cost the shallower depth is kept.
4945 1627370 : if (cost < best_cost_search) {
4946 1161860 : best_cost_search = cost;
4947 1161860 : best_tx_depth = context_ptr->tx_depth;
4948 :
4949 1161860 : y_full_distortion[DIST_CALC_RESIDUAL] = tx_y_full_distortion[DIST_CALC_RESIDUAL];
4950 1161860 : y_full_distortion[DIST_CALC_PREDICTION] = tx_y_full_distortion[DIST_CALC_PREDICTION];
4951 1161860 : *y_coeff_bits = tx_y_coeff_bits;
4952 3102750 : for (context_ptr->txb_itr = 0; context_ptr->txb_itr < txb_count; context_ptr->txb_itr++) {
4953 1940880 : y_count_non_zero_coeffs[context_ptr->txb_itr] = tx_y_count_non_zero_coeffs[context_ptr->txb_itr];
4954 : }
4955 :
4956 : }
4957 : } // Transform Depth Loop
4958 :
4959 : // ATB Recon
    // When depth 1 won, its results live in the scratch buffer and are copied
    // back into the caller's candidate buffer.
    // NOTE(review): only best_tx_depth == 1 is handled. If end_tx_depth can
    // exceed 1, a winning depth of 2 or more would not be copied back - confirm
    // the range of end_tx_depth with the callers.
4960 813674 : if (best_tx_depth == 1) {
4961 : // Copy depth 1 mode/type/eob ..
4962 348182 : memcpy(candidate_buffer->candidate_ptr, context_ptr->scratch_candidate_buffer->candidate_ptr, sizeof(ModeDecisionCandidate));
4963 :
4964 : // Copy depth 1 pred
4965 348182 : uint32_t block_index = context_ptr->blk_geom->origin_x + (context_ptr->blk_geom->origin_y * MAX_SB_SIZE);
4966 348182 : EbByte src = &(context_ptr->scratch_candidate_buffer->prediction_ptr->buffer_y[block_index]);
4967 348182 : EbByte dst = &(candidate_buffer->prediction_ptr->buffer_y[block_index]);
4968 6379340 : for (int i = 0; i < context_ptr->blk_geom->bheight; i++) {
4969 6031160 : memcpy(dst, src, context_ptr->blk_geom->bwidth);
4970 6031160 : src += context_ptr->scratch_candidate_buffer->prediction_ptr->stride_y;
4971 6031160 : dst += candidate_buffer->prediction_ptr->stride_y;
4972 : }
4973 :
4974 : // Copy depth 1 recon coeff
    // The length is (bwidth * bheight) << 2, i.e. four bytes per coefficient,
    // copied from the start of buffer_y.
4975 348182 : memcpy(candidate_buffer->recon_coeff_ptr->buffer_y, context_ptr->scratch_candidate_buffer->recon_coeff_ptr->buffer_y, (context_ptr->blk_geom->bwidth * context_ptr->blk_geom->bheight << 2));
4976 : }
4977 813674 : }
4978 : #else
4979 : void perform_intra_tx_partitioning(
4980 : ModeDecisionCandidateBuffer *candidate_buffer,
4981 : ModeDecisionContext *context_ptr,
4982 : PictureControlSet *picture_control_set_ptr,
4983 : uint64_t ref_fast_cost,
4984 : uint8_t end_tx_depth,
4985 : uint32_t qp,
4986 : uint32_t *y_count_non_zero_coeffs,
4987 : uint64_t *y_coeff_bits,
4988 : uint64_t *y_full_distortion)
4989 : {
4990 : EbPictureBufferDesc *input_picture_ptr = picture_control_set_ptr->hbd_mode_decision ?
4991 : picture_control_set_ptr->input_frame16bit : picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
4992 : SequenceControlSet *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
4993 : uint32_t tu_origin_index;
4994 : uint64_t y_full_cost;
4995 : uint64_t y_tu_coeff_bits;
4996 : uint64_t tuFullDistortion[3][DIST_CALC_TOTAL];
4997 : uint32_t txb_1d_offset;
4998 :
4999 : uint8_t best_tx_depth = 0;
5000 :
5001 : uint64_t best_cost_search = (uint64_t)~0;
5002 :
5003 : TxType best_tx_type_depth_0 = DCT_DCT; // Track the best tx type @ depth 0 to be used @ the final stage (i.e. avoid redoing the tx type search).
5004 : uint8_t tx_search_skip_flag;
5005 : if (context_ptr->md_staging_tx_search == 0)
5006 : tx_search_skip_flag = EB_TRUE;
5007 : else if (context_ptr->md_staging_tx_search == 1)
5008 : tx_search_skip_flag = picture_control_set_ptr->parent_pcs_ptr->tx_search_level == TX_SEARCH_FULL_LOOP ? get_skip_tx_search_flag(
5009 : context_ptr->blk_geom->sq_size,
5010 : ref_fast_cost,
5011 : *candidate_buffer->fast_cost_ptr,
5012 : picture_control_set_ptr->parent_pcs_ptr->tx_weight) : EB_TRUE;
5013 : else
5014 : tx_search_skip_flag = picture_control_set_ptr->parent_pcs_ptr->tx_search_level == TX_SEARCH_FULL_LOOP ? EB_FALSE : EB_TRUE;
5015 :
5016 : // Reset depth_1 neighbor arrays
5017 : if (end_tx_depth) {
5018 : if (!picture_control_set_ptr->hbd_mode_decision) {
5019 : copy_neigh_arr(
5020 : picture_control_set_ptr->md_luma_recon_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX],
5021 : picture_control_set_ptr->md_tx_depth_1_luma_recon_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX],
5022 : context_ptr->sb_origin_x + context_ptr->blk_geom->origin_x,
5023 : context_ptr->sb_origin_y + context_ptr->blk_geom->origin_y,
5024 : context_ptr->blk_geom->bwidth,
5025 : context_ptr->blk_geom->bheight,
5026 : NEIGHBOR_ARRAY_UNIT_FULL_MASK);
5027 : } else {
5028 : copy_neigh_arr(
5029 : picture_control_set_ptr->md_luma_recon_neighbor_array16bit[MD_NEIGHBOR_ARRAY_INDEX],
5030 : picture_control_set_ptr->md_tx_depth_1_luma_recon_neighbor_array16bit[MD_NEIGHBOR_ARRAY_INDEX],
5031 : context_ptr->sb_origin_x + context_ptr->blk_geom->origin_x,
5032 : context_ptr->sb_origin_y + context_ptr->blk_geom->origin_y,
5033 : context_ptr->blk_geom->bwidth,
5034 : context_ptr->blk_geom->bheight,
5035 : NEIGHBOR_ARRAY_UNIT_FULL_MASK);
5036 : }
5037 :
5038 : copy_neigh_arr(
5039 : picture_control_set_ptr->md_luma_dc_sign_level_coeff_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX],
5040 : picture_control_set_ptr->md_tx_depth_1_luma_dc_sign_level_coeff_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX],
5041 : context_ptr->sb_origin_x + context_ptr->blk_geom->origin_x,
5042 : context_ptr->sb_origin_y + context_ptr->blk_geom->origin_y,
5043 : context_ptr->blk_geom->bwidth,
5044 : context_ptr->blk_geom->bheight,
5045 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
5046 : }
5047 :
5048 : // Transform Depth Loop
5049 : for (context_ptr->tx_depth = 0; context_ptr->tx_depth <= end_tx_depth; context_ptr->tx_depth++) {
5050 : // Set recon neighbor array to be used @ intra compensation
5051 : if (!context_ptr->hbd_mode_decision) {
5052 : context_ptr->tx_search_luma_recon_neighbor_array =
5053 : (context_ptr->tx_depth) ?
5054 : picture_control_set_ptr->md_tx_depth_1_luma_recon_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX] :
5055 : picture_control_set_ptr->md_luma_recon_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
5056 : } else {
5057 : context_ptr->tx_search_luma_recon_neighbor_array16bit =
5058 : (context_ptr->tx_depth) ?
5059 : picture_control_set_ptr->md_tx_depth_1_luma_recon_neighbor_array16bit[MD_NEIGHBOR_ARRAY_INDEX] :
5060 : picture_control_set_ptr->md_luma_recon_neighbor_array16bit[MD_NEIGHBOR_ARRAY_INDEX];
5061 : }
5062 :
5063 : // Set luma dc sign level coeff
5064 : context_ptr->tx_search_luma_dc_sign_level_coeff_neighbor_array =
5065 : (context_ptr->tx_depth) ?
5066 : picture_control_set_ptr->md_tx_depth_1_luma_dc_sign_level_coeff_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX] :
5067 : picture_control_set_ptr->md_luma_dc_sign_level_coeff_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
5068 :
5069 : // Initialize TU Split
5070 : y_full_distortion[DIST_CALC_RESIDUAL] = 0;
5071 : y_full_distortion[DIST_CALC_PREDICTION] = 0;
5072 : *y_coeff_bits = 0;
5073 : txb_1d_offset = 0;
5074 : context_ptr->three_quad_energy = 0;
5075 : candidate_buffer->candidate_ptr->y_has_coeff = 0;
5076 :
5077 : uint16_t txb_count = context_ptr->blk_geom->txb_count[context_ptr->tx_depth];
5078 : for (context_ptr->txb_itr = 0; context_ptr->txb_itr < txb_count; context_ptr->txb_itr++) {
5079 : uint16_t tx_org_x = context_ptr->blk_geom->tx_org_x[context_ptr->tx_depth][context_ptr->txb_itr];
5080 : uint16_t tx_org_y = context_ptr->blk_geom->tx_org_y[context_ptr->tx_depth][context_ptr->txb_itr];
5081 :
5082 : context_ptr->luma_txb_skip_context = 0;
5083 : context_ptr->luma_dc_sign_context = 0;
5084 : get_txb_ctx(
5085 : sequence_control_set_ptr,
5086 : COMPONENT_LUMA,
5087 : context_ptr->tx_search_luma_dc_sign_level_coeff_neighbor_array,
5088 : context_ptr->sb_origin_x + tx_org_x,
5089 : context_ptr->sb_origin_y + tx_org_y,
5090 : context_ptr->blk_geom->bsize,
5091 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
5092 : &context_ptr->luma_txb_skip_context,
5093 : &context_ptr->luma_dc_sign_context);
5094 : tu_origin_index = tx_org_x + (tx_org_y * candidate_buffer->residual_ptr->stride_y);
5095 :
5096 : uint32_t input_tu_origin_index = (context_ptr->sb_origin_x + tx_org_x + input_picture_ptr->origin_x) + ((context_ptr->sb_origin_y + tx_org_y + input_picture_ptr->origin_y) * input_picture_ptr->stride_y);
5097 :
5098 : // Y Prediction
5099 : av1_intra_luma_prediction(
5100 : context_ptr,
5101 : picture_control_set_ptr,
5102 : candidate_buffer);
5103 :
5104 : // Y Residual
5105 : residual_kernel(
5106 : input_picture_ptr->buffer_y,
5107 : input_tu_origin_index,
5108 : input_picture_ptr->stride_y,
5109 : candidate_buffer->prediction_ptr->buffer_y,
5110 : tu_origin_index,
5111 : candidate_buffer->prediction_ptr->stride_y,
5112 : (int16_t*)candidate_buffer->residual_ptr->buffer_y,
5113 : tu_origin_index,
5114 : candidate_buffer->residual_ptr->stride_y,
5115 : context_ptr->hbd_mode_decision,
5116 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
5117 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr]);
5118 :
5119 : TxType best_tx_type = DCT_DCT;
5120 : if (!tx_search_skip_flag) {
5121 : TxType txk_start = DCT_DCT;
5122 : TxType txk_end = TX_TYPES;
5123 : uint64_t best_cost_tx_search = (uint64_t)~0;
5124 :
5125 : const TxSetType tx_set_type = get_ext_tx_set_type(context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr], 0, 0); // assumes INTRA
5126 :
5127 : for (int32_t tx_type = txk_start; tx_type < txk_end; ++tx_type) {
5128 : y_tu_coeff_bits = 0;
5129 :
5130 : int32_t eset = get_ext_tx_set(context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr], 0, 0); // assumes INTRA
5131 : // eset == 0 should correspond to a set with only DCT_DCT and there
5132 : // is no need to send the tx_type
5133 : if (eset <= 0) continue;
5134 : else if (av1_ext_tx_used[tx_set_type][tx_type] == 0) continue;
5135 : else if (context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr] > 32 || context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr] > 32) continue;
5136 :
5137 : int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
5138 : picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
5139 : // Y: T Q iQ
5140 : av1_estimate_transform(
5141 : &(((int16_t*)candidate_buffer->residual_ptr->buffer_y)[tu_origin_index]),
5142 : candidate_buffer->residual_ptr->stride_y,
5143 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[txb_1d_offset]),
5144 : NOT_USED_VALUE,
5145 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
5146 : &context_ptr->three_quad_energy,
5147 : context_ptr->transform_inner_array_ptr,
5148 : picture_control_set_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
5149 : tx_type,
5150 : PLANE_TYPE_Y,
5151 : DEFAULT_SHAPE);
5152 :
5153 : av1_quantize_inv_quantize(
5154 : picture_control_set_ptr,
5155 : context_ptr,
5156 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[txb_1d_offset]),
5157 : NOT_USED_VALUE,
5158 : &(((int32_t*)candidate_buffer->residual_quant_coeff_ptr->buffer_y)[txb_1d_offset]),
5159 : &(((int32_t*)candidate_buffer->recon_coeff_ptr->buffer_y)[txb_1d_offset]),
5160 : qp,
5161 : seg_qp,
5162 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
5163 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr],
5164 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
5165 : &candidate_buffer->candidate_ptr->eob[0][context_ptr->txb_itr],
5166 : &(y_count_non_zero_coeffs[context_ptr->txb_itr]),
5167 : COMPONENT_LUMA,
5168 : picture_control_set_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
5169 : tx_type,
5170 : candidate_buffer,
5171 : context_ptr->luma_txb_skip_context,
5172 : context_ptr->luma_dc_sign_context,
5173 : candidate_buffer->candidate_ptr->pred_mode,
5174 : EB_FALSE,
5175 : EB_FALSE);
5176 :
5177 : candidate_buffer->candidate_ptr->quantized_dc[0][context_ptr->txb_itr] = (((int32_t*)candidate_buffer->residual_quant_coeff_ptr->buffer_y)[txb_1d_offset]);
5178 :
5179 : uint32_t y_has_coeff = y_count_non_zero_coeffs[context_ptr->txb_itr] > 0;
5180 :
5181 : // tx_type not equal to DCT_DCT and no coeff is not an acceptable option in AV1.
5182 : if (y_has_coeff == 0 && tx_type != DCT_DCT)
5183 : continue;
5184 : if (y_has_coeff)
5185 : inv_transform_recon_wrapper(
5186 : candidate_buffer->prediction_ptr->buffer_y,
5187 : tu_origin_index,
5188 : candidate_buffer->prediction_ptr->stride_y,
5189 : candidate_buffer->recon_ptr->buffer_y,
5190 : tu_origin_index,
5191 : candidate_buffer->recon_ptr->stride_y,
5192 : (int32_t*) candidate_buffer->recon_coeff_ptr->buffer_y,
5193 : txb_1d_offset,
5194 : picture_control_set_ptr->hbd_mode_decision,
5195 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
5196 : tx_type,
5197 : PLANE_TYPE_Y,
5198 : (uint16_t)candidate_buffer->candidate_ptr->eob[0][context_ptr->txb_itr]);
5199 : else
5200 : picture_copy(
5201 : candidate_buffer->prediction_ptr,
5202 : tu_origin_index,
5203 : 0,
5204 : candidate_buffer->recon_ptr,
5205 : tu_origin_index,
5206 : 0,
5207 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
5208 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr],
5209 : 0,
5210 : 0,
5211 : PICTURE_BUFFER_DESC_Y_FLAG,
5212 : picture_control_set_ptr->hbd_mode_decision);
5213 :
5214 : EbSpatialFullDistType spatial_full_dist_type_fun = context_ptr->hbd_mode_decision ?
5215 : full_distortion_kernel16_bits : spatial_full_distortion_kernel;
5216 :
5217 : tuFullDistortion[0][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun(
5218 : input_picture_ptr->buffer_y,
5219 : input_tu_origin_index,
5220 : input_picture_ptr->stride_y,
5221 : candidate_buffer->prediction_ptr->buffer_y,
5222 : tu_origin_index,
5223 : candidate_buffer->prediction_ptr->stride_y,
5224 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
5225 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr]);
5226 :
5227 : tuFullDistortion[0][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun(
5228 : input_picture_ptr->buffer_y,
5229 : input_tu_origin_index,
5230 : input_picture_ptr->stride_y,
5231 : candidate_buffer->recon_ptr->buffer_y,
5232 : tu_origin_index,
5233 : candidate_buffer->recon_ptr->stride_y,
5234 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
5235 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr]);
5236 :
5237 : tuFullDistortion[0][DIST_CALC_PREDICTION] <<= 4;
5238 : tuFullDistortion[0][DIST_CALC_RESIDUAL] <<= 4;
5239 :
5240 : //LUMA-ONLY
5241 : av1_tu_estimate_coeff_bits(
5242 : context_ptr,
5243 : 0, //allow_update_cdf,
5244 : NULL,//FRAME_CONTEXT *ec_ctx,
5245 : picture_control_set_ptr,
5246 : candidate_buffer,
5247 : txb_1d_offset,
5248 : 0,
5249 : context_ptr->coeff_est_entropy_coder_ptr,
5250 : candidate_buffer->residual_quant_coeff_ptr,
5251 : y_count_non_zero_coeffs[context_ptr->txb_itr],
5252 : 0,
5253 : 0,
5254 : &y_tu_coeff_bits,
5255 : &y_tu_coeff_bits,
5256 : &y_tu_coeff_bits,
5257 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
5258 : context_ptr->blk_geom->txsize_uv[context_ptr->tx_depth][context_ptr->txb_itr],
5259 : tx_type,
5260 : candidate_buffer->candidate_ptr->transform_type_uv,
5261 : COMPONENT_LUMA);
5262 :
5263 : uint64_t cost = RDCOST(context_ptr->full_lambda, y_tu_coeff_bits, tuFullDistortion[0][DIST_CALC_RESIDUAL]);
5264 : if (cost < best_cost_tx_search) {
5265 : best_cost_tx_search = cost;
5266 : best_tx_type = tx_type;
5267 : }
5268 : }
5269 :
5270 : // Record the best tx type @ depth 0
5271 : best_tx_type_depth_0 = (context_ptr->tx_depth == 0) ? best_tx_type : best_tx_type_depth_0;
5272 : }
5273 : // Best Tx Type Pass
5274 : candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr] = best_tx_type;
5275 :
5276 : y_tu_coeff_bits = 0;
5277 :
5278 :
5279 : // Y: T Q iQ
5280 : av1_estimate_transform(
5281 : &(((int16_t*)candidate_buffer->residual_ptr->buffer_y)[tu_origin_index]),
5282 : candidate_buffer->residual_ptr->stride_y,
5283 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[txb_1d_offset]),
5284 : NOT_USED_VALUE,
5285 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
5286 : &context_ptr->three_quad_energy,
5287 : context_ptr->transform_inner_array_ptr,
5288 : picture_control_set_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
5289 : candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr],
5290 : PLANE_TYPE_Y,
5291 : DEFAULT_SHAPE);
5292 :
5293 : int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
5294 : picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
5295 :
5296 : candidate_buffer->candidate_ptr->quantized_dc[0][context_ptr->txb_itr] = av1_quantize_inv_quantize(
5297 : picture_control_set_ptr,
5298 : context_ptr,
5299 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[txb_1d_offset]),
5300 : NOT_USED_VALUE,
5301 : &(((int32_t*)candidate_buffer->residual_quant_coeff_ptr->buffer_y)[txb_1d_offset]),
5302 : &(((int32_t*)candidate_buffer->recon_coeff_ptr->buffer_y)[txb_1d_offset]),
5303 : qp,
5304 : seg_qp,
5305 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
5306 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr],
5307 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
5308 : &candidate_buffer->candidate_ptr->eob[0][context_ptr->txb_itr],
5309 : &(y_count_non_zero_coeffs[context_ptr->txb_itr]),
5310 : COMPONENT_LUMA,
5311 : picture_control_set_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
5312 : candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr],
5313 : candidate_buffer,
5314 : context_ptr->luma_txb_skip_context,
5315 : context_ptr->luma_dc_sign_context,
5316 : candidate_buffer->candidate_ptr->pred_mode,
5317 : EB_FALSE,
5318 : EB_FALSE);
5319 : uint32_t y_has_coeff = y_count_non_zero_coeffs[context_ptr->txb_itr] > 0;
5320 :
5321 : if (y_has_coeff)
5322 : inv_transform_recon_wrapper(
5323 : candidate_buffer->prediction_ptr->buffer_y,
5324 : tu_origin_index,
5325 : candidate_buffer->prediction_ptr->stride_y,
5326 : candidate_buffer->recon_ptr->buffer_y,
5327 : tu_origin_index,
5328 : candidate_buffer->recon_ptr->stride_y,
5329 : (int32_t*) candidate_buffer->recon_coeff_ptr->buffer_y,
5330 : txb_1d_offset,
5331 : picture_control_set_ptr->hbd_mode_decision,
5332 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
5333 : candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr],
5334 : PLANE_TYPE_Y,
5335 : (uint16_t)candidate_buffer->candidate_ptr->eob[0][context_ptr->txb_itr]);
5336 : else
5337 : picture_copy(
5338 : candidate_buffer->prediction_ptr,
5339 : tu_origin_index,
5340 : 0,
5341 : candidate_buffer->recon_ptr,
5342 : tu_origin_index,
5343 : 0,
5344 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
5345 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr],
5346 : 0,
5347 : 0,
5348 : PICTURE_BUFFER_DESC_Y_FLAG,
5349 : picture_control_set_ptr->hbd_mode_decision);
5350 :
5351 : EbSpatialFullDistType spatial_full_dist_type_fun = context_ptr->hbd_mode_decision ?
5352 : full_distortion_kernel16_bits : spatial_full_distortion_kernel;
5353 :
5354 : tuFullDistortion[0][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun(
5355 : input_picture_ptr->buffer_y,
5356 : input_tu_origin_index,
5357 : input_picture_ptr->stride_y,
5358 : candidate_buffer->prediction_ptr->buffer_y,
5359 : tu_origin_index,
5360 : candidate_buffer->prediction_ptr->stride_y,
5361 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
5362 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr]);
5363 :
5364 : tuFullDistortion[0][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun(
5365 : input_picture_ptr->buffer_y,
5366 : input_tu_origin_index,
5367 : input_picture_ptr->stride_y,
5368 : candidate_buffer->recon_ptr->buffer_y,
5369 : tu_origin_index,
5370 : candidate_buffer->recon_ptr->stride_y,
5371 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
5372 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr]);
5373 :
5374 : tuFullDistortion[0][DIST_CALC_PREDICTION] <<= 4;
5375 : tuFullDistortion[0][DIST_CALC_RESIDUAL] <<= 4;
5376 :
5377 : //LUMA-ONLY
5378 : av1_tu_estimate_coeff_bits(
5379 : context_ptr,
5380 : 0, //allow_update_cdf,
5381 : NULL,//FRAME_CONTEXT *ec_ctx,
5382 : picture_control_set_ptr,
5383 : candidate_buffer,
5384 : txb_1d_offset,
5385 : 0,
5386 : context_ptr->coeff_est_entropy_coder_ptr,
5387 : candidate_buffer->residual_quant_coeff_ptr,
5388 : y_count_non_zero_coeffs[context_ptr->txb_itr],
5389 : 0,
5390 : 0,
5391 : &y_tu_coeff_bits,
5392 : &y_tu_coeff_bits,
5393 : &y_tu_coeff_bits,
5394 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
5395 : context_ptr->blk_geom->txsize_uv[context_ptr->tx_depth][context_ptr->txb_itr],
5396 : candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr],
5397 : candidate_buffer->candidate_ptr->transform_type_uv,
5398 : COMPONENT_LUMA);
5399 :
5400 : av1_tu_calc_cost_luma(
5401 : context_ptr->luma_txb_skip_context,
5402 : candidate_buffer->candidate_ptr,
5403 : context_ptr->txb_itr,
5404 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
5405 : y_count_non_zero_coeffs[context_ptr->txb_itr],
5406 : tuFullDistortion[0],
5407 : &y_tu_coeff_bits,
5408 : &y_full_cost,
5409 : context_ptr->full_lambda);
5410 :
5411 : (*y_coeff_bits) += y_tu_coeff_bits;
5412 :
5413 : y_full_distortion[DIST_CALC_RESIDUAL] += tuFullDistortion[0][DIST_CALC_RESIDUAL];
5414 : y_full_distortion[DIST_CALC_PREDICTION] += tuFullDistortion[0][DIST_CALC_PREDICTION];
5415 :
5416 : txb_1d_offset += context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr] * context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr];
5417 :
5418 : if (context_ptr->tx_depth)
5419 : {
5420 : NeighborArrayUnit *tx_search_luma_recon =
5421 : context_ptr->hbd_mode_decision ? context_ptr->tx_search_luma_recon_neighbor_array16bit : context_ptr->tx_search_luma_recon_neighbor_array;
5422 :
5423 : tx_search_update_recon_sample_neighbor_array(
5424 : tx_search_luma_recon,
5425 : candidate_buffer->recon_ptr,
5426 : context_ptr->blk_geom->tx_org_x[context_ptr->tx_depth][context_ptr->txb_itr],
5427 : context_ptr->blk_geom->tx_org_y[context_ptr->tx_depth][context_ptr->txb_itr],
5428 : context_ptr->sb_origin_x + context_ptr->blk_geom->tx_org_x[context_ptr->tx_depth][context_ptr->txb_itr],
5429 : context_ptr->sb_origin_y + context_ptr->blk_geom->tx_org_y[context_ptr->tx_depth][context_ptr->txb_itr],
5430 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
5431 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr],
5432 : context_ptr->hbd_mode_decision);
5433 :
5434 : int8_t dc_sign_level_coeff = candidate_buffer->candidate_ptr->quantized_dc[0][context_ptr->txb_itr];
5435 : neighbor_array_unit_mode_write(
5436 : picture_control_set_ptr->md_tx_depth_1_luma_dc_sign_level_coeff_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX],
5437 : (uint8_t*)&dc_sign_level_coeff,
5438 : context_ptr->sb_origin_x + context_ptr->blk_geom->tx_org_x[context_ptr->tx_depth][context_ptr->txb_itr],
5439 : context_ptr->sb_origin_y + context_ptr->blk_geom->tx_org_y[context_ptr->tx_depth][context_ptr->txb_itr],
5440 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
5441 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr],
5442 : NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
5443 : }
5444 : } // Transform Loop
5445 :
5446 : // To do: estimate the cost of tx size = tx_size_bits
5447 : uint64_t cost = RDCOST(context_ptr->full_lambda, (*y_coeff_bits), y_full_distortion[DIST_CALC_RESIDUAL]);
5448 :
5449 : if (cost < best_cost_search) {
5450 : best_cost_search = cost;
5451 : best_tx_depth = context_ptr->tx_depth;
5452 : }
5453 : } // Transform Depth Loop
5454 :
5455 : // ATB Recon
5456 : context_ptr->tx_depth = candidate_buffer->candidate_ptr->tx_depth = best_tx_depth;
5457 :
5458 : if (context_ptr->tx_depth == 0) {
5459 : // Set recon neighbor array to be used @ intra compensation
5460 : if (context_ptr->hbd_mode_decision)
5461 : context_ptr->tx_search_luma_recon_neighbor_array16bit = picture_control_set_ptr->md_luma_recon_neighbor_array16bit[MD_NEIGHBOR_ARRAY_INDEX];
5462 : else
5463 : context_ptr->tx_search_luma_recon_neighbor_array = picture_control_set_ptr->md_luma_recon_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
5464 :
5465 : // Initialize TU Split
5466 : y_full_distortion[DIST_CALC_RESIDUAL] = 0;
5467 : y_full_distortion[DIST_CALC_PREDICTION] = 0;
5468 : *y_coeff_bits = 0;
5469 : txb_1d_offset = 0;
5470 : context_ptr->three_quad_energy = 0;
5471 : candidate_buffer->candidate_ptr->y_has_coeff = 0;
5472 :
5473 : uint16_t txb_count = context_ptr->blk_geom->txb_count[context_ptr->tx_depth];
5474 : for (context_ptr->txb_itr = 0; context_ptr->txb_itr < txb_count; context_ptr->txb_itr++) {
5475 : uint16_t tx_org_x = context_ptr->blk_geom->tx_org_x[context_ptr->tx_depth][context_ptr->txb_itr];
5476 : uint16_t tx_org_y = context_ptr->blk_geom->tx_org_y[context_ptr->tx_depth][context_ptr->txb_itr];
5477 :
5478 : context_ptr->luma_txb_skip_context = 0;
5479 : context_ptr->luma_dc_sign_context = 0;
5480 : get_txb_ctx(
5481 : sequence_control_set_ptr,
5482 : COMPONENT_LUMA,
5483 : picture_control_set_ptr->md_luma_dc_sign_level_coeff_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX],
5484 : context_ptr->sb_origin_x + tx_org_x,
5485 : context_ptr->sb_origin_y + tx_org_y,
5486 : context_ptr->blk_geom->bsize,
5487 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
5488 : &context_ptr->luma_txb_skip_context,
5489 : &context_ptr->luma_dc_sign_context);
5490 :
5491 : tu_origin_index = tx_org_x + (tx_org_y * candidate_buffer->residual_ptr->stride_y);
5492 : y_tu_coeff_bits = 0;
5493 :
5494 : uint32_t input_tu_origin_index = (context_ptr->sb_origin_x + tx_org_x + input_picture_ptr->origin_x) + ((context_ptr->sb_origin_y + tx_org_y + input_picture_ptr->origin_y) * input_picture_ptr->stride_y);
5495 :
5496 : // Y Prediction
5497 : av1_intra_luma_prediction(
5498 : context_ptr,
5499 : picture_control_set_ptr,
5500 : candidate_buffer);
5501 :
5502 : // Y Residual
5503 : residual_kernel(
5504 : input_picture_ptr->buffer_y,
5505 : input_tu_origin_index,
5506 : input_picture_ptr->stride_y,
5507 : candidate_buffer->prediction_ptr->buffer_y,
5508 : tu_origin_index,
5509 : candidate_buffer->prediction_ptr->stride_y,
5510 : (int16_t*)candidate_buffer->residual_ptr->buffer_y,
5511 : tu_origin_index,
5512 : candidate_buffer->residual_ptr->stride_y,
5513 : context_ptr->hbd_mode_decision,
5514 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
5515 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr]);
5516 :
5517 : // Get the depth 0 best tx type
5518 : candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr] = best_tx_type_depth_0;
5519 :
5520 : y_tu_coeff_bits = 0;
5521 :
5522 : // Y: T Q iQ
5523 : av1_estimate_transform(
5524 : &(((int16_t*)candidate_buffer->residual_ptr->buffer_y)[tu_origin_index]),
5525 : candidate_buffer->residual_ptr->stride_y,
5526 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[txb_1d_offset]),
5527 : NOT_USED_VALUE,
5528 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
5529 : &context_ptr->three_quad_energy,
5530 : context_ptr->transform_inner_array_ptr,
5531 : picture_control_set_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
5532 : candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr],
5533 : PLANE_TYPE_Y,
5534 : DEFAULT_SHAPE);
5535 :
5536 : int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
5537 : picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
5538 : candidate_buffer->candidate_ptr->quantized_dc[0][context_ptr->txb_itr] = av1_quantize_inv_quantize(
5539 : picture_control_set_ptr,
5540 : context_ptr,
5541 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[txb_1d_offset]),
5542 : NOT_USED_VALUE,
5543 : &(((int32_t*)candidate_buffer->residual_quant_coeff_ptr->buffer_y)[txb_1d_offset]),
5544 : &(((int32_t*)candidate_buffer->recon_coeff_ptr->buffer_y)[txb_1d_offset]),
5545 : qp,
5546 : seg_qp,
5547 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
5548 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr],
5549 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
5550 : &candidate_buffer->candidate_ptr->eob[0][context_ptr->txb_itr],
5551 : &(y_count_non_zero_coeffs[context_ptr->txb_itr]),
5552 : COMPONENT_LUMA,
5553 : picture_control_set_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
5554 : candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr],
5555 : candidate_buffer,
5556 : context_ptr->luma_txb_skip_context,
5557 : context_ptr->luma_dc_sign_context,
5558 : candidate_buffer->candidate_ptr->pred_mode,
5559 : EB_FALSE,
5560 : EB_FALSE);
5561 : uint32_t y_has_coeff = y_count_non_zero_coeffs[context_ptr->txb_itr] > 0;
5562 :
5563 : if (y_has_coeff)
5564 : inv_transform_recon_wrapper(
5565 : candidate_buffer->prediction_ptr->buffer_y,
5566 : tu_origin_index,
5567 : candidate_buffer->prediction_ptr->stride_y,
5568 : candidate_buffer->recon_ptr->buffer_y,
5569 : tu_origin_index,
5570 : candidate_buffer->recon_ptr->stride_y,
5571 : (int32_t*) candidate_buffer->recon_coeff_ptr->buffer_y,
5572 : txb_1d_offset,
5573 : picture_control_set_ptr->hbd_mode_decision,
5574 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
5575 : candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr],
5576 : PLANE_TYPE_Y,
5577 : (uint16_t)candidate_buffer->candidate_ptr->eob[0][context_ptr->txb_itr]);
5578 :
5579 : else
5580 : picture_copy(
5581 : candidate_buffer->prediction_ptr,
5582 : tu_origin_index,
5583 : 0,
5584 : candidate_buffer->recon_ptr,
5585 : tu_origin_index,
5586 : 0,
5587 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
5588 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr],
5589 : 0,
5590 : 0,
5591 : PICTURE_BUFFER_DESC_Y_FLAG,
5592 : picture_control_set_ptr->hbd_mode_decision);
5593 :
5594 : EbSpatialFullDistType spatial_full_dist_type_fun = context_ptr->hbd_mode_decision ?
5595 : full_distortion_kernel16_bits : spatial_full_distortion_kernel;
5596 :
5597 : tuFullDistortion[0][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun(
5598 : input_picture_ptr->buffer_y,
5599 : input_tu_origin_index,
5600 : input_picture_ptr->stride_y,
5601 : candidate_buffer->prediction_ptr->buffer_y,
5602 : tu_origin_index,
5603 : candidate_buffer->prediction_ptr->stride_y,
5604 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
5605 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr]);
5606 :
5607 : tuFullDistortion[0][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun(
5608 : input_picture_ptr->buffer_y,
5609 : input_tu_origin_index,
5610 : input_picture_ptr->stride_y,
5611 : candidate_buffer->recon_ptr->buffer_y,
5612 : tu_origin_index,
5613 : candidate_buffer->recon_ptr->stride_y,
5614 : context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr],
5615 : context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr]);
5616 :
5617 : tuFullDistortion[0][DIST_CALC_PREDICTION] <<= 4;
5618 : tuFullDistortion[0][DIST_CALC_RESIDUAL] <<= 4;
5619 :
5620 : //LUMA-ONLY
5621 : av1_tu_estimate_coeff_bits(
5622 : context_ptr,
5623 : 0, //allow_update_cdf,
5624 : NULL,//FRAME_CONTEXT *ec_ctx,
5625 : picture_control_set_ptr,
5626 : candidate_buffer,
5627 : txb_1d_offset,
5628 : 0,
5629 : context_ptr->coeff_est_entropy_coder_ptr,
5630 : candidate_buffer->residual_quant_coeff_ptr,
5631 : y_count_non_zero_coeffs[context_ptr->txb_itr],
5632 : 0,
5633 : 0,
5634 : &y_tu_coeff_bits,
5635 : &y_tu_coeff_bits,
5636 : &y_tu_coeff_bits,
5637 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
5638 : context_ptr->blk_geom->txsize_uv[context_ptr->tx_depth][context_ptr->txb_itr],
5639 : candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr],
5640 : candidate_buffer->candidate_ptr->transform_type_uv,
5641 : COMPONENT_LUMA);
5642 :
5643 : av1_tu_calc_cost_luma(
5644 : context_ptr->luma_txb_skip_context,
5645 : candidate_buffer->candidate_ptr,
5646 : context_ptr->txb_itr,
5647 : context_ptr->blk_geom->txsize[context_ptr->tx_depth][context_ptr->txb_itr],
5648 : y_count_non_zero_coeffs[context_ptr->txb_itr],
5649 : tuFullDistortion[0],
5650 : &y_tu_coeff_bits,
5651 : &y_full_cost,
5652 : context_ptr->full_lambda);
5653 :
5654 : (*y_coeff_bits) += y_tu_coeff_bits;
5655 :
5656 : y_full_distortion[DIST_CALC_RESIDUAL] += tuFullDistortion[0][DIST_CALC_RESIDUAL];
5657 : y_full_distortion[DIST_CALC_PREDICTION] += tuFullDistortion[0][DIST_CALC_PREDICTION];
5658 :
5659 : txb_1d_offset += context_ptr->blk_geom->tx_width[context_ptr->tx_depth][context_ptr->txb_itr] * context_ptr->blk_geom->tx_height[context_ptr->tx_depth][context_ptr->txb_itr];
5660 : } // Transform Loop
5661 : }
5662 : }
5663 : #endif
5664 :
5665 :
5666 37535300 : void full_loop_core(
5667 : PictureControlSet *picture_control_set_ptr,
5668 : LargestCodingUnit *sb_ptr,
5669 : CodingUnit *cu_ptr,
5670 : ModeDecisionContext *context_ptr,
5671 : ModeDecisionCandidateBuffer *candidate_buffer,
5672 : ModeDecisionCandidate *candidate_ptr,
5673 : EbPictureBufferDesc *input_picture_ptr,
5674 : uint32_t inputOriginIndex,
5675 : uint32_t inputCbOriginIndex,
5676 : uint32_t cuOriginIndex,
5677 : uint32_t cuChromaOriginIndex,
5678 : uint64_t ref_fast_cost)
5679 : {
5680 : uint64_t y_full_distortion[DIST_CALC_TOTAL];
5681 : uint32_t count_non_zero_coeffs[3][MAX_NUM_OF_TU_PER_CU];
5682 :
5683 : uint64_t cbFullDistortion[DIST_CALC_TOTAL];
5684 : uint64_t crFullDistortion[DIST_CALC_TOTAL];
5685 :
5686 : uint64_t y_coeff_bits;
5687 37535300 : uint64_t cb_coeff_bits = 0;
5688 37535300 : uint64_t cr_coeff_bits = 0;
5689 :
5690 : // initialize TU Split
5691 37535300 : y_full_distortion[DIST_CALC_RESIDUAL] = 0;
5692 37535300 : y_full_distortion[DIST_CALC_PREDICTION] = 0;
5693 37535300 : y_coeff_bits = 0;
5694 :
5695 37535300 : candidate_ptr->full_distortion = 0;
5696 :
5697 37535300 : memset(candidate_ptr->eob[0], 0, sizeof(uint16_t));
5698 37535300 : memset(candidate_ptr->eob[1], 0, sizeof(uint16_t));
5699 37535300 : memset(candidate_ptr->eob[2], 0, sizeof(uint16_t));
5700 :
5701 37535300 : candidate_ptr->chroma_distortion = 0;
5702 37535300 : candidate_ptr->chroma_distortion_inter_depth = 0;
5703 : // Set Skip Flag
5704 37535300 : candidate_ptr->skip_flag = EB_FALSE;
5705 :
5706 37535300 : if (candidate_ptr->type != INTRA_MODE) {
5707 : #if REMOVE_MD_STAGE_1
5708 31690800 : if (context_ptr->md_staging_skip_full_pred == EB_FALSE) {
5709 : #else
5710 : if (picture_control_set_ptr->parent_pcs_ptr->interpolation_search_level > IT_SEARCH_OFF)
5711 : if (picture_control_set_ptr->parent_pcs_ptr->interpolation_search_level == IT_SEARCH_FULL_LOOP || context_ptr->md_staging_skip_full_pred == EB_FALSE) {
5712 : context_ptr->md_staging_skip_interpolation_search = EB_FALSE;
5713 : context_ptr->md_staging_skip_inter_chroma_pred = EB_FALSE;
5714 : #endif
5715 31588200 : ProductPredictionFunTable[candidate_ptr->type](
5716 : context_ptr,
5717 : picture_control_set_ptr,
5718 : candidate_buffer);
5719 : }
5720 : }
5721 :
5722 : // Initialize luma CBF
5723 37538800 : candidate_ptr->y_has_coeff = 0;
5724 37538800 : candidate_ptr->u_has_coeff = 0;
5725 37538800 : candidate_ptr->v_has_coeff = 0;
5726 :
5727 : // Initialize tx type
5728 37538800 : candidate_ptr->transform_type[0] = DCT_DCT;
5729 37538800 : candidate_ptr->transform_type[1] = DCT_DCT;
5730 37538800 : candidate_ptr->transform_type[2] = DCT_DCT;
5731 37538800 : candidate_ptr->transform_type[3] = DCT_DCT;
5732 :
5733 37538800 : uint8_t end_tx_depth = 0;
5734 : // end_tx_depth set to zero for blocks which go beyond the picture boundaries
5735 37538800 : if ((context_ptr->sb_origin_x + context_ptr->blk_geom->origin_x + context_ptr->blk_geom->bwidth < picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.max_frame_width &&
5736 36580100 : context_ptr->sb_origin_y + context_ptr->blk_geom->origin_y + context_ptr->blk_geom->bheight < picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.max_frame_height))
5737 35503400 : end_tx_depth = get_end_tx_depth(context_ptr->blk_geom->bsize, candidate_buffer->candidate_ptr->type);
5738 : else
5739 2035420 : end_tx_depth = 0;
5740 : // Transform partitioning path (INTRA Luma)
5741 : #if ENHANCE_ATB
5742 38345000 : if (picture_control_set_ptr->parent_pcs_ptr->atb_mode && context_ptr->md_staging_skip_atb == EB_FALSE && end_tx_depth && candidate_buffer->candidate_ptr->use_intrabc == 0) {
5743 813707 : int32_t is_inter = (candidate_buffer->candidate_ptr->type == INTER_MODE || candidate_buffer->candidate_ptr->use_intrabc) ? EB_TRUE : EB_FALSE;
5744 :
5745 : //Y Residual: residual for INTRA is computed inside the TU loop
5746 813707 : if (is_inter)
5747 : //Y Residual
5748 385319 : residual_kernel(
5749 : input_picture_ptr->buffer_y,
5750 : inputOriginIndex,
5751 385319 : input_picture_ptr->stride_y,
5752 385319 : candidate_buffer->prediction_ptr->buffer_y,
5753 : cuOriginIndex,
5754 385319 : candidate_buffer->prediction_ptr->stride_y,
5755 385319 : (int16_t*)candidate_buffer->residual_ptr->buffer_y,
5756 : cuOriginIndex,
5757 385319 : candidate_buffer->residual_ptr->stride_y,
5758 385319 : context_ptr->hbd_mode_decision,
5759 385319 : context_ptr->blk_geom->bwidth,
5760 385319 : context_ptr->blk_geom->bheight);
5761 :
5762 813706 : tx_partitioning_path(
5763 : #else
5764 : if (picture_control_set_ptr->parent_pcs_ptr->atb_mode && context_ptr->md_staging_skip_atb == EB_FALSE && end_tx_depth && candidate_buffer->candidate_ptr->type == INTRA_MODE && candidate_buffer->candidate_ptr->use_intrabc == 0) {
5765 : perform_intra_tx_partitioning(
5766 : #endif
5767 : candidate_buffer,
5768 : context_ptr,
5769 : picture_control_set_ptr,
5770 : ref_fast_cost,
5771 : end_tx_depth,
5772 813706 : context_ptr->cu_ptr->qp,
5773 : &(*count_non_zero_coeffs[0]),
5774 : &y_coeff_bits,
5775 : &y_full_distortion[0]);
5776 : }
5777 : else {
5778 : // Transform partitioning free path (except the 128x128 case)
5779 :
5780 : //Y Residual
5781 36717600 : residual_kernel(
5782 : input_picture_ptr->buffer_y,
5783 : inputOriginIndex,
5784 36717600 : input_picture_ptr->stride_y,
5785 36717600 : candidate_buffer->prediction_ptr->buffer_y,
5786 : cuOriginIndex,
5787 36717600 : candidate_buffer->prediction_ptr->stride_y,
5788 36717600 : (int16_t*)candidate_buffer->residual_ptr->buffer_y,
5789 : cuOriginIndex,
5790 36717600 : candidate_buffer->residual_ptr->stride_y,
5791 36717600 : context_ptr->hbd_mode_decision,
5792 36717600 : context_ptr->blk_geom->bwidth,
5793 36717600 : context_ptr->blk_geom->bheight);
5794 :
5795 : // Transform partitioning free path
5796 : uint8_t tx_search_skip_flag;
5797 36722900 : if (context_ptr->md_staging_tx_search == 0)
5798 33617700 : tx_search_skip_flag = EB_TRUE;
5799 3105160 : else if (context_ptr->md_staging_tx_search == 1)
5800 5349920 : tx_search_skip_flag = picture_control_set_ptr->parent_pcs_ptr->tx_search_level == TX_SEARCH_FULL_LOOP ? get_skip_tx_search_flag(
5801 2622770 : context_ptr->blk_geom->sq_size,
5802 : ref_fast_cost,
5803 2622770 : *candidate_buffer->fast_cost_ptr,
5804 2622770 : picture_control_set_ptr->parent_pcs_ptr->tx_weight) : EB_TRUE;
5805 : else
5806 377991 : tx_search_skip_flag = picture_control_set_ptr->parent_pcs_ptr->tx_search_level == TX_SEARCH_FULL_LOOP ? EB_FALSE : EB_TRUE;
5807 :
5808 36722900 : if (!tx_search_skip_flag) {
5809 1760940 : product_full_loop_tx_search(
5810 : candidate_buffer,
5811 : context_ptr,
5812 : picture_control_set_ptr);
5813 :
5814 1760920 : candidate_ptr->full_distortion = 0;
5815 :
5816 1760920 : memset(candidate_ptr->eob[0], 0, sizeof(uint16_t));
5817 :
5818 : //re-init
5819 1760920 : candidate_ptr->y_has_coeff = 0;
5820 : }
5821 : #if ENHANCE_ATB
5822 36722800 : context_ptr->tx_depth = candidate_buffer->candidate_ptr->tx_depth;
5823 36722800 : context_ptr->full_loop_luma_dc_sign_level_coeff_neighbor_array = context_ptr->luma_dc_sign_level_coeff_neighbor_array;
5824 36722800 : context_ptr->txb_1d_offset = 0;
5825 73443100 : for (context_ptr->txb_itr = 0; context_ptr->txb_itr < context_ptr->blk_geom->txb_count[context_ptr->tx_depth]; context_ptr->txb_itr++)
5826 : #endif
5827 36726100 : product_full_loop(
5828 : candidate_buffer,
5829 : context_ptr,
5830 : picture_control_set_ptr,
5831 : input_picture_ptr,
5832 36726100 : context_ptr->cu_ptr->qp,
5833 : &(*count_non_zero_coeffs[0]),
5834 : &y_coeff_bits,
5835 : &y_full_distortion[0]);
5836 : }
5837 :
5838 37530800 : candidate_ptr->chroma_distortion_inter_depth = 0;
5839 37530800 : candidate_ptr->chroma_distortion = 0;
5840 :
5841 : //CHROMA
5842 :
5843 37530800 : cbFullDistortion[DIST_CALC_RESIDUAL] = 0;
5844 37530800 : crFullDistortion[DIST_CALC_RESIDUAL] = 0;
5845 37530800 : cbFullDistortion[DIST_CALC_PREDICTION] = 0;
5846 37530800 : crFullDistortion[DIST_CALC_PREDICTION] = 0;
5847 :
5848 37530800 : cb_coeff_bits = 0;
5849 37530800 : cr_coeff_bits = 0;
5850 :
5851 : // FullLoop and TU search
5852 37530800 : uint16_t cb_qp = context_ptr->qp;
5853 37530800 : uint16_t cr_qp = context_ptr->qp;
5854 37530800 : if (context_ptr->md_staging_skip_full_chroma == EB_FALSE) {
5855 :
5856 3925010 : if (context_ptr->blk_geom->has_uv && context_ptr->chroma_level <= CHROMA_MODE_1) {
5857 : //Cb Residual
5858 3169220 : residual_kernel(
5859 : input_picture_ptr->buffer_cb,
5860 : inputCbOriginIndex,
5861 3169220 : input_picture_ptr->stride_cb,
5862 3169220 : candidate_buffer->prediction_ptr->buffer_cb,
5863 : cuChromaOriginIndex,
5864 3169220 : candidate_buffer->prediction_ptr->stride_cb,
5865 3169220 : (int16_t*)candidate_buffer->residual_ptr->buffer_cb,
5866 : cuChromaOriginIndex,
5867 3169220 : candidate_buffer->residual_ptr->stride_cb,
5868 3169220 : context_ptr->hbd_mode_decision,
5869 3169220 : context_ptr->blk_geom->bwidth_uv,
5870 3169220 : context_ptr->blk_geom->bheight_uv);
5871 :
5872 : //Cr Residual
5873 3169260 : residual_kernel(
5874 : input_picture_ptr->buffer_cr,
5875 : inputCbOriginIndex,
5876 3169260 : input_picture_ptr->stride_cr,
5877 3169260 : candidate_buffer->prediction_ptr->buffer_cr,
5878 : cuChromaOriginIndex,
5879 3169260 : candidate_buffer->prediction_ptr->stride_cr,
5880 3169260 : (int16_t*)candidate_buffer->residual_ptr->buffer_cr,
5881 : cuChromaOriginIndex,
5882 3169260 : candidate_buffer->residual_ptr->stride_cr,
5883 3169260 : context_ptr->hbd_mode_decision,
5884 3169260 : context_ptr->blk_geom->bwidth_uv,
5885 3169260 : context_ptr->blk_geom->bheight_uv);
5886 : }
5887 :
5888 3925090 : if (candidate_ptr->type == INTRA_MODE && candidate_buffer->candidate_ptr->intra_chroma_mode == UV_CFL_PRED) {
5889 : // If mode is CFL:
5890 : // 1: recon the Luma
5891 : // 2: Form the pred_buf_q3
5892 : // 3: Loop over alphas and find the best or choose DC
5893 : // 4: Recalculate the residual for chroma
5894 749216 : CflPrediction(
5895 : picture_control_set_ptr,
5896 : candidate_buffer,
5897 : sb_ptr,
5898 : context_ptr,
5899 : input_picture_ptr,
5900 : inputCbOriginIndex,
5901 : cuChromaOriginIndex);
5902 : }
5903 3925120 : if (context_ptr->blk_geom->has_uv && context_ptr->chroma_level <= CHROMA_MODE_1) {
5904 3169240 : full_loop_r(
5905 : sb_ptr,
5906 : candidate_buffer,
5907 : context_ptr,
5908 : input_picture_ptr,
5909 : picture_control_set_ptr,
5910 : PICTURE_BUFFER_DESC_CHROMA_MASK,
5911 : cb_qp,
5912 : cr_qp,
5913 : &(*count_non_zero_coeffs[1]),
5914 : &(*count_non_zero_coeffs[2]));
5915 :
5916 3169250 : cu_full_distortion_fast_tu_mode_r(
5917 : sb_ptr,
5918 : candidate_buffer,
5919 : context_ptr,
5920 : candidate_ptr,
5921 : picture_control_set_ptr,
5922 : input_picture_ptr,
5923 : cbFullDistortion,
5924 : crFullDistortion,
5925 : count_non_zero_coeffs,
5926 : COMPONENT_CHROMA,
5927 : &cb_coeff_bits,
5928 : &cr_coeff_bits,
5929 : 1);
5930 : }
5931 :
5932 : // Check independent chroma vs. cfl
5933 3925210 : if (context_ptr->blk_geom->has_uv && context_ptr->chroma_level == CHROMA_MODE_0) {
5934 1017200 : if (candidate_buffer->candidate_ptr->type == INTRA_MODE && (candidate_buffer->candidate_ptr->intra_chroma_mode == UV_CFL_PRED || candidate_buffer->candidate_ptr->intra_chroma_mode == UV_DC_PRED)) {
5935 522352 : check_best_indepedant_cfl(
5936 : picture_control_set_ptr,
5937 : input_picture_ptr,
5938 : context_ptr,
5939 : inputCbOriginIndex,
5940 : cuChromaOriginIndex,
5941 : candidate_buffer,
5942 522352 : (uint8_t)cb_qp,
5943 522352 : (uint8_t)cr_qp,
5944 : cbFullDistortion,
5945 : crFullDistortion,
5946 : &cb_coeff_bits,
5947 : &cr_coeff_bits);
5948 : }
5949 : }
5950 : }
5951 :
5952 37531000 : candidate_ptr->block_has_coeff = (candidate_ptr->y_has_coeff | candidate_ptr->u_has_coeff | candidate_ptr->v_has_coeff) ? EB_TRUE : EB_FALSE;
5953 :
5954 : //ALL PLANE
5955 37531000 : Av1ProductFullCostFuncTable[candidate_ptr->type](
5956 : picture_control_set_ptr,
5957 : context_ptr,
5958 : candidate_buffer,
5959 : cu_ptr,
5960 : y_full_distortion,
5961 : cbFullDistortion,
5962 : crFullDistortion,
5963 37531000 : context_ptr->full_lambda,
5964 : &y_coeff_bits,
5965 : &cb_coeff_bits,
5966 : &cr_coeff_bits,
5967 37531000 : context_ptr->blk_geom->bsize);
5968 :
5969 37543600 : candidate_buffer->cb_distortion[DIST_CALC_RESIDUAL] = cbFullDistortion[DIST_CALC_RESIDUAL];
5970 37543600 : candidate_buffer->cb_distortion[DIST_CALC_PREDICTION] = cbFullDistortion[DIST_CALC_PREDICTION];
5971 37543600 : candidate_buffer->cb_coeff_bits = cb_coeff_bits;
5972 :
5973 37543600 : candidate_buffer->cr_distortion[DIST_CALC_RESIDUAL] = crFullDistortion[DIST_CALC_RESIDUAL];
5974 37543600 : candidate_buffer->cr_distortion[DIST_CALC_PREDICTION] = crFullDistortion[DIST_CALC_PREDICTION];
5975 37543600 : candidate_buffer->cr_coeff_bits = cr_coeff_bits;
5976 37543600 : candidate_buffer->candidate_ptr->full_distortion = (uint32_t)(y_full_distortion[0]);
5977 :
5978 37543600 : candidate_buffer->y_coeff_bits = y_coeff_bits;
5979 37543600 : candidate_ptr->full_distortion = (uint32_t)(y_full_distortion[0]);
5980 37543600 : }
/*
 * md_stage_1 (compiled as md_stage_2 when REMOVE_MD_STAGE_1 is off)
 *
 * Runs full_loop_core() once for every candidate recorded for
 * context_ptr->target_class, after switching the md_staging_* knobs in
 * context_ptr to a reduced configuration (see the inline notes below).
 *
 * picture_control_set_ptr, sb_ptr, cu_ptr, input_picture_ptr, the four
 * origin indices and ref_fast_cost are not interpreted here; they are
 * forwarded unchanged to full_loop_core().
 *
 * context_ptr is both input and output: the candidate list is read from
 * cand_buff_indices[target_class][] / candidate_buffer_ptr_array, and the
 * md_staging_* fields written here remain set after the function returns.
 */
5981 : #if REMOVE_MD_STAGE_1
5982 3160720 : void md_stage_1(
5983 : #else
5984 : void md_stage_2(
5985 : #endif
5986 : PictureControlSet *picture_control_set_ptr,
5987 : LargestCodingUnit *sb_ptr,
5988 : CodingUnit *cu_ptr,
5989 : ModeDecisionContext *context_ptr,
5990 : EbPictureBufferDesc *input_picture_ptr,
5991 : uint32_t inputOriginIndex,
5992 : uint32_t inputCbOriginIndex,
5993 : uint32_t cuOriginIndex,
5994 : uint32_t cuChromaOriginIndex,
5995 : uint64_t ref_fast_cost)
5996 : {
5997 3160720 : ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base = context_ptr->candidate_buffer_ptr_array;
5998 3160720 : ModeDecisionCandidateBuffer **candidate_buffer_ptr_array = &(candidate_buffer_ptr_array_base[0]);
5999 : ModeDecisionCandidateBuffer *candidate_buffer;
6000 : ModeDecisionCandidate *candidate_ptr;
6001 :
6002 : uint32_t fullLoopCandidateIndex;
6003 : uint32_t candidateIndex;
6004 :
6005 : // Set MD Staging full_loop_core settings
6006 : #if !REMOVE_MD_STAGE_1
6007 : context_ptr->md_staging_skip_full_pred = EB_TRUE;
6008 : #endif
    // full_loop_core() only takes its perform_intra_tx_partitioning() path when
    // md_staging_skip_atb == EB_FALSE, so that path is disabled for this stage.
6009 3160720 : context_ptr->md_staging_skip_atb = EB_TRUE;
    // md_staging_tx_search == 0 makes full_loop_core() set its
    // tx_search_skip_flag to EB_TRUE (no product_full_loop_tx_search() call).
6010 3160720 : context_ptr->md_staging_tx_search = 0;
    // full_loop_core() runs its chroma section only when
    // md_staging_skip_full_chroma == EB_FALSE. Which expression is compiled
    // here depends on FILTER_INTRA_FLAG and REMOVE_MD_STAGE_1.
6011 : #if FILTER_INTRA_FLAG
6012 : #if REMOVE_MD_STAGE_1
6013 3160720 : context_ptr->md_staging_skip_full_chroma = EB_TRUE;
6014 : #else
6015 : context_ptr->md_staging_skip_full_chroma = context_ptr->target_class == CAND_CLASS_0 || context_ptr->target_class == CAND_CLASS_6 || context_ptr->md_staging_mode == MD_STAGING_MODE_3;
6016 : #endif
6017 : #else
6018 : context_ptr->md_staging_skip_full_chroma = context_ptr->target_class == CAND_CLASS_0 || context_ptr->md_staging_mode == MD_STAGING_MODE_3;
6019 : #endif
6020 :
    // The loop bound is md_stage_1_count[target_class] with REMOVE_MD_STAGE_1,
    // md_stage_2_count[target_class] otherwise. The opening brace of the loop
    // lives inside the #if/#else, the closing brace is shared.
6021 : #if REMOVE_MD_STAGE_1
6022 3160720 : context_ptr->md_staging_skip_rdoq = EB_TRUE;
6023 36782200 : for (fullLoopCandidateIndex = 0; fullLoopCandidateIndex < context_ptr->md_stage_1_count[context_ptr->target_class]; ++fullLoopCandidateIndex) {
6024 : #else
6025 : context_ptr->md_staging_skip_rdoq = (context_ptr->md_staging_mode == MD_STAGING_MODE_2 || context_ptr->md_staging_mode == MD_STAGING_MODE_3);
6026 : for (fullLoopCandidateIndex = 0; fullLoopCandidateIndex < context_ptr->md_stage_2_count[context_ptr->target_class]; ++fullLoopCandidateIndex) {
6027 : #endif
    // Map the per-class position to an index into the shared candidate buffer array.
6028 33622900 : candidateIndex = context_ptr->cand_buff_indices[context_ptr->target_class][fullLoopCandidateIndex];
6029 33622900 : candidate_buffer = candidate_buffer_ptr_array[candidateIndex];
6030 33622900 : candidate_ptr = candidate_buffer->candidate_ptr;
6031 :
    // Per-candidate settings (REMOVE_MD_STAGE_1 only). These are loop-invariant
    // values re-written on every iteration; interp_filters of the candidate is
    // reset to 0 before the call.
6032 : #if REMOVE_MD_STAGE_1
6033 33622900 : context_ptr->md_staging_skip_full_pred = EB_FALSE;
6034 33622900 : context_ptr->md_staging_skip_interpolation_search = EB_FALSE;
6035 33622900 : context_ptr->md_staging_skip_inter_chroma_pred = EB_TRUE;
6036 33622900 : candidate_buffer->candidate_ptr->interp_filters = 0;
6037 : #endif
6038 33622900 : full_loop_core(
6039 : picture_control_set_ptr,
6040 : sb_ptr,
6041 : cu_ptr,
6042 : context_ptr,
6043 : candidate_buffer,
6044 : candidate_ptr,
6045 : input_picture_ptr,
6046 : inputOriginIndex,
6047 : inputCbOriginIndex,
6048 : cuOriginIndex,
6049 : cuChromaOriginIndex,
6050 : ref_fast_cost);
6051 : }
6052 3159220 : }
/*
 * md_stage_2 (compiled as md_stage_3 when REMOVE_MD_STAGE_1 is off)
 *
 * Runs full_loop_core() on up to fullCandidateTotalCount candidates with
 * full chroma and RDOQ enabled (md_staging_skip_full_chroma and
 * md_staging_skip_rdoq are both set to EB_FALSE).
 *
 * Candidate order: when context_ptr->full_loop_escape == 2 the i-th
 * candidate is taken from sorted_candidate_index_array[i], otherwise from
 * best_candidate_index_array[i].
 *
 * Early exit: on non-I slices the loop returns before processing a
 * candidate if that candidate is INTRA (or full_loop_escape == 2) and the
 * cheapest INTER candidate seen so far had y_has_coeff == 0. In that case
 * the number of candidates actually processed is stored in
 * context_ptr->md_stage_2_total_count (md_stage_3_total_count without
 * REMOVE_MD_STAGE_1). The count is NOT written when the loop runs to
 * completion.
 *
 * picture_control_set_ptr, sb_ptr, cu_ptr, input_picture_ptr, the four
 * origin indices and ref_fast_cost are forwarded to full_loop_core().
 */
6053 : #if REMOVE_MD_STAGE_1
6054 811357 : void md_stage_2(
6055 : #else
6056 : void md_stage_3(
6057 : #endif
6058 : PictureControlSet *picture_control_set_ptr,
6059 : LargestCodingUnit *sb_ptr,
6060 : CodingUnit *cu_ptr,
6061 : ModeDecisionContext *context_ptr,
6062 : EbPictureBufferDesc *input_picture_ptr,
6063 : uint32_t inputOriginIndex,
6064 : uint32_t inputCbOriginIndex,
6065 : uint32_t cuOriginIndex,
6066 : uint32_t cuChromaOriginIndex,
6067 : uint32_t fullCandidateTotalCount,
6068 : uint64_t ref_fast_cost)
6069 : {
6070 811357 : ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base = context_ptr->candidate_buffer_ptr_array;
6071 811357 : ModeDecisionCandidateBuffer **candidate_buffer_ptr_array = &(candidate_buffer_ptr_array_base[0]);
6072 : ModeDecisionCandidateBuffer *candidate_buffer;
6073 : ModeDecisionCandidate *candidate_ptr;
6074 :
    // Tracks y_has_coeff of the cheapest INTER candidate seen so far. Starts at
    // 1 so the early exit below cannot fire before an INTER candidate is seen.
6075 811357 : uint32_t best_inter_luma_zero_coeff = 1;
    // NOTE(review): the seed is the 32-bit maximum stored in a 64-bit variable.
    // An INTER candidate whose full cost is >= 0xFFFFFFFF would never update
    // the tracker below - confirm whether the 64-bit maximum was intended.
6076 811357 : uint64_t best_full_cost = 0xFFFFFFFFull;
6077 : uint32_t fullLoopCandidateIndex;
6078 : uint32_t candidateIndex;
6079 :
6080 4736450 : for (fullLoopCandidateIndex = 0; fullLoopCandidateIndex < fullCandidateTotalCount; ++fullLoopCandidateIndex) {
6081 :
6082 3984600 : candidateIndex = (context_ptr->full_loop_escape == 2) ? context_ptr->sorted_candidate_index_array[fullLoopCandidateIndex] : context_ptr->best_candidate_index_array[fullLoopCandidateIndex];
6083 3984600 : candidate_buffer = candidate_buffer_ptr_array[candidateIndex];
6084 3984600 : candidate_ptr = candidate_buffer->candidate_ptr;
6085 :
6086 : // Set MD Staging full_loop_core settings
6087 : #if REMOVE_MD_STAGE_1
6088 3984600 : context_ptr->md_staging_skip_full_pred = (context_ptr->md_staging_mode == MD_STAGING_MODE_0 && picture_control_set_ptr->parent_pcs_ptr->interpolation_search_level != IT_SEARCH_FULL_LOOP);
6089 3984600 : context_ptr->md_staging_skip_interpolation_search = (context_ptr->md_staging_mode == MD_STAGING_MODE_1 || picture_control_set_ptr->parent_pcs_ptr->interpolation_search_level != IT_SEARCH_FULL_LOOP);
6090 3984600 : context_ptr->md_staging_skip_inter_chroma_pred = EB_FALSE;
6091 : #else
6092 : context_ptr->md_staging_skip_full_pred = (context_ptr->md_staging_mode == MD_STAGING_MODE_3) ? EB_FALSE: EB_TRUE;
6093 : #endif
6094 3984600 : context_ptr->md_staging_skip_atb = context_ptr->coeff_based_skip_atb;
    // md_staging_tx_search as consumed by full_loop_core():
    //   1 -> tx search gated by get_skip_tx_search_flag() (only when
    //        tx_search_level == TX_SEARCH_FULL_LOOP, otherwise skipped)
    //   2 -> tx search always run when tx_search_level == TX_SEARCH_FULL_LOOP
    // The set of candidate classes that get 2 depends on FILTER_INTRA_FLAG / PAL_CLASS.
6095 : #if FILTER_INTRA_FLAG
6096 : #if PAL_CLASS
6097 3984600 : context_ptr->md_staging_tx_search =
6098 3984600 : (candidate_ptr->cand_class == CAND_CLASS_0 || candidate_ptr->cand_class == CAND_CLASS_6 || candidate_ptr->cand_class == CAND_CLASS_7)
6099 : ? 2 : 1;
6100 : #else
6101 : context_ptr->md_staging_tx_search = (candidate_ptr->cand_class == CAND_CLASS_0 || candidate_ptr->cand_class == CAND_CLASS_6)? 2 : 1;
6102 : #endif
6103 : #else
6104 : context_ptr->md_staging_tx_search = candidate_ptr->cand_class == CAND_CLASS_0 ? 2 : 1;
6105 : #endif
6106 3984600 : context_ptr->md_staging_skip_full_chroma = EB_FALSE;
6107 3984600 : context_ptr->md_staging_skip_rdoq = EB_FALSE;
6108 :
    // Early exit: stop before this candidate and record how many candidates
    // were fully processed (fullLoopCandidateIndex is the index of the first
    // candidate that was NOT processed).
6109 3984600 : if (picture_control_set_ptr->slice_type != I_SLICE) {
6110 3350080 : if ((candidate_ptr->type == INTRA_MODE || context_ptr->full_loop_escape == 2) && best_inter_luma_zero_coeff == 0) {
6111 : #if REMOVE_MD_STAGE_1
6112 59633 : context_ptr->md_stage_2_total_count = fullLoopCandidateIndex;
6113 : #else
6114 : context_ptr->md_stage_3_total_count = fullLoopCandidateIndex;
6115 : #endif
6116 59633 : return;
6117 : }
6118 : }
6119 :
6120 3924960 : full_loop_core(
6121 : picture_control_set_ptr,
6122 : sb_ptr,
6123 : cu_ptr,
6124 : context_ptr,
6125 : candidate_buffer,
6126 : candidate_ptr,
6127 : input_picture_ptr,
6128 : inputOriginIndex,
6129 : inputCbOriginIndex,
6130 : cuOriginIndex,
6131 : cuChromaOriginIndex,
6132 : ref_fast_cost);
6133 :
    // Update the "cheapest INTER candidate" tracker. Only done when
    // full_loop_escape is non-zero, on non-I slices, for INTER candidates.
6134 3925090 : if (context_ptr->full_loop_escape)
6135 : {
6136 149237 : if (picture_control_set_ptr->slice_type != I_SLICE) {
6137 114803 : if (candidate_ptr->type == INTER_MODE) {
6138 104369 : if (*candidate_buffer->full_cost_ptr < best_full_cost) {
6139 77057 : best_inter_luma_zero_coeff = candidate_ptr->y_has_coeff;
6140 77057 : best_full_cost = *candidate_buffer->full_cost_ptr;
6141 : }
6142 : }
6143 : }
6144 : }
6145 : }
6146 : }
6147 :
/*
 * move_cu_data
 *
 * Copies the coding-unit state listed below from src_cu into dst_cu,
 * field by field. Despite the name nothing is moved: src_cu is only read.
 *
 * pcs, context_ptr (PAL_SUP builds only): used solely to evaluate
 *     svt_av1_allow_palette(palette_mode, bsize) for the palette map copy.
 * src_cu: source coding unit.
 * dst_cu: destination coding unit. dst_cu->av1xd is written through with
 *     memcpy, so it must already point at storage for one MacroBlockD.
 *
 * Array members (transform_unit_array, prediction_unit_array,
 * inter_mode_ctx, ref_mvs, predmv, quantized_dc) are copied with memcpy
 * using the element counts spelled out at each call.
 */
6148 33840 : void move_cu_data(
6149 : #if PAL_SUP
6150 : PictureControlSet* pcs,
6151 : EncDecContext *context_ptr,
6152 : #endif
6153 : CodingUnit *src_cu,
6154 : CodingUnit *dst_cu)
6155 : {
6156 : #if PAL_SUP
6157 33840 : memcpy(&dst_cu->palette_info.pmi, &src_cu->palette_info.pmi, sizeof(PaletteModeInfo));
    // When palette is allowed for this block size, a fresh MAX_PALETTE_SQUARE
    // byte buffer is allocated for the destination colour index map and the
    // source map is copied into it. On allocation failure the assert fires
    // (unless compiled out with NDEBUG) and an error line is printed; the
    // function then continues with color_idx_map == NULL.
    // NOTE(review): the previous value of dst_cu->palette_info.color_idx_map is
    // overwritten without being freed, and nothing in this function releases
    // the new buffer - confirm the caller owns and frees it.
6158 33840 : if (svt_av1_allow_palette(pcs->parent_pcs_ptr->palette_mode, context_ptr->blk_geom->bsize)){
6159 0 : dst_cu->palette_info.color_idx_map = (uint8_t *)malloc(MAX_PALETTE_SQUARE);
6160 0 : assert(dst_cu->palette_info.color_idx_map != NULL && "palette:Not-Enough-Memory");
6161 0 : if(dst_cu->palette_info.color_idx_map != NULL)
6162 0 : memcpy(dst_cu->palette_info.color_idx_map, src_cu->palette_info.color_idx_map, MAX_PALETTE_SQUARE);
6163 : else
6164 0 : printf("ERROR palette:Not-Enough-Memory\n");
6165 : }
6166 : #endif
    // NOTE(review): interp_filters is assigned again, unconditionally, near the
    // end of this function, so this OBMC_FLAG copy is redundant.
6167 : #if OBMC_FLAG
6168 33839 : dst_cu->interp_filters = src_cu->interp_filters;
6169 : #endif
    // Inter-inter compound prediction parameters.
6170 33839 : dst_cu->interinter_comp.type = src_cu->interinter_comp.type;
6171 33839 : dst_cu->interinter_comp.mask_type = src_cu->interinter_comp.mask_type;
6172 33839 : dst_cu->interinter_comp.wedge_index = src_cu->interinter_comp.wedge_index;
6173 33839 : dst_cu->interinter_comp.wedge_sign = src_cu->interinter_comp.wedge_sign;
6174 33839 : dst_cu->compound_idx = src_cu->compound_idx;
6175 33839 : dst_cu->comp_group_idx = src_cu->comp_group_idx;
6176 :
6177 : #if II_COMP_FLAG
6178 33839 : dst_cu->is_interintra_used = src_cu->is_interintra_used ;
6179 33839 : dst_cu->interintra_mode = src_cu->interintra_mode ;
6180 33839 : dst_cu->use_wedge_interintra = src_cu->use_wedge_interintra ;
6181 33839 : dst_cu->interintra_wedge_index = src_cu->interintra_wedge_index ;//inter_intra wedge index
6182 33839 : dst_cu->ii_wedge_sign = src_cu->ii_wedge_sign ;//inter_intra wedge sign=-1
6183 : #endif
6184 : //CHKN TransformUnit transform_unit_array[TRANSFORM_UNIT_MAX_COUNT]; // 2-bytes * 21 = 42-bytes
6185 33839 : memcpy(dst_cu->transform_unit_array, src_cu->transform_unit_array, TRANSFORM_UNIT_MAX_COUNT * sizeof(TransformUnit));
6186 :
6187 : //CHKN PredictionUnit prediction_unit_array[MAX_NUM_OF_PU_PER_CU]; // 35-bytes * 4 = 140 bytes
6188 33839 : memcpy(dst_cu->prediction_unit_array, src_cu->prediction_unit_array, MAX_NUM_OF_PU_PER_CU * sizeof(PredictionUnit));
6189 :
6190 : //CHKN unsigned skip_flag_context : 2;
6191 : //CHKN unsigned prediction_mode_flag : 2;
6192 : //CHKN unsigned rootCbf : 1;
6193 : //CHKN unsigned split_flag_context : 2;
6194 : //CHKN #if !ADD_DELTA_QP_SUPPORT
6195 : //CHKN unsigned qp : 6;
6196 : //CHKN unsigned ref_qp : 6;
6197 : //CHKN
6198 : //CHKN signed delta_qp : 8; // can be signed 8bits
6199 : //CHKN signed org_delta_qp : 8;
6200 : //CHKN #endif
6201 : //CHKN
6202 : //CHKN #if ADD_DELTA_QP_SUPPORT
6203 : //CHKN uint16_t qp;
6204 : //CHKN uint16_t ref_qp;
6205 : //CHKN
6206 : //CHKN int16_t delta_qp; // can be signed 8bits
6207 : //CHKN int16_t org_delta_qp;
6208 : //CHKN #endif
6209 :
    // Of the fields listed in the CHKN notes above, ref_qp and org_delta_qp are
    // not copied by this function.
6210 33839 : dst_cu->skip_flag_context = src_cu->skip_flag_context;
6211 33839 : dst_cu->prediction_mode_flag = src_cu->prediction_mode_flag;
6212 33839 : dst_cu->block_has_coeff = src_cu->block_has_coeff;
6213 33839 : dst_cu->split_flag_context = src_cu->split_flag_context;
6214 33839 : dst_cu->qp = src_cu->qp;
6215 33839 : dst_cu->delta_qp = src_cu->delta_qp;
6216 :
6217 33839 : dst_cu->tx_depth = src_cu->tx_depth;
6218 :
6219 : //CHKN // Coded Tree
6220 : //CHKN struct {
6221 : //CHKN unsigned leaf_index : 8;
6222 : //CHKN unsigned split_flag : 1;
6223 : //CHKN unsigned skip_flag : 1;
6224 : //CHKN
6225 : //CHKN };
6226 :
6227 33839 : dst_cu->leaf_index = src_cu->leaf_index;
6228 33839 : dst_cu->split_flag = src_cu->split_flag;
6229 33839 : dst_cu->skip_flag = src_cu->skip_flag;
6230 :
6231 : //CHKN MacroBlockD* av1xd;
    // Copies the pointed-to MacroBlockD contents; the av1xd pointers themselves
    // are left untouched.
6232 33839 : memcpy(dst_cu->av1xd, src_cu->av1xd, sizeof(MacroBlockD));
6233 :
6234 : // uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
6235 :
6236 : //CHKN int16_t inter_mode_ctx[MODE_CTX_REF_FRAMES];
6237 33839 : memcpy(dst_cu->inter_mode_ctx, src_cu->inter_mode_ctx, MODE_CTX_REF_FRAMES * sizeof(int16_t));
6238 :
6239 : //CHKN IntMv ref_mvs[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; //used only for nonCompound modes.
6240 33839 : memcpy(dst_cu->ref_mvs, src_cu->ref_mvs, MODE_CTX_REF_FRAMES*MAX_MV_REF_CANDIDATES * sizeof(IntMv));
6241 :
6242 : //CHKN uint8_t drl_index;
6243 : //CHKN PredictionMode pred_mode;
6244 33839 : dst_cu->drl_index = src_cu->drl_index;
6245 33839 : dst_cu->pred_mode = src_cu->pred_mode;
6246 :
6247 : //CHKN IntMv predmv[2];
6248 :
6249 33839 : memcpy(dst_cu->predmv, src_cu->predmv, 2 * sizeof(IntMv));
6250 : //CHKN uint8_t skip_coeff_context;
6251 : //CHKN int16_t luma_txb_skip_context;
6252 : //CHKN int16_t luma_dc_sign_context;
6253 : //CHKN int16_t cb_txb_skip_context;
6254 : //CHKN int16_t cb_dc_sign_context;
6255 : //CHKN int16_t cr_txb_skip_context;
6256 : //CHKN int16_t cr_dc_sign_context;
6257 : //CHKN uint8_t reference_mode_context;
6258 : //CHKN uint8_t compoud_reference_type_context;
6259 : //CHKN uint32_t partitionContext;
6260 :
6261 33839 : dst_cu->skip_coeff_context = src_cu->skip_coeff_context;
6262 33839 : dst_cu->reference_mode_context = src_cu->reference_mode_context;
6263 33839 : dst_cu->compoud_reference_type_context = src_cu->compoud_reference_type_context;
6264 33839 : dst_cu->segment_id = src_cu->segment_id;
6265 :
6266 33839 : memcpy(dst_cu->quantized_dc, src_cu->quantized_dc, 3 * MAX_TXB_COUNT * sizeof(int32_t));
6267 : //CHKN uint32_t is_inter_ctx;
6268 : //CHKN uint32_t interp_filters;
6269 :
6270 33839 : dst_cu->is_inter_ctx = src_cu->is_inter_ctx;
6271 33839 : dst_cu->interp_filters = src_cu->interp_filters;
6272 :
6273 33839 : dst_cu->part = src_cu->part;
6274 33839 : dst_cu->shape = src_cu->shape;
6275 33839 : dst_cu->mds_idx = src_cu->mds_idx;
6276 : #if FILTER_INTRA_FLAG
6277 33839 : dst_cu->filter_intra_mode = src_cu->filter_intra_mode;
6278 : #endif
6279 33839 : }
/*
 * move_cu_data_redund
 *
 * Field-by-field copy of coding-unit state from src_cu into dst_cu, in the
 * same spirit as move_cu_data() but with these visible differences:
 *   - the palette colour index map is copied into the buffer dst_cu already
 *     points at (no allocation is done here);
 *   - split_flag and split-independent mds_idx are not copied (the mds_idx
 *     assignment is commented out at the end of the function);
 *   - segment_id, seg_id_predicted, ref_qp and org_delta_qp are copied only
 *     in PAL_SUP builds.
 *
 * pcs, context_ptr (PAL_SUP builds only): used solely to evaluate
 *     svt_av1_allow_palette(palette_mode, bsize).
 * src_cu: source coding unit, read only.
 * dst_cu: destination coding unit. dst_cu->av1xd must already point at
 *     storage for one MacroBlockD; in PAL_SUP builds with palette allowed,
 *     dst_cu->palette_info.color_idx_map must already point at
 *     MAX_PALETTE_SQUARE writable bytes.
 */
6280 101022 : void move_cu_data_redund(
6281 : #if PAL_SUP
6282 : PictureControlSet *pcs,
6283 : ModeDecisionContext *context_ptr,
6284 : #endif
6285 : CodingUnit *src_cu,
6286 : CodingUnit *dst_cu){
    // NOTE(review): these four scalar copies sit inside the PAL_SUP guard even
    // though they are not palette fields; with PAL_SUP off they are not copied
    // at all. Confirm that is intended.
6287 : #if PAL_SUP
6288 101022 : dst_cu->segment_id = src_cu->segment_id;
6289 101022 : dst_cu->seg_id_predicted = src_cu->seg_id_predicted;
6290 101022 : dst_cu->ref_qp = src_cu->ref_qp;
6291 101022 : dst_cu->org_delta_qp = src_cu->org_delta_qp;
6292 :
6293 101022 : memcpy(&dst_cu->palette_info.pmi, &src_cu->palette_info.pmi, sizeof(PaletteModeInfo));
    // No NULL check on either color_idx_map pointer before the copy.
6294 101022 : if (svt_av1_allow_palette(pcs->parent_pcs_ptr->palette_mode, context_ptr->blk_geom->bsize))
6295 0 : memcpy(dst_cu->palette_info.color_idx_map, src_cu->palette_info.color_idx_map, MAX_PALETTE_SQUARE);
6296 :
6297 : #endif
    // NOTE(review): interp_filters is assigned again, unconditionally, near the
    // end of this function, so this OBMC_FLAG copy is redundant.
6298 : #if OBMC_FLAG
6299 101022 : dst_cu->interp_filters = src_cu->interp_filters;
6300 : #endif
    // Inter-inter compound prediction parameters.
6301 101022 : dst_cu->interinter_comp.type = src_cu->interinter_comp.type;
6302 101022 : dst_cu->interinter_comp.mask_type = src_cu->interinter_comp.mask_type;
6303 101022 : dst_cu->interinter_comp.wedge_index = src_cu->interinter_comp.wedge_index;
6304 101022 : dst_cu->interinter_comp.wedge_sign = src_cu->interinter_comp.wedge_sign;
6305 101022 : dst_cu->compound_idx = src_cu->compound_idx;
6306 101022 : dst_cu->comp_group_idx = src_cu->comp_group_idx;
6307 : #if II_COMP_FLAG
6308 101022 : dst_cu->is_interintra_used = src_cu->is_interintra_used ;
6309 101022 : dst_cu->interintra_mode = src_cu->interintra_mode ;
6310 101022 : dst_cu->use_wedge_interintra = src_cu->use_wedge_interintra ;
6311 101022 : dst_cu->interintra_wedge_index = src_cu->interintra_wedge_index ;//inter_intra wedge index
6312 101022 : dst_cu->ii_wedge_sign = src_cu->ii_wedge_sign ;//inter_intra wedge sign=-1
6313 : #endif
6314 : #if FILTER_INTRA_FLAG
6315 101022 : dst_cu->filter_intra_mode = src_cu->filter_intra_mode;
6316 : #endif
6317 : //CHKN TransformUnit_t transform_unit_array[TRANSFORM_UNIT_MAX_COUNT]; // 2-bytes * 21 = 42-bytes
6318 101022 : memcpy(dst_cu->transform_unit_array, src_cu->transform_unit_array, TRANSFORM_UNIT_MAX_COUNT * sizeof(TransformUnit));
6319 :
6320 : //CHKN PredictionUnit_t prediction_unit_array[MAX_NUM_OF_PU_PER_CU]; // 35-bytes * 4 = 140 bytes
6321 101022 : memcpy(dst_cu->prediction_unit_array, src_cu->prediction_unit_array, MAX_NUM_OF_PU_PER_CU * sizeof(PredictionUnit));
6322 :
6323 : //CHKN unsigned skip_flag_context : 2;
6324 : //CHKN unsigned prediction_mode_flag : 2;
6325 : //CHKN unsigned rootCbf : 1;
6326 : //CHKN unsigned split_flag_context : 2;
6327 : //CHKN #if !ADD_DELTA_QP_SUPPORT
6328 : //CHKN unsigned qp : 6;
6329 : //CHKN unsigned ref_qp : 6;
6330 : //CHKN
6331 : //CHKN signed delta_qp : 8; // can be signed 8bits
6332 : //CHKN signed org_delta_qp : 8;
6333 : //CHKN #endif
6334 : //CHKN
6335 : //CHKN #if ADD_DELTA_QP_SUPPORT
6336 : //CHKN uint16_t qp;
6337 : //CHKN uint16_t ref_qp;
6338 : //CHKN
6339 : //CHKN int16_t delta_qp; // can be signed 8bits
6340 : //CHKN int16_t org_delta_qp;
6341 : //CHKN #endif
6342 :
6343 101022 : dst_cu->skip_flag_context = src_cu->skip_flag_context;
6344 101022 : dst_cu->prediction_mode_flag = src_cu->prediction_mode_flag;
6345 101022 : dst_cu->block_has_coeff = src_cu->block_has_coeff;
6346 101022 : dst_cu->split_flag_context = src_cu->split_flag_context;
6347 101022 : dst_cu->qp = src_cu->qp;
6348 101022 : dst_cu->delta_qp = src_cu->delta_qp;
6349 : //CHKN // Coded Tree
6350 : //CHKN struct {
6351 : //CHKN unsigned leaf_index : 8;
6352 : //CHKN unsigned split_flag : 1;
6353 : //CHKN unsigned skip_flag : 1;
6354 : //CHKN
6355 : //CHKN };
6356 :
    // split_flag is listed in the CHKN note above but is not copied here.
6357 101022 : dst_cu->leaf_index = src_cu->leaf_index;
6358 101022 : dst_cu->skip_flag = src_cu->skip_flag;
6359 101022 : dst_cu->tx_depth = src_cu->tx_depth;
6360 : //CHKN MacroBlockD* av1xd;
    // Copies the pointed-to MacroBlockD contents; the av1xd pointers themselves
    // are left untouched.
6361 101022 : memcpy(dst_cu->av1xd, src_cu->av1xd, sizeof(MacroBlockD));
6362 :
6363 : // uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
6364 :
6365 : //CHKN int16_t inter_mode_ctx[MODE_CTX_REF_FRAMES];
6366 101022 : memcpy(dst_cu->inter_mode_ctx, src_cu->inter_mode_ctx, MODE_CTX_REF_FRAMES * sizeof(int16_t));
6367 :
6368 : //CHKN IntMv ref_mvs[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; //used only for nonCompound modes.
6369 101022 : memcpy(dst_cu->ref_mvs, src_cu->ref_mvs, MODE_CTX_REF_FRAMES*MAX_MV_REF_CANDIDATES * sizeof(IntMv));
6370 :
6371 : //CHKN uint8_t drl_index;
6372 : //CHKN PredictionMode pred_mode;
6373 101022 : dst_cu->drl_index = src_cu->drl_index;
6374 101022 : dst_cu->pred_mode = src_cu->pred_mode;
6375 :
6376 : //CHKN IntMv predmv[2];
6377 :
6378 101022 : memcpy(dst_cu->predmv, src_cu->predmv, 2 * sizeof(IntMv));
6379 :
6380 : //CHKN uint8_t skip_coeff_context;
6381 : //CHKN int16_t luma_txb_skip_context;
6382 : //CHKN int16_t luma_dc_sign_context;
6383 : //CHKN int16_t cb_txb_skip_context;
6384 : //CHKN int16_t cb_dc_sign_context;
6385 : //CHKN int16_t cr_txb_skip_context;
6386 : //CHKN int16_t cr_dc_sign_context;
6387 : //CHKN uint8_t reference_mode_context;
6388 : //CHKN uint8_t compoud_reference_type_context;
6389 : //CHKN uint32_t partitionContext;
6390 :
6391 101022 : dst_cu->skip_coeff_context = src_cu->skip_coeff_context;
6392 101022 : dst_cu->reference_mode_context = src_cu->reference_mode_context;
6393 101022 : dst_cu->compoud_reference_type_context = src_cu->compoud_reference_type_context;
6394 101022 : memcpy(dst_cu->quantized_dc, src_cu->quantized_dc, 3 * MAX_TXB_COUNT * sizeof(int32_t));
6395 : //CHKN uint32_t is_inter_ctx;
6396 : //CHKN uint32_t interp_filters;
6397 :
6398 101022 : dst_cu->is_inter_ctx = src_cu->is_inter_ctx;
6399 101022 : dst_cu->interp_filters = src_cu->interp_filters;
6400 :
6401 101022 : dst_cu->part = src_cu->part;
6402 101022 : dst_cu->shape = src_cu->shape;
    // mds_idx is deliberately left as-is in dst_cu (assignment disabled below).
6403 : //dst_cu->mds_idx = src_cu->mds_idx;
6404 101022 : }
6405 :
/*
 * check_redundant_block
 *
 * Looks for an already-available block among the blocks that blk_geom lists
 * as redundant with it.
 *
 * blk_geom: geometry of the current block; redund gates the search and
 *     redund_list.blk_mds_table[0 .. list_size-1] supplies the mds indices
 *     to probe, in order.
 * context_ptr: md_local_cu_unit[mds].avail_blk_flag is tested for each
 *     listed mds index.
 * redundant_blk_avail (out): set to 1 when a match is found.
 * redundant_blk_mds (out): set to the mds index of the first match.
 *
 * Both outputs are left untouched when blk_geom->redund is zero or no listed
 * block is available, so the caller must initialise them before the call.
 */
6406 1712870 : void check_redundant_block(const BlockGeom * blk_geom, ModeDecisionContext *context_ptr, uint8_t * redundant_blk_avail, uint16_t *redundant_blk_mds)
6407 : {
6408 1712870 : if (blk_geom->redund) {
6409 641211 : for (int it = 0; it < blk_geom->redund_list.list_size; it++) {
6410 427155 : if (context_ptr->md_local_cu_unit[blk_geom->redund_list.blk_mds_table[it]].avail_blk_flag)
6411 : {
    // First available entry wins; later entries are not examined.
6412 101022 : *redundant_blk_mds = blk_geom->redund_list.blk_mds_table[it];
6413 101022 : *redundant_blk_avail = 1;
6414 101022 : break;
6415 : }
6416 : }
6417 : }
6418 1712870 : }
6419 :
6420 : /*******************************************
6421 : * ModeDecision LCU
6422 : * performs CL (LCU)
6423 : *******************************************/
/*
 * allowed_ns_cu
 *
 * NOTE(review): the banner above does not describe this function and looks
 * like a leftover from other code - confirm and replace.
 *
 * Returns whether the shape of the current block (context_ptr->blk_geom->shape)
 * may be evaluated:
 *   - 1 when is_nsq_table_used is false;
 *   - 1 when the shape is PART_N;
 *   - otherwise 1 only if the shape equals one of the first
 *     nsq_max_shapes_md entries of context_ptr->nsq_table, else 0.
 *
 * mdc_depth_level (COMBINE_MDC_NSQ_TABLE builds only): selects between two
 *     branches whose bodies are identical in this listing, so it currently
 *     has no effect on the result.
 * is_complete_sb: unused (explicitly marked with UNUSED()).
 */
6424 811418 : EbBool allowed_ns_cu(
6425 : #if COMBINE_MDC_NSQ_TABLE
6426 : uint8_t mdc_depth_level,
6427 : #endif
6428 : EbBool is_nsq_table_used,
6429 : uint8_t nsq_max_shapes_md,
6430 : ModeDecisionContext *context_ptr,
6431 : uint8_t is_complete_sb){
6432 811418 : EbBool ret = 1;
6433 : UNUSED(is_complete_sb);
6434 :
6435 : #if COMBINE_MDC_NSQ_TABLE
6436 811418 : if (is_nsq_table_used) {
6437 : #if MDC_ADAPTIVE_LEVEL
6438 0 : if (!mdc_depth_level) {
6439 : #else
6440 : if (mdc_depth_level == MAX_MDC_LEVEL) {
6441 : #endif
    // Table lookup: the loop does not stop at the first match, it scans all
    // nsq_max_shapes_md entries.
6442 0 : if (context_ptr->blk_geom->shape != PART_N) {
6443 0 : ret = 0;
6444 0 : for (int i = 0; i < nsq_max_shapes_md; i++) {
6445 0 : if (context_ptr->blk_geom->shape == context_ptr->nsq_table[i])
6446 0 : ret = 1;
6447 : }
6448 : }
6449 : }
6450 : else {
    // Same lookup as the branch above.
6451 0 : if (context_ptr->blk_geom->shape != PART_N) {
6452 0 : ret = 0;
6453 0 : for (int i = 0; i < nsq_max_shapes_md; i++) {
6454 0 : if (context_ptr->blk_geom->shape == context_ptr->nsq_table[i])
6455 0 : ret = 1;
6456 : }
6457 : }
6458 : }
6459 : }
6460 : #else
6461 : if (is_nsq_table_used) {
6462 : if (context_ptr->blk_geom->shape != PART_N) {
6463 : ret = 0;
6464 : for (int i = 0; i < nsq_max_shapes_md; i++) {
6465 : if (context_ptr->blk_geom->shape == context_ptr->nsq_table[i])
6466 : ret = 1;
6467 : }
6468 : }
6469 : }
6470 : #endif
6471 811418 : return ret;
6472 : }
6473 :
/*
 * init_candidate_buffer
 *
 * Resets the per-candidate results that a full-loop pass fills in.
 *
 * candidate_ptr: y/u/v_has_coeff, full_distortion, chroma_distortion and
 *     chroma_distortion_inter_depth are set to 0; eob[0], eob[1] and eob[2]
 *     are each cleared for MAX_TXB_COUNT uint16_t entries.
 * count_non_zero_coeffs: all three rows are cleared
 *     (MAX_NUM_OF_TU_PER_CU uint32_t entries each).
 */
6474 0 : void init_candidate_buffer(
6475 : ModeDecisionCandidate *candidate_ptr,
6476 : uint32_t count_non_zero_coeffs[3][MAX_NUM_OF_TU_PER_CU])
6477 : {
6478 0 : candidate_ptr->y_has_coeff = 0;
6479 0 : candidate_ptr->u_has_coeff = 0;
6480 0 : candidate_ptr->v_has_coeff = 0;
6481 :
6482 0 : candidate_ptr->full_distortion = 0;
6483 :
    // Index 0 of eob / count_non_zero_coeffs is cleared here, indices 1 and 2
    // below together with the chroma distortions.
6484 0 : memset(candidate_ptr->eob[0], 0, sizeof(uint16_t)*MAX_TXB_COUNT);
6485 0 : memset(count_non_zero_coeffs[0], 0, sizeof(uint32_t)*MAX_NUM_OF_TU_PER_CU);
6486 :
6487 0 : candidate_ptr->chroma_distortion = 0;
6488 0 : candidate_ptr->chroma_distortion_inter_depth = 0;
6489 0 : memset(candidate_ptr->eob[1], 0, sizeof(uint16_t)*MAX_TXB_COUNT);
6490 0 : memset(count_non_zero_coeffs[1], 0, sizeof(uint32_t)*MAX_NUM_OF_TU_PER_CU);
6491 0 : memset(candidate_ptr->eob[2], 0, sizeof(uint16_t)*MAX_TXB_COUNT);
6492 0 : memset(count_non_zero_coeffs[2], 0, sizeof(uint32_t)*MAX_NUM_OF_TU_PER_CU);
6493 0 : }
6494 811376 : void inter_depth_tx_search(
6495 : PictureControlSet *picture_control_set_ptr,
6496 : ModeDecisionCandidateBuffer *candidate_buffer,
6497 : CodingUnit *cu_ptr,
6498 : ModeDecisionContext *context_ptr,
6499 : EbPictureBufferDesc *input_picture_ptr,
6500 : uint64_t ref_fast_cost)
6501 : {
6502 : // Hsan: if Transform Search ON and INTRA, then Tx Type search is performed @ the full loop
6503 811376 : uint8_t tx_search_skip_flag = (picture_control_set_ptr->parent_pcs_ptr->tx_search_level == TX_SEARCH_INTER_DEPTH && (picture_control_set_ptr->parent_pcs_ptr->atb_mode == 0 || candidate_buffer ->candidate_ptr->type == INTER_MODE)) ? get_skip_tx_search_flag(
6504 0 : context_ptr->blk_geom->sq_size,
6505 : ref_fast_cost,
6506 0 : *candidate_buffer->fast_cost_ptr,
6507 0 : picture_control_set_ptr->parent_pcs_ptr->tx_weight) : 1;
6508 811381 : if (!tx_search_skip_flag) {
6509 0 : uint64_t y_full_distortion[DIST_CALC_TOTAL] = { 0 };
6510 : uint32_t count_non_zero_coeffs[3][MAX_NUM_OF_TU_PER_CU];
6511 :
6512 : uint64_t cbFullDistortion[DIST_CALC_TOTAL];
6513 : uint64_t crFullDistortion[DIST_CALC_TOTAL];
6514 :
6515 0 : uint64_t y_coeff_bits = 0;
6516 0 : uint64_t cb_coeff_bits = 0;
6517 0 : uint64_t cr_coeff_bits = 0;
6518 :
6519 0 : ModeDecisionCandidate *candidate_ptr = candidate_buffer->candidate_ptr;
6520 :
6521 0 : init_candidate_buffer(
6522 : candidate_ptr,
6523 : count_non_zero_coeffs);
6524 :
6525 0 : product_full_loop_tx_search(
6526 : candidate_buffer,
6527 : context_ptr,
6528 : picture_control_set_ptr
6529 : );
6530 :
6531 0 : candidate_ptr->full_distortion = 0;
6532 :
6533 0 : memset(candidate_ptr->eob[0], 0, sizeof(uint16_t)*MAX_TXB_COUNT);
6534 :
6535 : //re-init
6536 0 : candidate_ptr->y_has_coeff = 0;
6537 :
6538 0 : product_full_loop(
6539 : candidate_buffer,
6540 : context_ptr,
6541 : picture_control_set_ptr,
6542 : input_picture_ptr,
6543 0 : context_ptr->cu_ptr->qp,
6544 : &(*count_non_zero_coeffs[0]),
6545 : &y_coeff_bits,
6546 : &y_full_distortion[0]);
6547 :
6548 0 : candidate_ptr->chroma_distortion_inter_depth = 0;
6549 0 : candidate_ptr->chroma_distortion = 0;
6550 :
6551 : //CHROMA
6552 0 : cbFullDistortion[DIST_CALC_RESIDUAL] = 0;
6553 0 : crFullDistortion[DIST_CALC_RESIDUAL] = 0;
6554 0 : cbFullDistortion[DIST_CALC_PREDICTION] = 0;
6555 0 : crFullDistortion[DIST_CALC_PREDICTION] = 0;
6556 :
6557 0 : cb_coeff_bits = 0;
6558 0 : cr_coeff_bits = 0;
6559 :
6560 : // FullLoop and TU search
6561 0 : uint16_t cb_qp = context_ptr->qp;
6562 0 : uint16_t cr_qp = context_ptr->qp;
6563 0 : if (context_ptr->blk_geom->has_uv && context_ptr->chroma_level <= CHROMA_MODE_1) {
6564 0 : full_loop_r(
6565 : context_ptr->sb_ptr,
6566 : candidate_buffer,
6567 : context_ptr,
6568 : input_picture_ptr,
6569 : picture_control_set_ptr,
6570 : PICTURE_BUFFER_DESC_CHROMA_MASK,
6571 : cb_qp,
6572 : cr_qp,
6573 : &(*count_non_zero_coeffs[1]),
6574 : &(*count_non_zero_coeffs[2]));
6575 :
6576 0 : cu_full_distortion_fast_tu_mode_r(
6577 : context_ptr->sb_ptr,
6578 : candidate_buffer,
6579 : context_ptr,
6580 : candidate_ptr,
6581 : picture_control_set_ptr,
6582 : input_picture_ptr,
6583 : cbFullDistortion,
6584 : crFullDistortion,
6585 : count_non_zero_coeffs,
6586 : COMPONENT_CHROMA,
6587 : &cb_coeff_bits,
6588 : &cr_coeff_bits,
6589 : 1);
6590 :
6591 0 : candidate_ptr->block_has_coeff = (candidate_ptr->y_has_coeff | candidate_ptr->u_has_coeff | candidate_ptr->v_has_coeff) ? EB_TRUE : EB_FALSE;
6592 : }
6593 :
6594 0 : Av1ProductFullCostFuncTable[candidate_ptr->type](
6595 : picture_control_set_ptr,
6596 : context_ptr,
6597 : candidate_buffer,
6598 : cu_ptr,
6599 : y_full_distortion,
6600 : cbFullDistortion,
6601 : crFullDistortion,
6602 0 : context_ptr->full_lambda,
6603 : &y_coeff_bits,
6604 : &cb_coeff_bits,
6605 : &cr_coeff_bits,
6606 0 : context_ptr->blk_geom->bsize);
6607 :
6608 0 : candidate_buffer->cb_distortion[DIST_CALC_RESIDUAL] = cbFullDistortion[DIST_CALC_RESIDUAL];
6609 0 : candidate_buffer->cb_distortion[DIST_CALC_PREDICTION] = cbFullDistortion[DIST_CALC_PREDICTION];
6610 0 : candidate_buffer->cb_coeff_bits = cb_coeff_bits;
6611 :
6612 0 : candidate_buffer->cr_distortion[DIST_CALC_RESIDUAL] = crFullDistortion[DIST_CALC_RESIDUAL];
6613 0 : candidate_buffer->cr_distortion[DIST_CALC_PREDICTION] = crFullDistortion[DIST_CALC_PREDICTION];
6614 0 : candidate_buffer->cr_coeff_bits = cr_coeff_bits;
6615 :
6616 0 : candidate_buffer->candidate_ptr->full_distortion = (uint32_t)(y_full_distortion[0]);
6617 :
6618 0 : candidate_buffer->y_coeff_bits = y_coeff_bits;
6619 0 : candidate_ptr->full_distortion = (uint32_t)(y_full_distortion[0]);
6620 : //Update tx
6621 0 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].cost = *(candidate_buffer->full_cost_ptr);
6622 0 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].cost = (context_ptr->md_local_cu_unit[cu_ptr->mds_idx].cost - candidate_buffer->candidate_ptr->chroma_distortion) + candidate_buffer->candidate_ptr->chroma_distortion_inter_depth;
6623 :
6624 0 : if (candidate_ptr->type == INTRA_MODE)
6625 0 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].cost_luma = candidate_buffer->full_cost_luma;
6626 :
6627 0 : context_ptr->md_ep_pipe_sb[cu_ptr->mds_idx].merge_cost = *candidate_buffer->full_cost_merge_ptr;
6628 0 : context_ptr->md_ep_pipe_sb[cu_ptr->mds_idx].skip_cost = *candidate_buffer->full_cost_skip_ptr;
6629 :
6630 0 : if (candidate_ptr->type == INTER_MODE && candidate_ptr->merge_flag == EB_TRUE)
6631 0 : context_ptr->md_ep_pipe_sb[cu_ptr->leaf_index].chroma_distortion = candidate_buffer->candidate_ptr->chroma_distortion;
6632 0 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].full_distortion = candidate_buffer->candidate_ptr->full_distortion;
6633 :
6634 0 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].chroma_distortion = (uint32_t)candidate_buffer->candidate_ptr->chroma_distortion;
6635 0 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].chroma_distortion_inter_depth = (uint32_t)candidate_buffer->candidate_ptr->chroma_distortion_inter_depth;
6636 :
6637 : //cu_ptr->prediction_mode_flag = candidate_ptr->type;
6638 0 : cu_ptr->skip_flag = candidate_ptr->skip_flag; // note, the skip flag is re-checked in the ENCDEC process
6639 0 : cu_ptr->block_has_coeff = ((candidate_ptr->block_has_coeff) > 0) ? EB_TRUE : EB_FALSE;
6640 : // This kernel assumes no atb
6641 0 : cu_ptr->quantized_dc[0][0] = candidate_buffer->candidate_ptr->quantized_dc[0][0];
6642 0 : cu_ptr->quantized_dc[1][0] = candidate_buffer->candidate_ptr->quantized_dc[1][0];
6643 0 : cu_ptr->quantized_dc[2][0] = candidate_buffer->candidate_ptr->quantized_dc[2][0];
6644 :
6645 0 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].count_non_zero_coeffs = candidate_ptr->count_non_zero_coeffs;
6646 :
6647 : TransformUnit *txb_ptr;
6648 : uint32_t txb_itr;
6649 : uint32_t tu_index;
6650 : uint32_t tuTotalCount;
6651 0 : tuTotalCount = context_ptr->blk_geom->txb_count[candidate_buffer->candidate_ptr->tx_depth];
6652 0 : tu_index = 0;
6653 0 : txb_itr = 0;
6654 :
6655 : #if NO_ENCDEC
6656 : int32_t txb_1d_offset = 0, txb_1d_offset_uv = 0;
6657 :
6658 : cu_ptr->block_has_coeff = 0;
6659 : #endif
6660 :
6661 : // Set TU
6662 : do {
6663 0 : txb_ptr = &cu_ptr->transform_unit_array[tu_index];
6664 :
6665 0 : txb_ptr->split_flag = EB_FALSE;
6666 0 : txb_ptr->y_has_coeff = (EbBool)(((candidate_ptr->y_has_coeff) & (1 << tu_index)) > 0);
6667 0 : txb_ptr->u_has_coeff = (EbBool)(((candidate_ptr->u_has_coeff) & (1 << (tu_index))) > 0);
6668 0 : txb_ptr->v_has_coeff = (EbBool)(((candidate_ptr->v_has_coeff) & (1 << (tu_index))) > 0);
6669 0 : txb_ptr->transform_type[PLANE_TYPE_Y] = candidate_ptr->transform_type[tu_index];
6670 0 : txb_ptr->transform_type[PLANE_TYPE_UV] = candidate_ptr->transform_type_uv;
6671 :
6672 : #if NO_ENCDEC
6673 :
6674 : if (context_ptr->blk_geom->has_uv) {
6675 : cu_ptr->block_has_coeff |= txb_ptr->y_has_coeff;
6676 : cu_ptr->block_has_coeff |= txb_ptr->u_has_coeff;
6677 : cu_ptr->block_has_coeff |= txb_ptr->v_has_coeff;
6678 : }
6679 : else
6680 : cu_ptr->block_has_coeff |= txb_ptr->y_has_coeff;
6681 : cu_ptr->cand_buff_index = lowestCostIndex;
6682 :
6683 : cu_ptr->skip_flag = 0; //SKIP is turned OFF for this case!!
6684 : txb_ptr->nz_coef_count[0] = candidate_ptr->eob[0][tu_index];
6685 : txb_ptr->nz_coef_count[1] = candidate_ptr->eob[1][tu_index];
6686 : txb_ptr->nz_coef_count[2] = candidate_ptr->eob[2][tu_index];
6687 :
6688 : if (pu_ptr->inter_pred_direction_index == UNI_PRED_LIST_0) {
6689 : cu_ptr->predmv[0].as_mv.col = candidate_ptr->motion_vector_pred_x[REF_LIST_0];
6690 : cu_ptr->predmv[0].as_mv.row = candidate_ptr->motion_vector_pred_y[REF_LIST_0];
6691 : }
6692 : else if (pu_ptr->inter_pred_direction_index == UNI_PRED_LIST_1) {
6693 : cu_ptr->predmv[0].as_mv.col = candidate_ptr->motion_vector_pred_x[REF_LIST_1];
6694 : cu_ptr->predmv[0].as_mv.row = candidate_ptr->motion_vector_pred_y[REF_LIST_1];
6695 : }
6696 : else if (pu_ptr->inter_pred_direction_index == BI_PRED) {
6697 : cu_ptr->predmv[0].as_mv.col = candidate_ptr->motion_vector_pred_x[REF_LIST_0];
6698 : cu_ptr->predmv[0].as_mv.row = candidate_ptr->motion_vector_pred_y[REF_LIST_0];
6699 : cu_ptr->predmv[1].as_mv.col = candidate_ptr->motion_vector_pred_x[REF_LIST_1];
6700 : cu_ptr->predmv[1].as_mv.row = candidate_ptr->motion_vector_pred_y[REF_LIST_1];
6701 : }
6702 : #endif
6703 : #if NO_ENCDEC
6704 : //copy coeff
6705 : {
6706 : uint32_t bwidth = context_ptr->blk_geom->tx_width[txb_itr] < 64 ? context_ptr->blk_geom->tx_width[txb_itr] : 32;
6707 : uint32_t bheight = context_ptr->blk_geom->tx_height[txb_itr] < 64 ? context_ptr->blk_geom->tx_height[txb_itr] : 32;
6708 :
6709 : int32_t* src_ptr = &(((int32_t*)buffer_ptr_array[lowestCostIndex]->residual_quant_coeff_ptr->buffer_y)[txb_1d_offset]);
6710 : int32_t* dst_ptr = &(((int32_t*)context_ptr->cu_ptr->coeff_tmp->buffer_y)[txb_1d_offset]);
6711 :
6712 : uint32_t j;
6713 :
6714 : for (j = 0; j < bheight; j++)
6715 : memcpy(dst_ptr + j * bwidth, src_ptr + j * bwidth, bwidth * sizeof(int32_t));
6716 : if (context_ptr->blk_geom->has_uv)
6717 : {
6718 : // Cb
6719 : bwidth = context_ptr->blk_geom->tx_width_uv[txb_itr];
6720 : bheight = context_ptr->blk_geom->tx_height_uv[txb_itr];
6721 :
6722 : src_ptr = &(((int32_t*)buffer_ptr_array[lowestCostIndex]->residual_quant_coeff_ptr->buffer_cb)[txb_1d_offset_uv]);
6723 : dst_ptr = &(((int32_t*)context_ptr->cu_ptr->coeff_tmp->buffer_cb)[txb_1d_offset_uv]);
6724 :
6725 : for (j = 0; j < bheight; j++)
6726 : memcpy(dst_ptr + j * bwidth, src_ptr + j * bwidth, bwidth * sizeof(int32_t));
6727 : src_ptr = &(((int32_t*)buffer_ptr_array[lowestCostIndex]->residual_quant_coeff_ptr->buffer_cr)[txb_1d_offset_uv]);
6728 : dst_ptr = &(((int32_t*)context_ptr->cu_ptr->coeff_tmp->buffer_cr)[txb_1d_offset_uv]);
6729 :
6730 : for (j = 0; j < bheight; j++)
6731 : memcpy(dst_ptr + j * bwidth, src_ptr + j * bwidth, bwidth * sizeof(int32_t));
6732 : }
6733 :
6734 : txb_1d_offset += context_ptr->blk_geom->tx_width[txb_itr] * context_ptr->blk_geom->tx_height[txb_itr];
6735 : if (context_ptr->blk_geom->has_uv)
6736 : txb_1d_offset_uv += context_ptr->blk_geom->tx_width_uv[txb_itr] * context_ptr->blk_geom->tx_height_uv[txb_itr];
6737 : }
6738 :
6739 : #endif
6740 :
6741 0 : ++tu_index;
6742 0 : ++txb_itr;
6743 0 : } while (txb_itr < tuTotalCount);
6744 : }
6745 811381 : }
6746 :
6747 : /****************************************************
6748 : * generate the the size in pixel for partition code
6749 : ****************************************************/
6750 0 : uint8_t get_part_side(
6751 : PartitionContextType part) {
6752 0 : switch (part) {
6753 0 : case 31:
6754 0 : return 4;
6755 : break;
6756 0 : case 30:
6757 0 : return 8;
6758 : break;
6759 0 : case 28:
6760 0 : return 16;
6761 : break;
6762 0 : case 24:
6763 0 : return 32;
6764 : break;
6765 0 : case 16:
6766 0 : return 64;
6767 : break;
6768 0 : case 0:
6769 0 : return 128;
6770 : break;
6771 0 : default:
6772 0 : return 255;
6773 : printf("error: non supported partition!!\n");
6774 : break;
6775 : }
6776 : }
6777 : /****************************************************
6778 : * Return a predicted Shape based on the above and
6779 : * left partitions
6780 : ****************************************************/
6781 0 : PART get_partition_shape(
6782 : PartitionContextType above,
6783 : PartitionContextType left,
6784 : uint8_t width,
6785 : uint8_t height) {
6786 0 : uint8_t above_size = get_part_side(above);
6787 0 : uint8_t left_size = get_part_side(left);
6788 0 : PART part = PART_N;
6789 :
6790 0 : if (above_size == width && left_size == height)
6791 0 : part = PART_N;
6792 0 : else if (above_size > width && left_size > height)
6793 0 : part = PART_N;
6794 0 : else if (above_size > width) {
6795 0 : if (left_size == height)
6796 0 : part = PART_N;
6797 0 : else if (left_size < (height / 2))
6798 0 : part = PART_H4;
6799 0 : else if (left_size < height)
6800 0 : part = PART_H;
6801 : else
6802 0 : printf("error: unsupported left_size\n");
6803 : }
6804 0 : else if (left_size > height) {
6805 0 : if (above_size == width)
6806 0 : part = PART_N;
6807 0 : else if (above_size < (width / 2))
6808 0 : part = PART_V4;
6809 0 : else if (above_size < width)
6810 0 : part = PART_V;
6811 : else
6812 0 : printf("error: unsupported above_size\n");
6813 : }
6814 0 : else if (above_size < width) {
6815 0 : if (left_size == height)
6816 0 : part = PART_VA;
6817 0 : else if (left_size < height)
6818 0 : part = PART_S;
6819 : else
6820 0 : printf("error: unsupported left_size\n");
6821 : }
6822 0 : else if (left_size < height) {
6823 0 : if (above_size == width)
6824 0 : part = PART_HA;
6825 0 : else if (above_size < width)
6826 0 : part = PART_S;
6827 : else
6828 0 : printf("error: unsupported above_size\n");
6829 : }
6830 0 : else if (above_size == width) {
6831 0 : if (left_size < height)
6832 0 : part = PART_HB;
6833 : else
6834 0 : printf("error: unsupported left_size\n");
6835 : }
6836 0 : else if (left_size == height) {
6837 0 : if (above_size == width)
6838 0 : part = PART_HB;
6839 : else
6840 0 : printf("error: unsupported above_size\n");
6841 : }
6842 : else
6843 0 : printf("error: unsupported above_size && left_size\n");
6844 0 : return part;
6845 : };
6846 :
6847 : #if ADJUST_NSQ_RANK_BASED_ON_NEIGH
6848 : /****************************************************
6849 : * Adjust the nsq_rank in order to keep the most
6850 : * probable Shape to be selected in the lowest index
6851 : ****************************************************/
6852 0 : void adjust_nsq_rank(
6853 : PictureControlSet *picture_control_set_ptr,
6854 : ModeDecisionContext *context_ptr,
6855 : const SequenceControlSet *sequence_control_set_ptr,
6856 : LargestCodingUnit *sb_ptr,
6857 : NeighborArrayUnit *leaf_partition_neighbor_array) {
6858 0 : const uint32_t lcu_addr = sb_ptr->index;
6859 0 : uint8_t ol_part1 = context_ptr->best_nsq_sahpe1;
6860 0 : uint8_t ol_part2 = context_ptr->best_nsq_sahpe2;
6861 0 : uint8_t ol_part3 = context_ptr->best_nsq_sahpe3;
6862 0 : uint8_t ol_part4 = context_ptr->best_nsq_sahpe4;
6863 0 : uint8_t ol_part5 = context_ptr->best_nsq_sahpe5;
6864 0 : uint8_t ol_part6 = context_ptr->best_nsq_sahpe6;
6865 0 : uint8_t ol_part7 = context_ptr->best_nsq_sahpe7;
6866 0 : uint8_t ol_part8 = context_ptr->best_nsq_sahpe8;
6867 0 : EbBool is_compound_enabled = (picture_control_set_ptr->parent_pcs_ptr->reference_mode == SINGLE_REFERENCE) ? 0 : 1;
6868 : uint32_t me_sb_addr;
6869 : uint32_t me_2Nx2N_table_offset;
6870 : uint32_t max_number_of_pus_per_sb;
6871 0 : uint32_t geom_offset_x = 0;
6872 0 : uint32_t geom_offset_y = 0;
6873 0 : uint8_t cnt[PART_S + 1] = { 0 };
6874 0 : if (sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128) {
6875 0 : uint32_t me_sb_size = sequence_control_set_ptr->sb_sz;
6876 0 : uint32_t me_pic_width_in_sb = (sequence_control_set_ptr->seq_header.max_frame_width + sequence_control_set_ptr->sb_sz - 1) / me_sb_size;
6877 0 : uint32_t me_sb_x = (context_ptr->cu_origin_x / me_sb_size);
6878 0 : uint32_t me_sb_y = (context_ptr->cu_origin_y / me_sb_size);
6879 0 : me_sb_addr = me_sb_x + me_sb_y * me_pic_width_in_sb;
6880 0 : geom_offset_x = (me_sb_x & 0x1) * me_sb_size;
6881 0 : geom_offset_y = (me_sb_y & 0x1) * me_sb_size;
6882 : }
6883 : else
6884 0 : me_sb_addr = lcu_addr;
6885 0 : max_number_of_pus_per_sb = picture_control_set_ptr->parent_pcs_ptr->max_number_of_pus_per_sb;
6886 0 : me_2Nx2N_table_offset = (context_ptr->blk_geom->bwidth == 4 || context_ptr->blk_geom->bheight == 4 || context_ptr->blk_geom->bwidth == 128 || context_ptr->blk_geom->bheight == 128) ? 0 :
6887 :
6888 0 : get_me_info_index(
6889 : max_number_of_pus_per_sb,
6890 : context_ptr->blk_geom,
6891 : geom_offset_x,
6892 : geom_offset_y);
6893 :
6894 0 : const MeLcuResults *me_results = picture_control_set_ptr->parent_pcs_ptr->me_results[me_sb_addr];
6895 0 : uint8_t nsq0 = me_results->me_nsq_0[me_2Nx2N_table_offset];
6896 0 : uint8_t nsq1 = me_results->me_nsq_1[me_2Nx2N_table_offset];
6897 :
6898 0 : uint8_t me_part_0 = nsq0 == 0 ? PART_N : nsq0 == 1 ? PART_H : nsq0 == 2 ? PART_V : nsq0 == 3 ? PART_H4 : nsq0 == 4 ? PART_V4 : nsq0 == 5 ? PART_S : 0;
6899 0 : uint8_t me_part_1 = nsq1 == 0 ? PART_N : nsq1 == 1 ? PART_H : nsq1 == 2 ? PART_V : nsq1 == 3 ? PART_H4 : nsq1 == 4 ? PART_V4 : nsq1 == 5 ? PART_S : 0;
6900 :
6901 : // Generate Partition context
6902 0 : uint32_t partition_left_neighbor_index = get_neighbor_array_unit_left_index(
6903 : leaf_partition_neighbor_array,
6904 0 : context_ptr->cu_origin_y);
6905 0 : uint32_t partition_above_neighbor_index = get_neighbor_array_unit_top_index(
6906 : leaf_partition_neighbor_array,
6907 0 : context_ptr->cu_origin_x);
6908 0 : const PartitionContextType above_ctx = (((PartitionContext*)leaf_partition_neighbor_array->top_array)[partition_above_neighbor_index].above == (int8_t)INVALID_NEIGHBOR_DATA) ?
6909 0 : 0 : ((PartitionContext*)leaf_partition_neighbor_array->top_array)[partition_above_neighbor_index].above;
6910 0 : const PartitionContextType left_ctx = (((PartitionContext*)leaf_partition_neighbor_array->left_array)[partition_left_neighbor_index].left == (int8_t)INVALID_NEIGHBOR_DATA) ?
6911 0 : 0 : ((PartitionContext*)leaf_partition_neighbor_array->left_array)[partition_left_neighbor_index].left;
6912 :
6913 0 : PART neighbor_part = get_partition_shape(
6914 : above_ctx,
6915 : left_ctx,
6916 0 : context_ptr->blk_geom->bwidth,
6917 0 : context_ptr->blk_geom->bheight);
6918 :
6919 : //init table
6920 0 : context_ptr->nsq_table[0] = PART_H;
6921 0 : context_ptr->nsq_table[1] = PART_V;
6922 0 : context_ptr->nsq_table[2] = PART_HA;
6923 0 : context_ptr->nsq_table[3] = PART_HB;
6924 0 : context_ptr->nsq_table[4] = PART_VA;
6925 0 : context_ptr->nsq_table[5] = PART_VB;
6926 0 : context_ptr->nsq_table[6] = PART_H4;
6927 0 : context_ptr->nsq_table[7] = PART_V4;
6928 :
6929 0 : if (is_compound_enabled == 0) me_part_1 = me_part_0;
6930 :
6931 : // Insert predicted Shapes based on ME information
6932 0 : if (me_part_0 != me_part_1) {
6933 0 : context_ptr->nsq_table[0] = me_part_0;
6934 0 : context_ptr->nsq_table[1] = me_part_1;
6935 :
6936 0 : if (me_part_0 == PART_H) {
6937 0 : context_ptr->nsq_table[2] = PART_HA;
6938 0 : context_ptr->nsq_table[3] = PART_HB;
6939 0 : context_ptr->nsq_table[4] = me_part_1 != PART_H4 ? PART_H4 : PART_V;
6940 : }
6941 0 : else if (me_part_0 == PART_V) {
6942 0 : context_ptr->nsq_table[2] = PART_VA;
6943 0 : context_ptr->nsq_table[3] = PART_VB;
6944 0 : context_ptr->nsq_table[4] = me_part_1 != PART_V4 ? PART_V4 : PART_H;
6945 : }
6946 0 : else if (me_part_0 == PART_H4) {
6947 0 : context_ptr->nsq_table[2] = PART_HA;
6948 0 : context_ptr->nsq_table[3] = PART_HB;
6949 0 : context_ptr->nsq_table[4] = me_part_1 != PART_H ? PART_H : PART_V;
6950 : }
6951 0 : else if (me_part_0 == PART_V4) {
6952 0 : context_ptr->nsq_table[2] = PART_VA;
6953 0 : context_ptr->nsq_table[3] = PART_VB;
6954 0 : context_ptr->nsq_table[4] = me_part_1 != PART_V ? PART_V : PART_H;
6955 : }
6956 0 : else if (me_part_0 == PART_S) {
6957 0 : context_ptr->nsq_table[2] = PART_VA;
6958 0 : context_ptr->nsq_table[3] = PART_HB;
6959 0 : context_ptr->nsq_table[4] = me_part_1 != PART_V ? PART_V : PART_H;
6960 : }
6961 : }
6962 : else {
6963 0 : context_ptr->nsq_table[0] = me_part_0;
6964 0 : if (me_part_0 == PART_H) {
6965 0 : context_ptr->nsq_table[1] = PART_HA;
6966 0 : context_ptr->nsq_table[2] = PART_HB;
6967 0 : context_ptr->nsq_table[3] = PART_H4;
6968 0 : context_ptr->nsq_table[4] = PART_V;
6969 : }
6970 0 : else if (me_part_0 == PART_V) {
6971 0 : context_ptr->nsq_table[1] = PART_VA;
6972 0 : context_ptr->nsq_table[2] = PART_VB;
6973 0 : context_ptr->nsq_table[3] = PART_V4;
6974 0 : context_ptr->nsq_table[4] = PART_H;
6975 : }
6976 0 : else if (me_part_0 == PART_H4) {
6977 0 : context_ptr->nsq_table[1] = PART_H;
6978 0 : context_ptr->nsq_table[2] = PART_HA;
6979 0 : context_ptr->nsq_table[3] = PART_HB;
6980 0 : context_ptr->nsq_table[4] = PART_V;
6981 : }
6982 0 : else if (me_part_0 == PART_V4) {
6983 0 : context_ptr->nsq_table[1] = PART_V;
6984 0 : context_ptr->nsq_table[2] = PART_VA;
6985 0 : context_ptr->nsq_table[3] = PART_VB;
6986 0 : context_ptr->nsq_table[4] = PART_H;
6987 : }
6988 0 : else if (me_part_0 == PART_S) {
6989 0 : context_ptr->nsq_table[1] = PART_HA;
6990 0 : context_ptr->nsq_table[2] = PART_VA;
6991 0 : context_ptr->nsq_table[3] = PART_HB;
6992 0 : context_ptr->nsq_table[4] = PART_VB;
6993 : }
6994 : }
6995 : // Insert predicted Shapes based on neighbor information
6996 0 : if (neighbor_part == PART_S && me_part_0 == PART_S && me_part_1 == PART_S) {
6997 0 : context_ptr->nsq_table[0] = PART_HA;
6998 0 : context_ptr->nsq_table[1] = PART_VA;
6999 0 : context_ptr->nsq_table[2] = PART_HB;
7000 0 : context_ptr->nsq_table[3] = PART_VB;
7001 0 : context_ptr->nsq_table[4] = PART_H4;
7002 0 : context_ptr->nsq_table[5] = PART_V4;
7003 : }
7004 : else {
7005 0 : if (neighbor_part != PART_N && neighbor_part != PART_S && neighbor_part != me_part_0 && neighbor_part != me_part_1) {
7006 0 : context_ptr->nsq_table[5] = context_ptr->nsq_table[4];
7007 0 : context_ptr->nsq_table[4] = context_ptr->nsq_table[3];
7008 0 : context_ptr->nsq_table[3] = context_ptr->nsq_table[2];
7009 0 : context_ptr->nsq_table[2] = context_ptr->nsq_table[1];
7010 0 : context_ptr->nsq_table[1] = context_ptr->nsq_table[0];
7011 0 : context_ptr->nsq_table[0] = neighbor_part;
7012 : }
7013 : else
7014 0 : context_ptr->nsq_table[5] = neighbor_part != PART_N && neighbor_part != PART_S ? neighbor_part : me_part_0;
7015 : }
7016 : #if MDC_ADAPTIVE_LEVEL
7017 0 : if (picture_control_set_ptr->parent_pcs_ptr->enable_adaptive_ol_partitioning) {
7018 : #else
7019 : if (picture_control_set_ptr->parent_pcs_ptr->mdc_depth_level < MAX_MDC_LEVEL) {
7020 : #endif
7021 0 : context_ptr->nsq_table[2] = context_ptr->nsq_table[0] != ol_part1 && context_ptr->nsq_table[1] != ol_part1 ? ol_part1
7022 0 : : context_ptr->nsq_table[0] != ol_part2 && context_ptr->nsq_table[1] != ol_part2 ? ol_part2
7023 : : ol_part3 != PART_N ? ol_part3 : context_ptr->nsq_table[2];
7024 0 : context_ptr->nsq_table[3] = context_ptr->nsq_table[0] != ol_part1 && context_ptr->nsq_table[1] != ol_part1 && context_ptr->nsq_table[2] != ol_part1 ? ol_part1
7025 0 : : context_ptr->nsq_table[0] != ol_part2 && context_ptr->nsq_table[1] != ol_part2 && context_ptr->nsq_table[2] != ol_part2 ? ol_part2
7026 0 : : context_ptr->nsq_table[0] != ol_part3 && context_ptr->nsq_table[1] != ol_part3 && context_ptr->nsq_table[2] != ol_part3 ? ol_part3
7027 : : ol_part4 != PART_N ? ol_part4 : context_ptr->nsq_table[3];
7028 0 : context_ptr->nsq_table[4] = context_ptr->nsq_table[0] != ol_part1 && context_ptr->nsq_table[1] != ol_part1 && context_ptr->nsq_table[2] != ol_part1 && context_ptr->nsq_table[3] != ol_part1 ? ol_part1
7029 0 : : context_ptr->nsq_table[0] != ol_part2 && context_ptr->nsq_table[1] != ol_part2 && context_ptr->nsq_table[2] != ol_part2 && context_ptr->nsq_table[3] != ol_part2 ? ol_part2
7030 0 : : context_ptr->nsq_table[0] != ol_part3 && context_ptr->nsq_table[1] != ol_part3 && context_ptr->nsq_table[2] != ol_part3 && context_ptr->nsq_table[3] != ol_part3 ? ol_part3
7031 0 : : context_ptr->nsq_table[0] != ol_part4 && context_ptr->nsq_table[1] != ol_part4 && context_ptr->nsq_table[2] != ol_part4 && context_ptr->nsq_table[3] != ol_part4 ? ol_part4
7032 : : ol_part5 != PART_N ? ol_part5 : context_ptr->nsq_table[4];
7033 0 : context_ptr->nsq_table[5] = context_ptr->nsq_table[0] != ol_part1 && context_ptr->nsq_table[1] != ol_part1 && context_ptr->nsq_table[2] != ol_part1 && context_ptr->nsq_table[3] != ol_part1 && context_ptr->nsq_table[4] != ol_part1 ? ol_part1
7034 0 : : context_ptr->nsq_table[0] != ol_part2 && context_ptr->nsq_table[1] != ol_part2 && context_ptr->nsq_table[2] != ol_part2 && context_ptr->nsq_table[3] != ol_part2 && context_ptr->nsq_table[4] != ol_part2 ? ol_part2
7035 0 : : context_ptr->nsq_table[0] != ol_part3 && context_ptr->nsq_table[1] != ol_part3 && context_ptr->nsq_table[2] != ol_part3 && context_ptr->nsq_table[3] != ol_part3 && context_ptr->nsq_table[4] != ol_part3 ? ol_part3
7036 0 : : context_ptr->nsq_table[0] != ol_part4 && context_ptr->nsq_table[1] != ol_part4 && context_ptr->nsq_table[2] != ol_part4 && context_ptr->nsq_table[3] != ol_part4 && context_ptr->nsq_table[4] != ol_part4 ? ol_part4
7037 0 : : context_ptr->nsq_table[0] != ol_part5 && context_ptr->nsq_table[1] != ol_part5 && context_ptr->nsq_table[2] != ol_part5 && context_ptr->nsq_table[3] != ol_part5 && context_ptr->nsq_table[4] != ol_part5 ? ol_part5
7038 : : ol_part6 != PART_N ? ol_part6 : context_ptr->nsq_table[5];
7039 0 : context_ptr->nsq_table[6] = context_ptr->nsq_table[0] != ol_part1 && context_ptr->nsq_table[1] != ol_part1 && context_ptr->nsq_table[2] != ol_part1 && context_ptr->nsq_table[3] != ol_part1 && context_ptr->nsq_table[4] != ol_part1 && context_ptr->nsq_table[5] != ol_part1 ? ol_part1
7040 0 : : context_ptr->nsq_table[0] != ol_part2 && context_ptr->nsq_table[1] != ol_part2 && context_ptr->nsq_table[2] != ol_part2 && context_ptr->nsq_table[3] != ol_part2 && context_ptr->nsq_table[4] != ol_part2 && context_ptr->nsq_table[5] != ol_part2 ? ol_part2
7041 0 : : context_ptr->nsq_table[0] != ol_part3 && context_ptr->nsq_table[1] != ol_part3 && context_ptr->nsq_table[2] != ol_part3 && context_ptr->nsq_table[3] != ol_part3 && context_ptr->nsq_table[4] != ol_part3 && context_ptr->nsq_table[5] != ol_part3 ? ol_part3
7042 0 : : context_ptr->nsq_table[0] != ol_part4 && context_ptr->nsq_table[1] != ol_part4 && context_ptr->nsq_table[2] != ol_part4 && context_ptr->nsq_table[3] != ol_part4 && context_ptr->nsq_table[4] != ol_part4 && context_ptr->nsq_table[5] != ol_part4 ? ol_part4
7043 0 : : context_ptr->nsq_table[0] != ol_part5 && context_ptr->nsq_table[1] != ol_part5 && context_ptr->nsq_table[2] != ol_part5 && context_ptr->nsq_table[3] != ol_part5 && context_ptr->nsq_table[4] != ol_part5 && context_ptr->nsq_table[5] != ol_part5 ? ol_part5
7044 0 : : context_ptr->nsq_table[0] != ol_part6 && context_ptr->nsq_table[1] != ol_part6 && context_ptr->nsq_table[2] != ol_part6 && context_ptr->nsq_table[3] != ol_part6 && context_ptr->nsq_table[4] != ol_part6 && context_ptr->nsq_table[5] != ol_part6 ? ol_part6
7045 : : ol_part7;
7046 :
7047 : // Replace PART_N by best MDC.
7048 0 : for (uint8_t idx = 0; idx < NSQ_TAB_SIZE; idx++) {
7049 0 : if (context_ptr->nsq_table[idx] == PART_N) {
7050 0 : context_ptr->nsq_table[idx] = ol_part1 != PART_N ? ol_part1 :
7051 : ol_part2 != PART_N ? ol_part2 :
7052 : ol_part3 != PART_N ? ol_part3 :
7053 : ol_part4 != PART_N ? ol_part4 :
7054 : ol_part5 != PART_N ? ol_part5 :
7055 : ol_part6 != PART_N ? ol_part6 :
7056 : ol_part7 != PART_N ? ol_part7 : ol_part8;
7057 0 : break;
7058 : }
7059 : }
7060 : }
7061 : // Remove duplicate candidates
7062 0 : for (int pidx = 0; pidx < NSQ_TAB_SIZE; pidx++)
7063 0 : cnt[context_ptr->nsq_table[pidx]]++;
7064 0 : cnt[context_ptr->nsq_table[0]] = 1;
7065 0 : for (int iter = 0; iter < NSQ_TAB_SIZE - 1; iter++) {
7066 0 : for (int idx = 1 + iter; idx < NSQ_TAB_SIZE; idx++) {
7067 0 : if (context_ptr->nsq_table[iter] != context_ptr->nsq_table[idx])
7068 0 : continue;
7069 : else {
7070 0 : for (int i = idx; i < NSQ_TAB_SIZE; i++) {
7071 0 : if (idx < NSQ_TAB_SIZE - 1)
7072 0 : context_ptr->nsq_table[idx] = context_ptr->nsq_table[idx + 1];
7073 0 : else if (idx == NSQ_TAB_SIZE - 1) {
7074 0 : for (int pidx = 1; pidx < PART_S; pidx++) {
7075 0 : if (cnt[pidx] == 0)
7076 0 : context_ptr->nsq_table[idx] = (PART)pidx;
7077 : }
7078 : }
7079 : }
7080 : }
7081 : }
7082 : }
7083 0 : }
7084 : #endif
7085 :
7086 : /****************************************************
7087 : * Reorder the nsq_table in order to keep the most
7088 : * probable Shape to be selected in the lowest index
7089 : ****************************************************/
7090 0 : void order_nsq_table(
7091 : PictureControlSet *picture_control_set_ptr,
7092 : ModeDecisionContext *context_ptr,
7093 : const SequenceControlSet *sequence_control_set_ptr,
7094 : LargestCodingUnit *sb_ptr,
7095 : NeighborArrayUnit *leaf_partition_neighbor_array) {
7096 0 : FrameHeader *frm_hdr = &picture_control_set_ptr->parent_pcs_ptr->frm_hdr;
7097 0 : const uint32_t lcuAddr = sb_ptr->index;
7098 0 : EbBool isCompoundEnabled = (frm_hdr->reference_mode == SINGLE_REFERENCE) ? 0 : 1;
7099 : uint32_t me_sb_addr;
7100 : uint32_t me2Nx2NTableOffset;
7101 : uint32_t max_number_of_pus_per_sb;
7102 0 : uint32_t geom_offset_x = 0;
7103 0 : uint32_t geom_offset_y = 0;
7104 0 : uint8_t cnt[PART_S + 1] = { 0 };
7105 0 : if (sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128) {
7106 0 : uint32_t me_sb_size = sequence_control_set_ptr->sb_sz;
7107 0 : uint32_t me_pic_width_in_sb = (sequence_control_set_ptr->seq_header.max_frame_width + sequence_control_set_ptr->sb_sz - 1) / me_sb_size;
7108 0 : uint32_t me_sb_x = (context_ptr->cu_origin_x / me_sb_size);
7109 0 : uint32_t me_sb_y = (context_ptr->cu_origin_y / me_sb_size);
7110 0 : me_sb_addr = me_sb_x + me_sb_y * me_pic_width_in_sb;
7111 0 : geom_offset_x = (me_sb_x & 0x1) * me_sb_size;
7112 0 : geom_offset_y = (me_sb_y & 0x1) * me_sb_size;
7113 : }
7114 : else
7115 0 : me_sb_addr = lcuAddr;
7116 0 : max_number_of_pus_per_sb = picture_control_set_ptr->parent_pcs_ptr->max_number_of_pus_per_sb;
7117 0 : me2Nx2NTableOffset = (context_ptr->blk_geom->bwidth == 4 || context_ptr->blk_geom->bheight == 4 || context_ptr->blk_geom->bwidth == 128 || context_ptr->blk_geom->bheight == 128) ? 0 :
7118 :
7119 0 : get_me_info_index(
7120 : max_number_of_pus_per_sb,
7121 : context_ptr->blk_geom,
7122 : geom_offset_x,
7123 : geom_offset_y);
7124 :
7125 0 : const MeLcuResults *me_results = picture_control_set_ptr->parent_pcs_ptr->me_results[me_sb_addr];
7126 0 : uint8_t nsq0 = me_results->me_nsq_0[me2Nx2NTableOffset];
7127 0 : uint8_t nsq1 = me_results->me_nsq_1[me2Nx2NTableOffset];
7128 0 : uint8_t me_part_0 = nsq0 == 0 ? PART_N : nsq0 == 1 ? PART_H : nsq0 == 2 ? PART_V : nsq0 == 3 ? PART_H4 : nsq0 == 4 ? PART_V4 : nsq0 == 5 ? PART_S : 0;
7129 0 : uint8_t me_part_1 = nsq1 == 0 ? PART_N : nsq1 == 1 ? PART_H : nsq1 == 2 ? PART_V : nsq1 == 3 ? PART_H4 : nsq1 == 4 ? PART_V4 : nsq1 == 5 ? PART_S : 0;
7130 :
7131 : // Generate Partition context
7132 0 : uint32_t partition_left_neighbor_index = get_neighbor_array_unit_left_index(
7133 : leaf_partition_neighbor_array,
7134 0 : context_ptr->cu_origin_y);
7135 0 : uint32_t partition_above_neighbor_index = get_neighbor_array_unit_top_index(
7136 : leaf_partition_neighbor_array,
7137 0 : context_ptr->cu_origin_x);
7138 0 : const PartitionContextType above_ctx = (((PartitionContext*)leaf_partition_neighbor_array->top_array)[partition_above_neighbor_index].above == (int8_t)INVALID_NEIGHBOR_DATA) ?
7139 0 : 0 : ((PartitionContext*)leaf_partition_neighbor_array->top_array)[partition_above_neighbor_index].above;
7140 0 : const PartitionContextType left_ctx = (((PartitionContext*)leaf_partition_neighbor_array->left_array)[partition_left_neighbor_index].left == (int8_t)INVALID_NEIGHBOR_DATA) ?
7141 0 : 0 : ((PartitionContext*)leaf_partition_neighbor_array->left_array)[partition_left_neighbor_index].left;
7142 :
7143 0 : PART neighbor_part = get_partition_shape(
7144 : above_ctx,
7145 : left_ctx,
7146 0 : context_ptr->blk_geom->bwidth,
7147 0 : context_ptr->blk_geom->bheight);
7148 :
7149 : //init table
7150 0 : context_ptr->nsq_table[0] = PART_H;
7151 0 : context_ptr->nsq_table[1] = PART_V;
7152 0 : context_ptr->nsq_table[2] = PART_HA;
7153 0 : context_ptr->nsq_table[3] = PART_HB;
7154 0 : context_ptr->nsq_table[4] = PART_VA;
7155 0 : context_ptr->nsq_table[5] = PART_VB;
7156 0 : context_ptr->nsq_table[6] = PART_H4;
7157 0 : context_ptr->nsq_table[7] = PART_V4;
7158 :
7159 0 : if (isCompoundEnabled == 0) me_part_1 = me_part_0;
7160 :
7161 : // Insert predicted Shapes based on ME information
7162 0 : if (me_part_0 != me_part_1) {
7163 0 : context_ptr->nsq_table[0] = me_part_0;
7164 0 : context_ptr->nsq_table[1] = me_part_1;
7165 :
7166 0 : if (me_part_0 == PART_H) {
7167 0 : context_ptr->nsq_table[2] = PART_HA;
7168 0 : context_ptr->nsq_table[3] = PART_HB;
7169 0 : context_ptr->nsq_table[4] = me_part_1 != PART_H4 ? PART_H4 : PART_V;
7170 : }
7171 0 : else if (me_part_0 == PART_V) {
7172 0 : context_ptr->nsq_table[2] = PART_VA;
7173 0 : context_ptr->nsq_table[3] = PART_VB;
7174 0 : context_ptr->nsq_table[4] = me_part_1 != PART_V4 ? PART_V4 : PART_H;
7175 : }
7176 0 : else if (me_part_0 == PART_H4) {
7177 0 : context_ptr->nsq_table[2] = PART_HA;
7178 0 : context_ptr->nsq_table[3] = PART_HB;
7179 0 : context_ptr->nsq_table[4] = me_part_1 != PART_H ? PART_H : PART_V;
7180 : }
7181 0 : else if (me_part_0 == PART_V4) {
7182 0 : context_ptr->nsq_table[2] = PART_VA;
7183 0 : context_ptr->nsq_table[3] = PART_VB;
7184 0 : context_ptr->nsq_table[4] = me_part_1 != PART_V ? PART_V : PART_H;
7185 : }
7186 0 : else if (me_part_0 == PART_S) {
7187 0 : context_ptr->nsq_table[2] = PART_VA;
7188 0 : context_ptr->nsq_table[3] = PART_HB;
7189 0 : context_ptr->nsq_table[4] = me_part_1 != PART_V ? PART_V : PART_H;
7190 : }
7191 : }
7192 : else {
7193 0 : context_ptr->nsq_table[0] = me_part_0;
7194 0 : if (me_part_0 == PART_H) {
7195 0 : context_ptr->nsq_table[1] = PART_HA;
7196 0 : context_ptr->nsq_table[2] = PART_HB;
7197 0 : context_ptr->nsq_table[3] = PART_H4;
7198 0 : context_ptr->nsq_table[4] = PART_V;
7199 : }
7200 0 : else if (me_part_0 == PART_V) {
7201 0 : context_ptr->nsq_table[1] = PART_VA;
7202 0 : context_ptr->nsq_table[2] = PART_VB;
7203 0 : context_ptr->nsq_table[3] = PART_V4;
7204 0 : context_ptr->nsq_table[4] = PART_H;
7205 : }
7206 0 : else if (me_part_0 == PART_H4) {
7207 0 : context_ptr->nsq_table[1] = PART_H;
7208 0 : context_ptr->nsq_table[2] = PART_HA;
7209 0 : context_ptr->nsq_table[3] = PART_HB;
7210 0 : context_ptr->nsq_table[4] = PART_V;
7211 : }
7212 0 : else if (me_part_0 == PART_V4) {
7213 0 : context_ptr->nsq_table[1] = PART_V;
7214 0 : context_ptr->nsq_table[2] = PART_VA;
7215 0 : context_ptr->nsq_table[3] = PART_VB;
7216 0 : context_ptr->nsq_table[4] = PART_H;
7217 : }
7218 0 : else if (me_part_0 == PART_S) {
7219 0 : context_ptr->nsq_table[1] = PART_HA;
7220 0 : context_ptr->nsq_table[2] = PART_VA;
7221 0 : context_ptr->nsq_table[3] = PART_HB;
7222 0 : context_ptr->nsq_table[4] = PART_VB;
7223 : }
7224 : }
7225 :
7226 : // Insert predicted Shapes based on neighbor information
7227 0 : if (neighbor_part == PART_S && me_part_0 == PART_S && me_part_1 == PART_S) {
7228 0 : context_ptr->nsq_table[0] = PART_HA;
7229 0 : context_ptr->nsq_table[1] = PART_VA;
7230 0 : context_ptr->nsq_table[2] = PART_HB;
7231 0 : context_ptr->nsq_table[3] = PART_VB;
7232 0 : context_ptr->nsq_table[4] = PART_H4;
7233 0 : context_ptr->nsq_table[5] = PART_V4;
7234 : }
7235 : else {
7236 0 : if (neighbor_part != PART_N && neighbor_part != PART_S && neighbor_part != me_part_0 && neighbor_part != me_part_1) {
7237 0 : context_ptr->nsq_table[5] = context_ptr->nsq_table[4];
7238 0 : context_ptr->nsq_table[4] = context_ptr->nsq_table[3];
7239 0 : context_ptr->nsq_table[3] = context_ptr->nsq_table[2];
7240 0 : context_ptr->nsq_table[2] = context_ptr->nsq_table[1];
7241 0 : context_ptr->nsq_table[1] = context_ptr->nsq_table[0];
7242 0 : context_ptr->nsq_table[0] = neighbor_part;
7243 : }
7244 : else
7245 0 : context_ptr->nsq_table[5] = neighbor_part != PART_N && neighbor_part != PART_S ? neighbor_part : me_part_0;
7246 : }
7247 :
7248 : // Remove duplicate candidates
7249 0 : for (int pidx = 0; pidx < NSQ_TAB_SIZE; pidx++)
7250 0 : cnt[context_ptr->nsq_table[pidx]]++;
7251 0 : cnt[context_ptr->nsq_table[0]] = 1;
7252 0 : for (int iter = 0; iter < NSQ_TAB_SIZE - 1; iter++) {
7253 0 : for (int idx = 1 + iter; idx < NSQ_TAB_SIZE; idx++) {
7254 0 : if (context_ptr->nsq_table[iter] != context_ptr->nsq_table[idx])
7255 0 : continue;
7256 : else {
7257 0 : for (int i = idx; i < NSQ_TAB_SIZE; i++) {
7258 0 : if (idx < NSQ_TAB_SIZE - 1)
7259 0 : context_ptr->nsq_table[idx] = context_ptr->nsq_table[idx + 1];
7260 0 : else if (idx == NSQ_TAB_SIZE - 1) {
7261 0 : for (int pidx = 1; pidx < PART_S; pidx++) {
7262 0 : if (cnt[pidx] == 0)
7263 0 : context_ptr->nsq_table[idx] = (PART)pidx;
7264 : }
7265 : }
7266 : }
7267 : }
7268 : }
7269 : }
7270 0 : }
7271 0 : uint8_t check_skip_sub_blks(
7272 : PictureControlSet *picture_control_set_ptr,
7273 : ModeDecisionContext *context_ptr,
7274 : CodingUnit *cu_ptr,
7275 : uint8_t is_complete_sb,
7276 : uint32_t sb_index) {
7277 0 : uint8_t skip_sub_blocks = 0;
7278 0 : if (picture_control_set_ptr->parent_pcs_ptr->pic_depth_mode == PIC_OPEN_LOOP_DEPTH_MODE || (picture_control_set_ptr->parent_pcs_ptr->pic_depth_mode == PIC_SB_SWITCH_DEPTH_MODE && picture_control_set_ptr->parent_pcs_ptr->sb_depth_mode_array[sb_index] >= SB_OPEN_LOOP_DEPTH_MODE))
7279 0 : if (is_complete_sb)
7280 0 : if ((context_ptr->md_local_cu_unit[cu_ptr->mds_idx].top_neighbor_depth == context_ptr->blk_geom->bsize) && (context_ptr->md_local_cu_unit[cu_ptr->mds_idx].left_neighbor_depth == context_ptr->blk_geom->bsize)) {
7281 0 : skip_sub_blocks = 1;
7282 0 : context_ptr->md_cu_arr_nsq[context_ptr->blk_geom->sqi_mds].split_flag = 0;
7283 : }
7284 0 : return skip_sub_blocks;
7285 : }
7286 :
7287 : // Hsan (chroma search) : av1_get_tx_type() to define as extern
7288 82392 : void search_best_independent_uv_mode(
7289 : PictureControlSet *picture_control_set_ptr,
7290 : EbPictureBufferDesc *input_picture_ptr,
7291 : uint32_t inputCbOriginIndex,
7292 : uint32_t cuChromaOriginIndex,
7293 : ModeDecisionContext *context_ptr)
7294 : {
7295 82392 : FrameHeader *frm_hdr = &picture_control_set_ptr->parent_pcs_ptr->frm_hdr;
7296 : // Start uv search path
7297 82392 : context_ptr->uv_search_path = EB_TRUE;
7298 : #if !PAETH_HBD
7299 : uint8_t is_16_bit = (sequence_control_set_ptr->static_config.encoder_bit_depth > EB_8BIT);
7300 : #endif
7301 82392 : EbBool use_angle_delta = av1_use_angle_delta(context_ptr->blk_geom->bsize);
7302 :
7303 : UvPredictionMode uv_mode;
7304 :
7305 : int coeff_rate[UV_PAETH_PRED + 1][(MAX_ANGLE_DELTA << 1) + 1];
7306 : int distortion[UV_PAETH_PRED + 1][(MAX_ANGLE_DELTA << 1) + 1];
7307 :
7308 : // Use the 1st spot of the candidate buffer to hold cfl settings to use same kernel as MD for coef cost estimation
7309 82392 : ModeDecisionCandidateBuffer *candidate_buffer = &(context_ptr->candidate_buffer_ptr_array[0][0]);
7310 82392 : candidate_buffer->candidate_ptr = &(context_ptr->fast_candidate_array[0]);
7311 82392 : candidate_buffer->candidate_ptr->type = INTRA_MODE;
7312 82392 : candidate_buffer->candidate_ptr->distortion_ready = 0;
7313 82392 : candidate_buffer->candidate_ptr->use_intrabc = 0;
7314 82392 : candidate_buffer->candidate_ptr->angle_delta[PLANE_TYPE_UV] = 0;
7315 :
7316 82392 : uint8_t uv_mode_start = UV_DC_PRED;
7317 : #if PAETH_HBD
7318 82392 : uint8_t uv_mode_end = UV_PAETH_PRED;
7319 : #else
7320 : uint8_t uv_mode_end = is_16_bit ? UV_SMOOTH_H_PRED : UV_PAETH_PRED;
7321 : #endif
7322 1153420 : for (uv_mode = uv_mode_start; uv_mode <= uv_mode_end; uv_mode++) {
7323 1071050 : uint8_t uv_angleDeltaCandidateCount = (use_angle_delta && av1_is_directional_mode((PredictionMode)uv_mode)) ? 7 : 1;
7324 1071050 : uint8_t uv_angle_delta_shift = 1;
7325 :
7326 5126890 : for (uint8_t uv_angleDeltaCounter = 0; uv_angleDeltaCounter < uv_angleDeltaCandidateCount; ++uv_angleDeltaCounter) {
7327 4055860 : int32_t uv_angle_delta = CLIP(uv_angle_delta_shift * (uv_angleDeltaCandidateCount == 1 ? 0 : uv_angleDeltaCounter - (uv_angleDeltaCandidateCount >> 1)), -MAX_ANGLE_DELTA, MAX_ANGLE_DELTA);
7328 : #if RDOQ_CHROMA
7329 4055860 : candidate_buffer->candidate_ptr->pred_mode = DC_PRED;
7330 : #endif
7331 4055860 : candidate_buffer->candidate_ptr->intra_chroma_mode = uv_mode;
7332 4055860 : candidate_buffer->candidate_ptr->is_directional_chroma_mode_flag = (uint8_t)av1_is_directional_mode((PredictionMode)uv_mode);
7333 4055610 : candidate_buffer->candidate_ptr->angle_delta[PLANE_TYPE_UV] = uv_angle_delta;
7334 4055610 : candidate_buffer->candidate_ptr->tx_depth = 0;
7335 :
7336 8111270 : candidate_buffer->candidate_ptr->transform_type_uv =
7337 4055610 : av1_get_tx_type(
7338 4055610 : context_ptr->blk_geom->bsize,
7339 : 0,
7340 : (PredictionMode)NULL,
7341 : (UvPredictionMode)uv_mode,
7342 : PLANE_TYPE_UV,
7343 : 0,
7344 : 0,
7345 : 0,
7346 4055610 : context_ptr->blk_geom->txsize_uv[0][0],
7347 4055610 : frm_hdr->reduced_tx_set);
7348 :
7349 4055670 : uint16_t cb_qp = context_ptr->qp;
7350 4055670 : uint16_t cr_qp = context_ptr->qp;
7351 4055670 : uint64_t cb_coeff_bits = 0;
7352 4055670 : uint64_t cr_coeff_bits = 0;
7353 4055670 : uint64_t cbFullDistortion[DIST_CALC_TOTAL] = { 0, 0 };
7354 4055670 : uint64_t crFullDistortion[DIST_CALC_TOTAL] = { 0, 0 };
7355 :
7356 : uint32_t count_non_zero_coeffs[3][MAX_NUM_OF_TU_PER_CU];
7357 4055670 : context_ptr->md_staging_skip_inter_chroma_pred = EB_FALSE;
7358 4055670 : ProductPredictionFunTable[candidate_buffer->candidate_ptr->type](
7359 : context_ptr,
7360 : picture_control_set_ptr,
7361 : candidate_buffer);
7362 :
7363 : //Cb Residual
7364 4055980 : residual_kernel(
7365 : input_picture_ptr->buffer_cb,
7366 : inputCbOriginIndex,
7367 4055980 : input_picture_ptr->stride_cb,
7368 4055980 : candidate_buffer->prediction_ptr->buffer_cb,
7369 : cuChromaOriginIndex,
7370 4055980 : candidate_buffer->prediction_ptr->stride_cb,
7371 4055980 : (int16_t*)candidate_buffer->residual_ptr->buffer_cb,
7372 : cuChromaOriginIndex,
7373 4055980 : candidate_buffer->residual_ptr->stride_cb,
7374 4055980 : context_ptr->hbd_mode_decision,
7375 4055980 : context_ptr->blk_geom->bwidth_uv,
7376 4055980 : context_ptr->blk_geom->bheight_uv);
7377 :
7378 : //Cr Residual
7379 4055780 : residual_kernel(
7380 : input_picture_ptr->buffer_cr,
7381 : inputCbOriginIndex,
7382 4055780 : input_picture_ptr->stride_cr,
7383 4055780 : candidate_buffer->prediction_ptr->buffer_cr,
7384 : cuChromaOriginIndex,
7385 4055780 : candidate_buffer->prediction_ptr->stride_cr,
7386 4055780 : (int16_t*)candidate_buffer->residual_ptr->buffer_cr,
7387 : cuChromaOriginIndex,
7388 4055780 : candidate_buffer->residual_ptr->stride_cr,
7389 4055780 : context_ptr->hbd_mode_decision,
7390 4055780 : context_ptr->blk_geom->bwidth_uv,
7391 4055780 : context_ptr->blk_geom->bheight_uv);
7392 :
7393 4055780 : full_loop_r(
7394 : context_ptr->sb_ptr,
7395 : candidate_buffer,
7396 : context_ptr,
7397 : input_picture_ptr,
7398 : picture_control_set_ptr,
7399 : PICTURE_BUFFER_DESC_CHROMA_MASK,
7400 : cb_qp,
7401 : cr_qp,
7402 : &(*count_non_zero_coeffs[1]),
7403 : &(*count_non_zero_coeffs[2]));
7404 :
7405 4055780 : cu_full_distortion_fast_tu_mode_r(
7406 : context_ptr->sb_ptr,
7407 : candidate_buffer,
7408 : context_ptr,
7409 : candidate_buffer->candidate_ptr,
7410 : picture_control_set_ptr,
7411 : input_picture_ptr,
7412 : cbFullDistortion,
7413 : crFullDistortion,
7414 : count_non_zero_coeffs,
7415 : COMPONENT_CHROMA,
7416 : &cb_coeff_bits,
7417 : &cr_coeff_bits,
7418 : 1);
7419 :
7420 4055840 : coeff_rate[uv_mode][MAX_ANGLE_DELTA + uv_angle_delta] = (int)(cb_coeff_bits + cr_coeff_bits);
7421 4055840 : distortion[uv_mode][MAX_ANGLE_DELTA + uv_angle_delta] = (int)(cbFullDistortion[DIST_CALC_RESIDUAL] + crFullDistortion[DIST_CALC_RESIDUAL]);
7422 : }
7423 : }
7424 :
7425 82371 : uint8_t intra_mode_start = DC_PRED;
7426 : #if PAETH_HBD
7427 82371 : uint8_t intra_mode_end = PAETH_PRED;
7428 : #else
7429 : uint8_t intra_mode_end = is_16_bit ? SMOOTH_H_PRED : PAETH_PRED;
7430 : #endif
7431 : // Loop over all intra mode, then over all uv move to derive the best uv mode for a given intra mode in term of rate
7432 818838 : for (uint8_t intra_mode = intra_mode_start; intra_mode <= intra_mode_end; ++intra_mode) {
7433 1071060 : uint8_t angleDeltaCandidateCount = (use_angle_delta && av1_is_directional_mode((PredictionMode)intra_mode)) ? 7 : 1;
7434 1071050 : uint8_t angle_delta_shift = 1;
7435 :
7436 4792210 : for (uint8_t angleDeltaCounter = 0; angleDeltaCounter < angleDeltaCandidateCount; ++angleDeltaCounter) {
7437 4055740 : int32_t angle_delta = CLIP(angle_delta_shift * (angleDeltaCandidateCount == 1 ? 0 : angleDeltaCounter - (angleDeltaCandidateCount >> 1)), -MAX_ANGLE_DELTA, MAX_ANGLE_DELTA);
7438 :
7439 4055740 : candidate_buffer->candidate_ptr->type = INTRA_MODE;
7440 4055740 : candidate_buffer->candidate_ptr->intra_luma_mode = intra_mode;
7441 4055740 : candidate_buffer->candidate_ptr->distortion_ready = 0;
7442 4055740 : candidate_buffer->candidate_ptr->use_intrabc = 0;
7443 4055740 : candidate_buffer->candidate_ptr->is_directional_mode_flag = (uint8_t)av1_is_directional_mode((PredictionMode)intra_mode);
7444 4055520 : candidate_buffer->candidate_ptr->angle_delta[PLANE_TYPE_Y] = angle_delta;
7445 4055520 : candidate_buffer->candidate_ptr->cfl_alpha_signs = 0;
7446 4055520 : candidate_buffer->candidate_ptr->cfl_alpha_idx = 0;
7447 : // This kernel assumes no atb
7448 4055520 : candidate_buffer->candidate_ptr->transform_type[0] = DCT_DCT;
7449 4055520 : candidate_buffer->candidate_ptr->ref_frame_type = INTRA_FRAME;
7450 4055520 : candidate_buffer->candidate_ptr->pred_mode = (PredictionMode)intra_mode;
7451 4055520 : candidate_buffer->candidate_ptr->motion_mode = SIMPLE_TRANSLATION;
7452 :
7453 : //int32_t p_angle = mode_to_angle_map[(PredictionMode)openLoopIntraCandidate] + angle_delta * ANGLE_STEP;
7454 : //if (!disable_z2_prediction || (p_angle <= 90 || p_angle >= 180)) {
7455 : // uv mode loop
7456 4055520 : context_ptr->best_uv_cost[intra_mode][MAX_ANGLE_DELTA + angle_delta] = (uint64_t)~0;
7457 56384800 : for (uv_mode = uv_mode_start; uv_mode <= uv_mode_end; uv_mode++) {
7458 52663700 : uint8_t uv_angleDeltaCandidateCount = (use_angle_delta && av1_is_directional_mode((PredictionMode)uv_mode)) ? 7 : 1;
7459 52660900 : uint8_t uv_angle_delta_shift = 1;
7460 :
7461 285010000 : for (uint8_t uv_angleDeltaCounter = 0; uv_angleDeltaCounter < uv_angleDeltaCandidateCount; ++uv_angleDeltaCounter) {
7462 232680000 : int32_t uv_angle_delta = CLIP(uv_angle_delta_shift * (uv_angleDeltaCandidateCount == 1 ? 0 : uv_angleDeltaCounter - (uv_angleDeltaCandidateCount >> 1)), -MAX_ANGLE_DELTA, MAX_ANGLE_DELTA);
7463 :
7464 232680000 : candidate_buffer->candidate_ptr->intra_chroma_mode = uv_mode;
7465 232680000 : candidate_buffer->candidate_ptr->is_directional_chroma_mode_flag = (uint8_t)av1_is_directional_mode((PredictionMode)uv_mode);
7466 232326000 : candidate_buffer->candidate_ptr->angle_delta[PLANE_TYPE_UV] = uv_angle_delta;
7467 :
7468 464786000 : candidate_buffer->candidate_ptr->transform_type_uv =
7469 232326000 : av1_get_tx_type(
7470 232326000 : context_ptr->blk_geom->bsize,
7471 : 0,
7472 232326000 : (PredictionMode)candidate_buffer->candidate_ptr->intra_luma_mode,
7473 232326000 : (UvPredictionMode)candidate_buffer->candidate_ptr->intra_chroma_mode,
7474 : PLANE_TYPE_UV,
7475 : 0,
7476 : 0,
7477 : 0,
7478 232326000 : context_ptr->blk_geom->txsize_uv[0][0],
7479 232326000 : frm_hdr->reduced_tx_set);
7480 :
7481 : // Fast Cost
7482 464809000 : *(candidate_buffer->fast_cost_ptr) = Av1ProductFastCostFuncTable[candidate_buffer->candidate_ptr->type](
7483 : context_ptr->cu_ptr,
7484 232460000 : candidate_buffer->candidate_ptr,
7485 232460000 : context_ptr->qp,
7486 : 0,
7487 : 0,
7488 : 0,
7489 : 0,
7490 : picture_control_set_ptr,
7491 232460000 : &(context_ptr->md_local_cu_unit[context_ptr->blk_geom->blkidx_mds].ed_ref_mv_stack[candidate_buffer->candidate_ptr->ref_frame_type][0]),
7492 : context_ptr->blk_geom,
7493 232460000 : context_ptr->cu_origin_y >> MI_SIZE_LOG2,
7494 232460000 : context_ptr->cu_origin_x >> MI_SIZE_LOG2,
7495 : 1,
7496 232460000 : context_ptr->intra_luma_left_mode,
7497 232460000 : context_ptr->intra_luma_top_mode);
7498 :
7499 232349000 : uint64_t rate = coeff_rate[uv_mode][MAX_ANGLE_DELTA + uv_angle_delta] + candidate_buffer->candidate_ptr->fast_luma_rate + candidate_buffer->candidate_ptr->fast_chroma_rate;
7500 232349000 : uint64_t uv_cost = RDCOST(context_ptr->full_lambda, rate, distortion[uv_mode][MAX_ANGLE_DELTA + uv_angle_delta]);
7501 :
7502 232349000 : if (uv_cost < context_ptr->best_uv_cost[intra_mode][MAX_ANGLE_DELTA + angle_delta]) {
7503 12342800 : context_ptr->best_uv_mode[intra_mode][MAX_ANGLE_DELTA + angle_delta] = uv_mode;
7504 12342800 : context_ptr->best_uv_angle[intra_mode][MAX_ANGLE_DELTA + angle_delta] = uv_angle_delta;
7505 :
7506 12342800 : context_ptr->best_uv_cost[intra_mode][MAX_ANGLE_DELTA + angle_delta] = uv_cost;
7507 12342800 : context_ptr->fast_luma_rate[intra_mode][MAX_ANGLE_DELTA + angle_delta] = candidate_buffer->candidate_ptr->fast_luma_rate;
7508 12342800 : context_ptr->fast_chroma_rate[intra_mode][MAX_ANGLE_DELTA + angle_delta] = candidate_buffer->candidate_ptr->fast_chroma_rate;
7509 : }
7510 : }
7511 : }
7512 : }
7513 : }
7514 :
7515 : // End uv search path
7516 0 : context_ptr->uv_search_path = EB_FALSE;
7517 0 : }
7518 : #if SPEED_OPT
7519 : #if !REMOVE_MD_STAGE_1
7520 : void inter_class_decision_count_1(
7521 : struct ModeDecisionContext *context_ptr
7522 : )
7523 : {
7524 : ModeDecisionCandidateBuffer **buffer_ptr_array = context_ptr->candidate_buffer_ptr_array;
7525 : // Distortion-based NIC pruning not applied to INTRA clases: CLASS_0 and CLASS
7526 : for (CAND_CLASS cand_class_it = CAND_CLASS_1; cand_class_it <= CAND_CLASS_3; cand_class_it++) {
7527 : if (context_ptr->md_stage_0_count[cand_class_it] > 0 && context_ptr->md_stage_1_count[cand_class_it] > 0) {
7528 : uint32_t *cand_buff_indices = context_ptr->cand_buff_indices[cand_class_it];
7529 : if (*(buffer_ptr_array[cand_buff_indices[0]]->fast_cost_ptr) < *(buffer_ptr_array[context_ptr->cand_buff_indices[CAND_CLASS_0][0]]->fast_cost_ptr)) {
7530 : uint32_t fast1_cand_count = 1;
7531 : while (fast1_cand_count < context_ptr->md_stage_1_count[cand_class_it] && ((((*(buffer_ptr_array[cand_buff_indices[fast1_cand_count]]->fast_cost_ptr) - *(buffer_ptr_array[cand_buff_indices[0]]->fast_cost_ptr)) * 100) / (*(buffer_ptr_array[cand_buff_indices[0]]->fast_cost_ptr))) < context_ptr->dist_base_md_stage_0_count_th)) {
7532 : fast1_cand_count++;
7533 : }
7534 : context_ptr->md_stage_1_count[cand_class_it] = fast1_cand_count;
7535 : }
7536 : }
7537 : }
7538 : }
7539 : #endif
7540 : extern aom_variance_fn_ptr_t mefn_ptr[BlockSizeS_ALL];
7541 : unsigned int eb_av1_get_sby_perpixel_variance(const aom_variance_fn_ptr_t *fn_ptr, const uint8_t *src, int stride, BlockSize bs);
7542 : #endif
7543 :
7544 : #if INTER_INTRA_CLASS_PRUNING
7545 :
7546 811372 : void interintra_class_pruning_1(ModeDecisionContext *context_ptr, uint64_t best_md_stage_cost) {
7547 :
7548 8113460 : for (CAND_CLASS cand_class_it = CAND_CLASS_0; cand_class_it < CAND_CLASS_TOTAL; cand_class_it++) {
7549 7302080 : if (context_ptr->md_stage_0_count[cand_class_it] > 0 && context_ptr->md_stage_1_count[cand_class_it] > 0) {
7550 3306330 : uint32_t *cand_buff_indices = context_ptr->cand_buff_indices[cand_class_it];
7551 3306330 : uint64_t class_best_cost = *(context_ptr->candidate_buffer_ptr_array[cand_buff_indices[0]]->fast_cost_ptr);
7552 :
7553 : // inter class pruning
7554 3306330 : if ((((class_best_cost - best_md_stage_cost) * 100) / best_md_stage_cost) > context_ptr->md_stage_1_class_prune_th){
7555 0 : context_ptr->md_stage_1_count[cand_class_it] = 0;
7556 0 : continue;
7557 : }
7558 : // intra class pruning
7559 3306330 : uint32_t cand_count = 1;
7560 33974000 : while (cand_count < context_ptr->md_stage_1_count[cand_class_it] && ((((*(context_ptr->candidate_buffer_ptr_array[cand_buff_indices[cand_count]]->fast_cost_ptr) - class_best_cost) * 100) / class_best_cost) < context_ptr->md_stage_1_cand_prune_th)) {
7561 30667700 : cand_count++;
7562 : }
7563 3306330 : context_ptr->md_stage_1_count[cand_class_it] = cand_count;
7564 : }
7565 7302080 : context_ptr->md_stage_1_total_count += context_ptr->md_stage_1_count[cand_class_it];
7566 : }
7567 811372 : }
7568 :
7569 811413 : void interintra_class_pruning_2(ModeDecisionContext *context_ptr, uint64_t best_md_stage_cost) {
7570 :
7571 8113690 : for (CAND_CLASS cand_class_it = CAND_CLASS_0; cand_class_it < CAND_CLASS_TOTAL; cand_class_it++) {
7572 7302280 : if (context_ptr->md_stage_1_count[cand_class_it] > 0 && context_ptr->md_stage_2_count[cand_class_it] > 0 && context_ptr->bypass_md_stage_1[cand_class_it] == EB_FALSE) {
7573 3160810 : uint32_t *cand_buff_indices = context_ptr->cand_buff_indices[cand_class_it];
7574 3160810 : uint64_t class_best_cost = *(context_ptr->candidate_buffer_ptr_array[cand_buff_indices[0]]->full_cost_ptr);
7575 :
7576 : // inter class pruning
7577 3160810 : if ((((class_best_cost - best_md_stage_cost) * 100) / best_md_stage_cost) > context_ptr->md_stage_2_class_prune_th) {
7578 1650830 : context_ptr->md_stage_2_count[cand_class_it] = 0;
7579 1650830 : continue;
7580 : }
7581 :
7582 : // intra class pruning
7583 1509980 : uint32_t cand_count = 1;
7584 3776060 : while (cand_count < context_ptr->md_stage_2_count[cand_class_it] && ((((*(context_ptr->candidate_buffer_ptr_array[cand_buff_indices[cand_count]]->full_cost_ptr) - class_best_cost) * 100) / class_best_cost) < context_ptr->md_stage_2_cand_prune_th)) {
7585 2266080 : cand_count++;
7586 : }
7587 1509980 : context_ptr->md_stage_2_count[cand_class_it] = cand_count;
7588 : }
7589 5651450 : context_ptr->md_stage_2_total_count += context_ptr->md_stage_2_count[cand_class_it];
7590 : }
7591 811413 : }
7592 :
7593 : #endif
7594 :
7595 811401 : void md_encode_block(
7596 : SequenceControlSet *sequence_control_set_ptr,
7597 : PictureControlSet *picture_control_set_ptr,
7598 : ModeDecisionContext *context_ptr,
7599 : EbPictureBufferDesc *input_picture_ptr,
7600 : SsMeContext *ss_mecontext,
7601 : uint8_t *skip_sub_blocks,
7602 : uint32_t lcuAddr,
7603 : ModeDecisionCandidateBuffer *bestcandidate_buffers[5])
7604 : {
7605 811401 : ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base = context_ptr->candidate_buffer_ptr_array;
7606 : ModeDecisionCandidateBuffer **candidate_buffer_ptr_array;
7607 811401 : const BlockGeom *blk_geom = context_ptr->blk_geom;
7608 : ModeDecisionCandidateBuffer *candidate_buffer;
7609 811401 : ModeDecisionCandidate *fast_candidate_array = context_ptr->fast_candidate_array;
7610 : uint32_t candidate_index;
7611 : uint32_t fast_candidate_total_count;
7612 811401 : uint32_t best_intra_mode = EB_INTRA_MODE_INVALID;
7613 811401 : const uint32_t inputOriginIndex = (context_ptr->cu_origin_y + input_picture_ptr->origin_y) * input_picture_ptr->stride_y + (context_ptr->cu_origin_x + input_picture_ptr->origin_x);
7614 :
7615 811401 : const uint32_t inputCbOriginIndex = ((context_ptr->round_origin_y >> 1) + (input_picture_ptr->origin_y >> 1)) * input_picture_ptr->stride_cb + ((context_ptr->round_origin_x >> 1) + (input_picture_ptr->origin_x >> 1));
7616 811401 : const uint32_t cuOriginIndex = blk_geom->origin_x + blk_geom->origin_y * SB_STRIDE_Y;
7617 811401 : const uint32_t cuChromaOriginIndex = ROUND_UV(blk_geom->origin_x) / 2 + ROUND_UV(blk_geom->origin_y) / 2 * SB_STRIDE_UV;
7618 811401 : CodingUnit * cu_ptr = context_ptr->cu_ptr;
7619 811401 : candidate_buffer_ptr_array = &(candidate_buffer_ptr_array_base[0]);
7620 25150500 : for (uint8_t ref_idx = 0; ref_idx < MAX_REF_TYPE_CAND; ref_idx++)
7621 24339100 : context_ptr->ref_best_cost_sq_table[ref_idx] = MAX_CU_COST;
7622 :
7623 : #if PREDICT_NSQ_SHAPE
7624 2326580 : EbBool is_nsq_table_used = (picture_control_set_ptr->slice_type == !I_SLICE &&
7625 703778 : picture_control_set_ptr->parent_pcs_ptr->pic_depth_mode <= PIC_ALL_C_DEPTH_MODE &&
7626 637579 : picture_control_set_ptr->parent_pcs_ptr->nsq_search_level >= NSQ_SEARCH_LEVEL1 &&
7627 1515180 : picture_control_set_ptr->parent_pcs_ptr->nsq_search_level < NSQ_SEARCH_FULL) ? EB_TRUE : EB_FALSE;
7628 :
7629 811401 : is_nsq_table_used = picture_control_set_ptr->parent_pcs_ptr->sc_content_detected || picture_control_set_ptr->enc_mode == ENC_M0 ? EB_FALSE : is_nsq_table_used;
7630 : #if ADJUST_NSQ_RANK_BASED_ON_NEIGH
7631 811401 : if (is_nsq_table_used) {
7632 0 : if (context_ptr->blk_geom->shape == PART_N) {
7633 : #if MDC_ADAPTIVE_LEVEL
7634 0 : if (picture_control_set_ptr->parent_pcs_ptr->enable_adaptive_ol_partitioning) {
7635 : #else
7636 : if (picture_control_set_ptr->parent_pcs_ptr->mdc_depth_level < MAX_MDC_LEVEL) {
7637 : #endif
7638 0 : adjust_nsq_rank(
7639 : picture_control_set_ptr,
7640 : context_ptr,
7641 : sequence_control_set_ptr,
7642 : context_ptr->sb_ptr,
7643 : context_ptr->leaf_partition_neighbor_array);
7644 : }
7645 : else {
7646 0 : order_nsq_table(
7647 : picture_control_set_ptr,
7648 : context_ptr,
7649 : sequence_control_set_ptr,
7650 : context_ptr->sb_ptr,
7651 : context_ptr->leaf_partition_neighbor_array);
7652 : }
7653 : }
7654 : }
7655 : #endif
7656 : #else
7657 : EbBool is_nsq_table_used = (picture_control_set_ptr->slice_type == !I_SLICE &&
7658 : picture_control_set_ptr->parent_pcs_ptr->pic_depth_mode <= PIC_ALL_C_DEPTH_MODE &&
7659 : picture_control_set_ptr->parent_pcs_ptr->nsq_search_level >= NSQ_SEARCH_LEVEL1 &&
7660 : picture_control_set_ptr->parent_pcs_ptr->nsq_search_level < NSQ_SEARCH_FULL) ? EB_TRUE : EB_FALSE;
7661 :
7662 : is_nsq_table_used = picture_control_set_ptr->enc_mode == ENC_M0 ? EB_FALSE : is_nsq_table_used;
7663 : if (is_nsq_table_used) {
7664 : if (context_ptr->blk_geom->shape == PART_N) {
7665 : order_nsq_table(
7666 : picture_control_set_ptr,
7667 : context_ptr,
7668 : sequence_control_set_ptr,
7669 : context_ptr->sb_ptr,
7670 : context_ptr->leaf_partition_neighbor_array);
7671 : }
7672 : }
7673 : #endif
7674 :
7675 811401 : uint8_t is_complete_sb = sequence_control_set_ptr->sb_geom[lcuAddr].is_complete_sb;
7676 :
7677 811414 : if (allowed_ns_cu(
7678 : #if COMBINE_MDC_NSQ_TABLE
7679 : #if MDC_ADAPTIVE_LEVEL
7680 811401 : picture_control_set_ptr->parent_pcs_ptr->enable_adaptive_ol_partitioning,
7681 : #else
7682 : picture_control_set_ptr->parent_pcs_ptr->mdc_depth_level,
7683 : #endif
7684 : #endif
7685 811401 : is_nsq_table_used, picture_control_set_ptr->parent_pcs_ptr->nsq_max_shapes_md, context_ptr, is_complete_sb))
7686 : {
7687 :
7688 : #if SPEED_OPT
7689 811414 : const aom_variance_fn_ptr_t *fn_ptr = &mefn_ptr[context_ptr->blk_geom->bsize];
7690 811414 : context_ptr->source_variance = eb_av1_get_sby_perpixel_variance(fn_ptr, (input_picture_ptr->buffer_y + inputOriginIndex), input_picture_ptr->stride_y, context_ptr->blk_geom->bsize);
7691 : #endif
7692 :
7693 811422 : cu_ptr->av1xd->tile.mi_col_start = context_ptr->sb_ptr->tile_info.mi_col_start;
7694 811422 : cu_ptr->av1xd->tile.mi_col_end = context_ptr->sb_ptr->tile_info.mi_col_end;
7695 811422 : cu_ptr->av1xd->tile.mi_row_start = context_ptr->sb_ptr->tile_info.mi_row_start;
7696 811422 : cu_ptr->av1xd->tile.mi_row_end = context_ptr->sb_ptr->tile_info.mi_row_end;
7697 :
7698 811422 : ProductCodingLoopInitFastLoop(
7699 : context_ptr,
7700 : context_ptr->skip_coeff_neighbor_array,
7701 : context_ptr->inter_pred_dir_neighbor_array,
7702 : context_ptr->ref_frame_type_neighbor_array,
7703 : context_ptr->intra_luma_mode_neighbor_array,
7704 : context_ptr->skip_flag_neighbor_array,
7705 : context_ptr->mode_type_neighbor_array,
7706 : context_ptr->leaf_depth_neighbor_array,
7707 : context_ptr->leaf_partition_neighbor_array);
7708 : // Skip sub blocks if the current block has the same depth as the left block and above block
7709 811418 : if (picture_control_set_ptr->parent_pcs_ptr->skip_sub_blks)
7710 0 : *skip_sub_blocks =check_skip_sub_blks(picture_control_set_ptr,
7711 : context_ptr,
7712 : cu_ptr,
7713 : is_complete_sb,
7714 : lcuAddr);
7715 :
7716 : // Initialize uv_search_path
7717 811418 : context_ptr->uv_search_path = EB_FALSE;
7718 : // Search the best independent intra chroma mode
7719 811418 : if (context_ptr->chroma_level == CHROMA_MODE_0) {
7720 124200 : if (context_ptr->blk_geom->sq_size < 128) {
7721 124200 : if (context_ptr->blk_geom->has_uv) {
7722 82392 : search_best_independent_uv_mode(
7723 : picture_control_set_ptr,
7724 : input_picture_ptr,
7725 : inputCbOriginIndex,
7726 : cuChromaOriginIndex,
7727 : context_ptr);
7728 : }
7729 : }
7730 : }
7731 :
7732 811419 : FrameHeader *frm_hdr = &picture_control_set_ptr->parent_pcs_ptr->frm_hdr;
7733 811419 : context_ptr->geom_offset_x = 0;
7734 811419 : context_ptr->geom_offset_y = 0;
7735 :
7736 811419 : if (sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128) {
7737 0 : uint32_t me_sb_size = sequence_control_set_ptr->sb_sz;
7738 0 : uint32_t me_pic_width_in_sb = (sequence_control_set_ptr->seq_header.max_frame_width + sequence_control_set_ptr->sb_sz - 1) / me_sb_size;
7739 0 : uint32_t me_sb_x = (context_ptr->cu_origin_x / me_sb_size);
7740 0 : uint32_t me_sb_y = (context_ptr->cu_origin_y / me_sb_size);
7741 0 : context_ptr->me_sb_addr = me_sb_x + me_sb_y * me_pic_width_in_sb;
7742 0 : context_ptr->geom_offset_x = (me_sb_x & 0x1) * me_sb_size;
7743 0 : context_ptr->geom_offset_y = (me_sb_y & 0x1) * me_sb_size;
7744 : }
7745 : else
7746 811419 : context_ptr->me_sb_addr = lcuAddr;
7747 :
7748 811402 : context_ptr->me_block_offset =
7749 640590 : (context_ptr->blk_geom->bwidth == 4 || context_ptr->blk_geom->bheight == 4 || context_ptr->blk_geom->bwidth == 128 || context_ptr->blk_geom->bheight == 128) ?
7750 950339 : 0 :
7751 501670 : get_me_info_index(picture_control_set_ptr->parent_pcs_ptr->max_number_of_pus_per_sb, context_ptr->blk_geom, context_ptr->geom_offset_x, context_ptr->geom_offset_y);
7752 :
7753 : // Generate MVP(s)
7754 811402 : if (frm_hdr->allow_intrabc) // picture_control_set_ptr->slice_type == I_SLICE
7755 0 : generate_av1_mvp_table(
7756 0 : &context_ptr->sb_ptr->tile_info,
7757 : context_ptr,
7758 : context_ptr->cu_ptr,
7759 : context_ptr->blk_geom,
7760 0 : context_ptr->cu_origin_x,
7761 0 : context_ptr->cu_origin_y,
7762 0 : picture_control_set_ptr->parent_pcs_ptr->ref_frame_type_arr,
7763 : 1,
7764 : picture_control_set_ptr);
7765 811402 : else if (picture_control_set_ptr->slice_type != I_SLICE)
7766 745162 : generate_av1_mvp_table(
7767 745162 : &context_ptr->sb_ptr->tile_info,
7768 : context_ptr,
7769 : context_ptr->cu_ptr,
7770 : context_ptr->blk_geom,
7771 745162 : context_ptr->cu_origin_x,
7772 745162 : context_ptr->cu_origin_y,
7773 745162 : picture_control_set_ptr->parent_pcs_ptr->ref_frame_type_arr,
7774 745162 : picture_control_set_ptr->parent_pcs_ptr->tot_ref_frame_types,
7775 : picture_control_set_ptr);
7776 :
7777 : // Perform ME search around the best MVP
7778 811363 : if (context_ptr->predictive_me_level)
7779 675250 : predictive_me_search(
7780 : picture_control_set_ptr,
7781 : context_ptr,
7782 : input_picture_ptr,
7783 : inputOriginIndex,
7784 : cuOriginIndex);
7785 :
7786 : #if II_COMP_FLAG
7787 : //for every CU, perform Luma DC/V/H/S intra prediction to be used later in inter-intra search
7788 811366 : int allow_ii = is_interintra_allowed_bsize(context_ptr->blk_geom->bsize);
7789 811360 : if (picture_control_set_ptr->parent_pcs_ptr->enable_inter_intra && allow_ii)
7790 295933 : precompute_intra_pred_for_inter_intra(
7791 : picture_control_set_ptr,
7792 : context_ptr);
7793 : #endif
7794 :
7795 811357 : generate_md_stage_0_cand(
7796 : context_ptr->sb_ptr,
7797 : context_ptr,
7798 : ss_mecontext,
7799 : &fast_candidate_total_count,
7800 : picture_control_set_ptr);
7801 :
7802 : //MD Stages
7803 : //The first stage(old fast loop) and the last stage(old full loop) should remain at their locations, new stages could be created between those two.
7804 : //a bypass mechanism should be added to skip one or all of the intermediate stages, in a way to to be able to fall back to org design (FastLoop->FullLoop)
7805 811355 : set_md_stage_counts(
7806 : picture_control_set_ptr,
7807 : context_ptr,
7808 : fast_candidate_total_count);
7809 :
7810 : CAND_CLASS cand_class_it;
7811 811052 : uint32_t buffer_start_idx = 0;
7812 : uint32_t buffer_count_for_curr_class;
7813 811052 : uint32_t buffer_total_count = 0;
7814 : #if REMOVE_MD_STAGE_1
7815 811052 : context_ptr->md_stage_1_total_count = 0;
7816 811052 : context_ptr->md_stage_2_total_count = 0;
7817 : #else
7818 : context_ptr->md_stage_2_total_count = 0;
7819 : context_ptr->md_stage_3_total_count = 0;
7820 :
7821 : context_ptr->md_stage = MD_STAGE_0;
7822 : #endif
7823 : #if INTER_INTRA_CLASS_PRUNING
7824 811052 : uint64_t best_md_stage_cost = (uint64_t)~0;
7825 : #endif
7826 8112060 : for (cand_class_it = CAND_CLASS_0; cand_class_it < CAND_CLASS_TOTAL; cand_class_it++) {
7827 :
7828 : //number of next level candidates could not exceed number of curr level candidates
7829 7300640 : context_ptr->md_stage_1_count[cand_class_it] = MIN(context_ptr->md_stage_0_count[cand_class_it], context_ptr->md_stage_1_count[cand_class_it]);
7830 :
7831 7300640 : if (context_ptr->md_stage_0_count[cand_class_it] > 0 && context_ptr->md_stage_1_count[cand_class_it] > 0) {
7832 :
7833 3305860 : buffer_count_for_curr_class = context_ptr->md_stage_0_count[cand_class_it] > context_ptr->md_stage_1_count[cand_class_it] ? (context_ptr->md_stage_1_count[cand_class_it] + 1) : context_ptr->md_stage_1_count[cand_class_it];
7834 :
7835 3305860 : buffer_total_count += buffer_count_for_curr_class;
7836 3305860 : assert(buffer_total_count <= MAX_NFL_BUFF && "not enough cand buffers");
7837 :
7838 : //Input: md_stage_0_count[cand_class_it] Output: md_stage_1_count[cand_class_it]
7839 3305860 : context_ptr->target_class = cand_class_it;
7840 :
7841 3305860 : md_stage_0(
7842 : picture_control_set_ptr,
7843 : context_ptr,
7844 : candidate_buffer_ptr_array_base,
7845 : fast_candidate_array,
7846 : 0,
7847 3305860 : fast_candidate_total_count - 1,
7848 : input_picture_ptr,
7849 : inputOriginIndex,
7850 : inputCbOriginIndex,
7851 : inputCbOriginIndex,
7852 : cu_ptr,
7853 : cuOriginIndex,
7854 : cuChromaOriginIndex,
7855 : buffer_start_idx,
7856 : buffer_count_for_curr_class,
7857 3305860 : context_ptr->md_stage_0_count[cand_class_it] > context_ptr->md_stage_1_count[cand_class_it], //is there need to max the temp buffer
7858 : 0);
7859 :
7860 : //Sort: md_stage_1_count[cand_class_it]
7861 3305790 : memset(context_ptr->cand_buff_indices[cand_class_it], 0xFFFFFFFF, MAX_NFL_BUFF * sizeof(uint32_t));
7862 3305790 : sort_stage0_fast_candidates(
7863 : context_ptr,
7864 : buffer_start_idx,
7865 : buffer_count_for_curr_class, //how many cand buffers to sort. one of the buffers can have max cost.
7866 3305790 : context_ptr->cand_buff_indices[cand_class_it]);
7867 :
7868 : #if INTER_INTRA_CLASS_PRUNING
7869 3306220 : uint32_t *cand_buff_indices = context_ptr->cand_buff_indices[cand_class_it];
7870 3306220 : best_md_stage_cost = MIN((*(context_ptr->candidate_buffer_ptr_array[cand_buff_indices[0]]->fast_cost_ptr)), best_md_stage_cost);
7871 : #else
7872 : #if REMOVE_MD_STAGE_1
7873 : // Distortion-based NIC pruning to CLASS_1, CLASS_2, CLASS_3
7874 : if (cand_class_it == CAND_CLASS_1 || cand_class_it == CAND_CLASS_2 || cand_class_it == CAND_CLASS_3) {
7875 : uint32_t *cand_buff_indices = context_ptr->cand_buff_indices[cand_class_it];
7876 : assert(context_ptr->md_stage_0_count[CAND_CLASS_0] > 0);
7877 : if (context_ptr->md_stage_0_count[CAND_CLASS_0] > 0 && *(context_ptr->candidate_buffer_ptr_array[cand_buff_indices[0]]->fast_cost_ptr) <
7878 : *(context_ptr->candidate_buffer_ptr_array[context_ptr->cand_buff_indices[CAND_CLASS_0][0]]->fast_cost_ptr)) {
7879 : uint32_t fast1_cand_count = 1;
7880 : while (fast1_cand_count < context_ptr->md_stage_1_count[cand_class_it] && ((((*(context_ptr->candidate_buffer_ptr_array[cand_buff_indices[fast1_cand_count]]->fast_cost_ptr) - *(context_ptr->candidate_buffer_ptr_array[cand_buff_indices[0]]->fast_cost_ptr)) * 100) / (*(context_ptr->candidate_buffer_ptr_array[cand_buff_indices[0]]->fast_cost_ptr))) < context_ptr->dist_base_md_stage_0_count_th)) {
7881 : fast1_cand_count++;
7882 : }
7883 : context_ptr->md_stage_1_count[cand_class_it] = fast1_cand_count;
7884 : }
7885 : }
7886 : #endif
7887 : #endif
7888 :
7889 3306220 : buffer_start_idx += buffer_count_for_curr_class;//for next iteration.
7890 :
7891 : }
7892 :
7893 : #if !INTER_INTRA_CLASS_PRUNING
7894 : #if REMOVE_MD_STAGE_1
7895 : context_ptr->md_stage_1_total_count += context_ptr->md_stage_1_count[cand_class_it];
7896 : #endif
7897 : #endif
7898 : }
7899 :
7900 : #if INTER_INTRA_CLASS_PRUNING
7901 811412 : interintra_class_pruning_1(context_ptr,best_md_stage_cost);
7902 : #endif
7903 :
7904 : #if !REMOVE_MD_STAGE_1
7905 : #if SPEED_OPT
7906 : //after completing stage0, we might shorten cand count for some classes.
7907 : inter_class_decision_count_1(context_ptr);
7908 : #endif
7909 :
7910 : context_ptr->md_stage = MD_STAGE_1;
7911 : for (cand_class_it = CAND_CLASS_0; cand_class_it < CAND_CLASS_TOTAL; cand_class_it++) {
7912 :
7913 : //number of next level candidates could not exceed number of curr level candidates
7914 : context_ptr->md_stage_2_count[cand_class_it] = MIN(context_ptr->md_stage_1_count[cand_class_it], context_ptr->md_stage_2_count[cand_class_it]);
7915 : context_ptr->md_stage_2_total_count += context_ptr->md_stage_2_count[cand_class_it];
7916 :
7917 : if (context_ptr->bypass_stage1[cand_class_it] == 0 && context_ptr->md_stage_1_count[cand_class_it] > 0 && context_ptr->md_stage_2_count[cand_class_it] > 0) {
7918 : //Input: md_stage_1_count[cand_class_it] Output: full_cand_count[cand_class_it]
7919 : context_ptr->target_class = cand_class_it;
7920 : md_stage_1(
7921 : picture_control_set_ptr,
7922 : context_ptr,
7923 : candidate_buffer_ptr_array_base,
7924 : context_ptr->md_stage_1_count[cand_class_it],
7925 : input_picture_ptr,
7926 : inputOriginIndex,
7927 : inputCbOriginIndex,
7928 : inputCbOriginIndex,
7929 : cu_ptr,
7930 : cuOriginIndex,
7931 : cuChromaOriginIndex,
7932 : 0);
7933 :
7934 : //sort the new set of candidates
7935 : sort_stage1_fast_candidates(
7936 : context_ptr,
7937 : context_ptr->md_stage_1_count[cand_class_it],
7938 : context_ptr->cand_buff_indices[cand_class_it]);
7939 : }
7940 : }
7941 : #endif
7942 811402 : memset(context_ptr->best_candidate_index_array, 0xFFFFFFFF, MAX_NFL_BUFF * sizeof(uint32_t));
7943 811402 : memset(context_ptr->sorted_candidate_index_array, 0xFFFFFFFF, MAX_NFL * sizeof(uint32_t));
7944 :
7945 811402 : uint64_t ref_fast_cost = MAX_MODE_COST;
7946 : #if REMOVE_MD_STAGE_1
7947 811402 : construct_best_sorted_arrays_md_stage_1(
7948 : #else
7949 : construct_best_sorted_arrays_md_stage_2(
7950 : #endif
7951 : context_ptr,
7952 : candidate_buffer_ptr_array,
7953 811402 : context_ptr->best_candidate_index_array,
7954 811402 : context_ptr->sorted_candidate_index_array,
7955 : &ref_fast_cost);
7956 :
7957 :
7958 : // 1st Full-Loop
7959 : #if INTER_INTRA_CLASS_PRUNING
7960 811456 : best_md_stage_cost = (uint64_t)~0;
7961 : #endif
7962 : #if REMOVE_MD_STAGE_1
7963 8113500 : for (cand_class_it = CAND_CLASS_0; cand_class_it < CAND_CLASS_TOTAL; cand_class_it++) {
7964 : //number of next level candidates could not exceed number of curr level candidates
7965 7302080 : context_ptr->md_stage_2_count[cand_class_it] = MIN(context_ptr->md_stage_1_count[cand_class_it], context_ptr->md_stage_2_count[cand_class_it]);
7966 : #if !INTER_INTRA_CLASS_PRUNING
7967 : context_ptr->md_stage_2_total_count += context_ptr->md_stage_2_count[cand_class_it];
7968 : #endif
7969 7302080 : if (context_ptr->bypass_md_stage_1[cand_class_it] == EB_FALSE && context_ptr->md_stage_1_count[cand_class_it] > 0 && context_ptr->md_stage_2_count[cand_class_it] > 0) {
7970 : #else
7971 : context_ptr->md_stage = MD_STAGE_2;
7972 : for (cand_class_it = CAND_CLASS_0; cand_class_it < CAND_CLASS_TOTAL; cand_class_it++) {
7973 : //number of next level candidates could not exceed number of curr level candidates
7974 : context_ptr->md_stage_3_count[cand_class_it] = MIN(context_ptr->md_stage_2_count[cand_class_it], context_ptr->md_stage_3_count[cand_class_it]);
7975 : context_ptr->md_stage_3_total_count += context_ptr->md_stage_3_count[cand_class_it];
7976 :
7977 : if (context_ptr->bypass_stage2[cand_class_it] == EB_FALSE && context_ptr->md_stage_2_count[cand_class_it] > 0 && context_ptr->md_stage_3_count[cand_class_it] > 0) {
7978 : #endif
7979 3160780 : context_ptr->target_class = cand_class_it;
7980 : #if REMOVE_MD_STAGE_1
7981 3160780 : md_stage_1(
7982 : #else
7983 : md_stage_2(
7984 : #endif
7985 : picture_control_set_ptr,
7986 : context_ptr->sb_ptr,
7987 : cu_ptr,
7988 : context_ptr,
7989 : input_picture_ptr,
7990 : inputOriginIndex,
7991 : inputCbOriginIndex,
7992 : cuOriginIndex,
7993 : cuChromaOriginIndex,
7994 : ref_fast_cost);
7995 :
7996 : // Sort the candidates of the target class based on the 1st full loop cost
7997 :
7998 : //sort the new set of candidates
7999 : #if REMOVE_MD_STAGE_1
8000 3160750 : if (context_ptr->md_stage_1_count[cand_class_it])
8001 3160780 : sort_stage1_candidates(
8002 : context_ptr,
8003 : context_ptr->md_stage_1_count[cand_class_it],
8004 3160780 : context_ptr->cand_buff_indices[cand_class_it]);
8005 : #else
8006 : if (context_ptr->md_stage_2_count[cand_class_it])
8007 : sort_stage2_candidates(
8008 : context_ptr,
8009 : context_ptr->md_stage_2_count[cand_class_it],
8010 : context_ptr->cand_buff_indices[cand_class_it]);
8011 : #endif
8012 :
8013 : #if INTER_INTRA_CLASS_PRUNING
8014 3160740 : uint32_t *cand_buff_indices = context_ptr->cand_buff_indices[cand_class_it];
8015 3160740 : best_md_stage_cost = MIN((*(context_ptr->candidate_buffer_ptr_array[cand_buff_indices[0]]->full_cost_ptr)), best_md_stage_cost);
8016 : #endif
8017 : }
8018 : }
8019 : #if INTER_INTRA_CLASS_PRUNING
8020 811424 : interintra_class_pruning_2(context_ptr, best_md_stage_cost);
8021 : #endif
8022 :
8023 : #if REMOVE_MD_STAGE_1
8024 811401 : assert(context_ptr->md_stage_2_total_count <= MAX_NFL);
8025 811402 : assert(context_ptr->md_stage_2_total_count > 0);
8026 811402 : construct_best_sorted_arrays_md_stage_2(
8027 : #else
8028 : assert(context_ptr->md_stage_3_total_count <= MAX_NFL);
8029 : assert(context_ptr->md_stage_3_total_count > 0);
8030 : construct_best_sorted_arrays_md_stage_3(
8031 : #endif
8032 : context_ptr,
8033 : candidate_buffer_ptr_array,
8034 811402 : context_ptr->best_candidate_index_array,
8035 811402 : context_ptr->sorted_candidate_index_array);
8036 :
8037 : // 2nd Full-Loop
8038 : #if REMOVE_MD_STAGE_1
8039 811417 : md_stage_2(
8040 : #else
8041 : context_ptr->md_stage = MD_STAGE_3;
8042 : md_stage_3(
8043 : #endif
8044 : picture_control_set_ptr,
8045 : context_ptr->sb_ptr,
8046 : cu_ptr,
8047 : context_ptr,
8048 : input_picture_ptr,
8049 : inputOriginIndex,
8050 : inputCbOriginIndex,
8051 : cuOriginIndex,
8052 : cuChromaOriginIndex,
8053 : #if REMOVE_MD_STAGE_1
8054 : context_ptr->md_stage_2_total_count,
8055 : #else
8056 : context_ptr->md_stage_3_total_count,
8057 : #endif
8058 : ref_fast_cost); // fullCandidateTotalCount to number of buffers to process
8059 :
8060 : // Full Mode Decision (choose the best mode)
8061 811396 : candidate_index = product_full_mode_decision(
8062 : context_ptr,
8063 : cu_ptr,
8064 : candidate_buffer_ptr_array,
8065 : #if REMOVE_MD_STAGE_1
8066 : context_ptr->md_stage_2_total_count,
8067 : #else
8068 : context_ptr->md_stage_3_total_count,
8069 : #endif
8070 811396 : (context_ptr->full_loop_escape == 2) ? context_ptr->sorted_candidate_index_array : context_ptr->best_candidate_index_array,
8071 811396 : context_ptr->prune_ref_frame_for_rec_partitions,
8072 : &best_intra_mode);
8073 811430 : candidate_buffer = candidate_buffer_ptr_array[candidate_index];
8074 :
8075 811430 : bestcandidate_buffers[0] = candidate_buffer;
8076 :
8077 :
8078 811430 : if (picture_control_set_ptr->parent_pcs_ptr->interpolation_search_level == IT_SEARCH_INTER_DEPTH) {
8079 0 : if (candidate_buffer->candidate_ptr->type != INTRA_MODE && candidate_buffer->candidate_ptr->motion_mode == SIMPLE_TRANSLATION) {
8080 :
8081 0 : context_ptr->md_staging_skip_interpolation_search = EB_FALSE;
8082 0 : context_ptr->md_staging_skip_inter_chroma_pred = EB_FALSE;
8083 0 : ProductPredictionFunTable[candidate_buffer->candidate_ptr->type](
8084 : context_ptr,
8085 : picture_control_set_ptr,
8086 : candidate_buffer);
8087 0 : cu_ptr->interp_filters = candidate_buffer->candidate_ptr->interp_filters;
8088 : }
8089 : }
8090 811430 : inter_depth_tx_search(
8091 : picture_control_set_ptr,
8092 : candidate_buffer,
8093 : cu_ptr,
8094 : context_ptr,
8095 : input_picture_ptr,
8096 : ref_fast_cost);
8097 :
8098 811374 : uint8_t sq_index = LOG2F(context_ptr->blk_geom->sq_size) - 2;
8099 811386 : if (context_ptr->blk_geom->shape == PART_N) {
8100 164402 : context_ptr->parent_sq_type[sq_index] = candidate_buffer->candidate_ptr->type;
8101 :
8102 457182 : context_ptr->parent_sq_has_coeff[sq_index] = (candidate_buffer->candidate_ptr->y_has_coeff ||
8103 128378 : candidate_buffer->candidate_ptr->u_has_coeff ||
8104 292780 : candidate_buffer->candidate_ptr->v_has_coeff) ? 1 : 0;
8105 :
8106 164402 : context_ptr->parent_sq_pred_mode[sq_index] = candidate_buffer->candidate_ptr->pred_mode;
8107 : }
8108 :
8109 811386 : AV1PerformInverseTransformRecon(
8110 : picture_control_set_ptr,
8111 : context_ptr,
8112 : candidate_buffer,
8113 : cu_ptr,
8114 : context_ptr->blk_geom);
8115 :
8116 811377 : if (!context_ptr->blk_geom->has_uv) {
8117 : // Store the luma data for 4x* and *x4 blocks to be used for CFL
8118 184727 : EbPictureBufferDesc *recon_ptr = candidate_buffer->recon_ptr;
8119 184727 : uint32_t rec_luma_offset = context_ptr->blk_geom->origin_x + context_ptr->blk_geom->origin_y * recon_ptr->stride_y;
8120 184727 : if (context_ptr->hbd_mode_decision) {
8121 0 : for (uint32_t j = 0; j < context_ptr->blk_geom->bheight; ++j)
8122 0 : memcpy(context_ptr->cfl_temp_luma_recon16bit + rec_luma_offset + j* recon_ptr->stride_y, ((uint16_t *)recon_ptr->buffer_y) + (rec_luma_offset + j * recon_ptr->stride_y), sizeof(uint16_t) * context_ptr->blk_geom->bwidth);
8123 : } else {
8124 1578480 : for (uint32_t j = 0; j < context_ptr->blk_geom->bheight; ++j)
8125 1393750 : memcpy(&context_ptr->cfl_temp_luma_recon[rec_luma_offset + j* recon_ptr->stride_y], recon_ptr->buffer_y + rec_luma_offset + j * recon_ptr->stride_y, context_ptr->blk_geom->bwidth);
8126 : }
8127 : }
8128 : //copy neigh recon data in cu_ptr
8129 : {
8130 : uint32_t j;
8131 811377 : EbPictureBufferDesc *recon_ptr = candidate_buffer->recon_ptr;
8132 811377 : uint32_t recLumaOffset = context_ptr->blk_geom->origin_x + context_ptr->blk_geom->origin_y * recon_ptr->stride_y;
8133 :
8134 811377 : uint32_t recCbOffset = ((((context_ptr->blk_geom->origin_x >> 3) << 3) + ((context_ptr->blk_geom->origin_y >> 3) << 3) * candidate_buffer->recon_ptr->stride_cb) >> 1);
8135 811377 : uint32_t recCrOffset = ((((context_ptr->blk_geom->origin_x >> 3) << 3) + ((context_ptr->blk_geom->origin_y >> 3) << 3) * candidate_buffer->recon_ptr->stride_cr) >> 1);
8136 :
8137 811377 : if (!context_ptr->hbd_mode_decision) {
8138 811377 : memcpy(cu_ptr->neigh_top_recon[0], recon_ptr->buffer_y + recLumaOffset + (context_ptr->blk_geom->bheight - 1)*recon_ptr->stride_y, context_ptr->blk_geom->bwidth);
8139 811377 : if (context_ptr->blk_geom->has_uv && context_ptr->chroma_level <= CHROMA_MODE_1) {
8140 551039 : memcpy(cu_ptr->neigh_top_recon[1], recon_ptr->buffer_cb + recCbOffset + (context_ptr->blk_geom->bheight_uv - 1)*recon_ptr->stride_cb, context_ptr->blk_geom->bwidth_uv);
8141 551039 : memcpy(cu_ptr->neigh_top_recon[2], recon_ptr->buffer_cr + recCrOffset + (context_ptr->blk_geom->bheight_uv - 1)*recon_ptr->stride_cr, context_ptr->blk_geom->bwidth_uv);
8142 : }
8143 :
8144 12128200 : for (j = 0; j < context_ptr->blk_geom->bheight; ++j)
8145 11316900 : cu_ptr->neigh_left_recon[0][j] = recon_ptr->buffer_y[recLumaOffset + context_ptr->blk_geom->bwidth - 1 + j * recon_ptr->stride_y];
8146 :
8147 811377 : if (context_ptr->blk_geom->has_uv && context_ptr->chroma_level <= CHROMA_MODE_1) {
8148 5074060 : for (j = 0; j < context_ptr->blk_geom->bheight_uv; ++j) {
8149 4523020 : cu_ptr->neigh_left_recon[1][j] = recon_ptr->buffer_cb[recCbOffset + context_ptr->blk_geom->bwidth_uv - 1 + j * recon_ptr->stride_cb];
8150 4523020 : cu_ptr->neigh_left_recon[2][j] = recon_ptr->buffer_cr[recCrOffset + context_ptr->blk_geom->bwidth_uv - 1 + j * recon_ptr->stride_cr];
8151 : }
8152 : }
8153 : } else {
8154 0 : uint16_t sz = sizeof(uint16_t);
8155 0 : memcpy(cu_ptr->neigh_top_recon_16bit[0], recon_ptr->buffer_y + sz * (recLumaOffset + (context_ptr->blk_geom->bheight - 1)*recon_ptr->stride_y), sz * context_ptr->blk_geom->bwidth);
8156 0 : if (context_ptr->blk_geom->has_uv && context_ptr->chroma_level <= CHROMA_MODE_1) {
8157 0 : memcpy(cu_ptr->neigh_top_recon_16bit[1], recon_ptr->buffer_cb + sz * (recCbOffset + (context_ptr->blk_geom->bheight_uv - 1)*recon_ptr->stride_cb), sz * context_ptr->blk_geom->bwidth_uv);
8158 0 : memcpy(cu_ptr->neigh_top_recon_16bit[2], recon_ptr->buffer_cr + sz * (recCrOffset + (context_ptr->blk_geom->bheight_uv - 1)*recon_ptr->stride_cr), sz * context_ptr->blk_geom->bwidth_uv);
8159 : }
8160 :
8161 0 : for (j = 0; j < context_ptr->blk_geom->bheight; ++j)
8162 0 : cu_ptr->neigh_left_recon_16bit[0][j] = ((uint16_t *) recon_ptr->buffer_y)[recLumaOffset + context_ptr->blk_geom->bwidth - 1 + j * recon_ptr->stride_y];
8163 :
8164 0 : if (context_ptr->blk_geom->has_uv && context_ptr->chroma_level <= CHROMA_MODE_1) {
8165 0 : for (j = 0; j < context_ptr->blk_geom->bheight_uv; ++j) {
8166 0 : cu_ptr->neigh_left_recon_16bit[1][j] = ((uint16_t *) recon_ptr->buffer_cb)[recCbOffset + context_ptr->blk_geom->bwidth_uv - 1 + j * recon_ptr->stride_cb];
8167 0 : cu_ptr->neigh_left_recon_16bit[2][j] = ((uint16_t *) recon_ptr->buffer_cr)[recCrOffset + context_ptr->blk_geom->bwidth_uv - 1 + j * recon_ptr->stride_cr];
8168 : }
8169 : }
8170 : }
8171 : }
8172 :
8173 : #if NO_ENCDEC
8174 : //copy recon
8175 : uint32_t tu_origin_index = context_ptr->blk_geom->origin_x + (context_ptr->blk_geom->origin_y * 128);
8176 : uint32_t bwidth = context_ptr->blk_geom->bwidth;
8177 : uint32_t bheight = context_ptr->blk_geom->bheight;
8178 :
8179 : if (!context_ptr->hbd_mode_decision) {
8180 : uint8_t* src_ptr = &(((uint8_t*)candidate_buffer->recon_ptr->buffer_y)[tu_origin_index]);
8181 : uint8_t* dst_ptr = &(((uint8_t*)context_ptr->cu_ptr->recon_tmp->buffer_y)[0]);
8182 :
8183 : uint32_t j;
8184 : for (j = 0; j < bheight; j++)
8185 : memcpy(dst_ptr + j * 128, src_ptr + j * 128, bwidth * sizeof(uint8_t));
8186 :
8187 : if (context_ptr->blk_geom->has_uv)
8188 : {
8189 : uint32_t tu_origin_index = ((((context_ptr->blk_geom->origin_x >> 3) << 3) + ((context_ptr->blk_geom->origin_y >> 3) << 3) * candidate_buffer->recon_ptr->stride_cb) >> 1);
8190 : bwidth = context_ptr->blk_geom->bwidth_uv;
8191 : bheight = context_ptr->blk_geom->bheight_uv;
8192 :
8193 : // Cb
8194 : src_ptr = &(((uint8_t*)candidate_buffer->recon_ptr->buffer_cb)[tu_origin_index]);
8195 : dst_ptr = &(((uint8_t*)context_ptr->cu_ptr->recon_tmp->buffer_cb)[0]);
8196 :
8197 : for (j = 0; j < bheight; j++)
8198 : memcpy(dst_ptr + j * 64, src_ptr + j * 64, bwidth * sizeof(uint8_t));
8199 :
8200 : // Cr
8201 : src_ptr = &(((uint8_t*)candidate_buffer->recon_ptr->buffer_cr)[tu_origin_index]);
8202 : dst_ptr = &(((uint8_t*)context_ptr->cu_ptr->recon_tmp->buffer_cr)[0]);
8203 :
8204 : for (j = 0; j < bheight; j++)
8205 : memcpy(dst_ptr + j * 64, src_ptr + j * 64, bwidth * sizeof(uint8_t));
8206 : }
8207 : } else {
8208 : uint16_t* src_ptr = ((uint16_t*) candidate_buffer->recon_ptr->buffer_y) + tu_origin_index;
8209 : uint16_t* dst_ptr = (uint16_t*) context_ptr->cu_ptr->recon_tmp->buffer_y;
8210 : for (uint32_t j = 0; j < bheight; j++)
8211 : memcpy(dst_ptr + j * 128, src_ptr + j * 128, bwidth * sizeof(uint16_t));
8212 :
8213 : if (context_ptr->blk_geom->has_uv) {
8214 : tu_origin_index = ((((context_ptr->blk_geom->origin_x >> 3) << 3) + ((context_ptr->blk_geom->origin_y >> 3) << 3) * candidate_buffer->recon_ptr->stride_cb) >> 1);
8215 : bwidth = context_ptr->blk_geom->bwidth_uv;
8216 : bheight = context_ptr->blk_geom->bheight_uv;
8217 :
8218 : // Cb
8219 : src_ptr = ((uint16_t*) candidate_buffer->recon_ptr->buffer_cb) + tu_origin_index;
8220 : dst_ptr = (uint16_t*) context_ptr->cu_ptr->recon_tmp->buffer_cb;
8221 : for (uint32_t j = 0; j < bheight; j++)
8222 : memcpy(dst_ptr + j * 64, src_ptr + j * 64, bwidth * sizeof(uint16_t));
8223 :
8224 : // Cr
8225 : src_ptr = ((uint16_t*) candidate_buffer->recon_ptr->buffer_cr) + tu_origin_index;
8226 : dst_ptr = (uint16_t*) context_ptr->cu_ptr->recon_tmp->buffer_cr;
8227 : for (uint32_t j = 0; j < bheight; j++)
8228 : memcpy(dst_ptr + j * 64, src_ptr + j * 64, bwidth * sizeof(uint16_t));
8229 : }
8230 : }
8231 : #endif
8232 :
8233 811377 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].avail_blk_flag = EB_TRUE;
8234 : }
8235 : else
8236 : {
8237 0 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].cost = MAX_MODE_COST;
8238 0 : cu_ptr->prediction_unit_array->ref_frame_type = 0;
8239 : }
8240 811377 : }
8241 :
8242 : #if LESS_RECTANGULAR_CHECK_LEVEL
8243 987920 : void update_skip_next_nsq_for_a_b_shapes(
8244 : ModeDecisionContext *context_ptr,
8245 : uint64_t *sq_cost, uint64_t *h_cost,
8246 : uint64_t *v_cost, int *skip_next_nsq) {
8247 :
8248 987920 : switch (context_ptr->blk_geom->d1i)
8249 : {
8250 :
8251 : // NS
8252 89363 : case 0:
8253 89363 : *sq_cost = context_ptr->md_local_cu_unit[context_ptr->cu_ptr->mds_idx].cost;
8254 89363 : *h_cost = 0;
8255 89363 : *v_cost = 0;
8256 89363 : break;
8257 :
8258 : // H
8259 47750 : case 1:
8260 47750 : *h_cost = context_ptr->md_local_cu_unit[context_ptr->cu_ptr->mds_idx].cost;
8261 47750 : break;
8262 47750 : case 2:
8263 47750 : *h_cost += context_ptr->md_local_cu_unit[context_ptr->cu_ptr->mds_idx].cost;
8264 47750 : break;
8265 :
8266 : // V
8267 47749 : case 3:
8268 47749 : *v_cost = context_ptr->md_local_cu_unit[context_ptr->cu_ptr->mds_idx].cost;
8269 47749 : break;
8270 47750 : case 4:
8271 47750 : *v_cost += context_ptr->md_local_cu_unit[context_ptr->cu_ptr->mds_idx].cost;
8272 47750 : *skip_next_nsq = (*h_cost > ((*sq_cost * context_ptr->sq_weight) / 100)) ? 1 : *skip_next_nsq;
8273 47750 : break;
8274 :
8275 : // HA
8276 145966 : case 5:
8277 : case 6:
8278 : case 7:
8279 :
8280 : // HB
8281 : case 8:
8282 : case 9:
8283 145966 : *skip_next_nsq = (*h_cost > ((*sq_cost * context_ptr->sq_weight) / 100)) ? 1 : *skip_next_nsq;
8284 145966 : break;
8285 162788 : case 10:
8286 :
8287 : // VA
8288 : case 11:
8289 : case 12:
8290 : case 13:
8291 :
8292 : // VB
8293 : case 14:
8294 : case 15:
8295 162788 : *skip_next_nsq = (*v_cost > ((*sq_cost * context_ptr->sq_weight) / 100)) ? 1 : *skip_next_nsq;
8296 162788 : break;
8297 : }
8298 987920 : }
8299 : #endif
8300 :
8301 7193 : EB_EXTERN EbErrorType mode_decision_sb(
8302 : SequenceControlSet *sequence_control_set_ptr,
8303 : PictureControlSet *picture_control_set_ptr,
8304 : const MdcLcuData * const mdcResultTbPtr,
8305 : LargestCodingUnit *sb_ptr,
8306 : uint16_t sb_origin_x,
8307 : uint16_t sb_origin_y,
8308 : uint32_t lcuAddr,
8309 : SsMeContext *ss_mecontext,
8310 : ModeDecisionContext *context_ptr)
8311 : {
8312 7193 : EbErrorType return_error = EB_ErrorNone;
8313 :
8314 : uint32_t cuIdx;
8315 : ModeDecisionCandidateBuffer *bestcandidate_buffers[5];
8316 : // Pre Intra Search
8317 7193 : uint32_t leaf_count = mdcResultTbPtr->leaf_count;
8318 7193 : const EbMdcLeafData *const leaf_data_array = mdcResultTbPtr->leaf_data_array;
8319 7193 : context_ptr->sb_ptr = sb_ptr;
8320 : #if FIX_COEF_BASED_ATB_SKIP
8321 7193 : context_ptr->coeff_based_skip_atb = 0;
8322 : #endif
8323 7193 : EbBool all_cu_init = (picture_control_set_ptr->parent_pcs_ptr->pic_depth_mode <= PIC_SQ_DEPTH_MODE);
8324 7193 : if (all_cu_init) {
8325 3600 : init_sq_nsq_block(
8326 : sequence_control_set_ptr,
8327 : context_ptr);
8328 : }
8329 : else {
8330 3593 : init_sq_non4_block(
8331 : sequence_control_set_ptr,
8332 : context_ptr);
8333 : }
8334 : // Mode Decision Neighbor Arrays
8335 7191 : context_ptr->intra_luma_mode_neighbor_array = picture_control_set_ptr->md_intra_luma_mode_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8336 7191 : context_ptr->intra_chroma_mode_neighbor_array = picture_control_set_ptr->md_intra_chroma_mode_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8337 7191 : context_ptr->mv_neighbor_array = picture_control_set_ptr->md_mv_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8338 7191 : context_ptr->skip_flag_neighbor_array = picture_control_set_ptr->md_skip_flag_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8339 7191 : context_ptr->mode_type_neighbor_array = picture_control_set_ptr->md_mode_type_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8340 7191 : context_ptr->leaf_depth_neighbor_array = picture_control_set_ptr->md_leaf_depth_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8341 7191 : context_ptr->leaf_partition_neighbor_array = picture_control_set_ptr->mdleaf_partition_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8342 :
8343 7191 : if (!context_ptr->hbd_mode_decision) {
8344 7191 : context_ptr->luma_recon_neighbor_array = picture_control_set_ptr->md_luma_recon_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8345 7191 : context_ptr->cb_recon_neighbor_array = picture_control_set_ptr->md_cb_recon_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8346 7191 : context_ptr->cr_recon_neighbor_array = picture_control_set_ptr->md_cr_recon_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8347 : } else {
8348 0 : context_ptr->luma_recon_neighbor_array16bit = picture_control_set_ptr->md_luma_recon_neighbor_array16bit[MD_NEIGHBOR_ARRAY_INDEX];
8349 0 : context_ptr->cb_recon_neighbor_array16bit = picture_control_set_ptr->md_cb_recon_neighbor_array16bit[MD_NEIGHBOR_ARRAY_INDEX];
8350 0 : context_ptr->cr_recon_neighbor_array16bit = picture_control_set_ptr->md_cr_recon_neighbor_array16bit[MD_NEIGHBOR_ARRAY_INDEX];
8351 : }
8352 7191 : context_ptr->skip_coeff_neighbor_array = picture_control_set_ptr->md_skip_coeff_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8353 7191 : context_ptr->luma_dc_sign_level_coeff_neighbor_array = picture_control_set_ptr->md_luma_dc_sign_level_coeff_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8354 7191 : context_ptr->cb_dc_sign_level_coeff_neighbor_array = picture_control_set_ptr->md_cb_dc_sign_level_coeff_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8355 7191 : context_ptr->cr_dc_sign_level_coeff_neighbor_array = picture_control_set_ptr->md_cr_dc_sign_level_coeff_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8356 7191 : context_ptr->txfm_context_array = picture_control_set_ptr->md_txfm_context_array[MD_NEIGHBOR_ARRAY_INDEX];
8357 7191 : context_ptr->inter_pred_dir_neighbor_array = picture_control_set_ptr->md_inter_pred_dir_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8358 7191 : context_ptr->ref_frame_type_neighbor_array = picture_control_set_ptr->md_ref_frame_type_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8359 7191 : context_ptr->interpolation_type_neighbor_array = picture_control_set_ptr->md_interpolation_type_neighbor_array[MD_NEIGHBOR_ARRAY_INDEX];
8360 : #if ADD_SUPPORT_TO_SKIP_PART_N
8361 7191 : uint32_t d1_block_itr = 0;
8362 7191 : uint32_t d1_first_block = 1;
8363 : #endif
8364 :
8365 7191 : EbPictureBufferDesc *input_picture_ptr = picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
8366 7191 : if (context_ptr->hbd_mode_decision) {
8367 0 : const uint32_t input_luma_offset = ((sb_origin_y + input_picture_ptr->origin_y) * input_picture_ptr->stride_y) + (sb_origin_x + input_picture_ptr->origin_x);
8368 0 : const uint32_t input_bit_inc_luma_offset = ((sb_origin_y + input_picture_ptr->origin_y) * input_picture_ptr->stride_bit_inc_y) + (sb_origin_x + input_picture_ptr->origin_x);
8369 0 : const uint32_t input_cb_offset = (((sb_origin_y + input_picture_ptr->origin_y) >> 1) * input_picture_ptr->stride_cb) + ((sb_origin_x + input_picture_ptr->origin_x) >> 1);
8370 0 : const uint32_t input_bit_inc_cb_offset = (((sb_origin_y + input_picture_ptr->origin_y) >> 1) * input_picture_ptr->stride_bit_inc_cb) + ((sb_origin_x + input_picture_ptr->origin_x) >> 1);
8371 0 : const uint32_t input_cr_offset = (((sb_origin_y + input_picture_ptr->origin_y) >> 1) * input_picture_ptr->stride_cr) + ((sb_origin_x + input_picture_ptr->origin_x) >> 1);
8372 0 : const uint32_t input_bit_inc_cr_offset = (((sb_origin_y + input_picture_ptr->origin_y) >> 1) * input_picture_ptr->stride_bit_inc_cr) + ((sb_origin_x + input_picture_ptr->origin_x) >> 1);
8373 :
8374 0 : uint32_t sb_width = MIN(sequence_control_set_ptr->sb_size_pix, sequence_control_set_ptr->seq_header.max_frame_width - sb_origin_x);
8375 0 : uint32_t sb_height = MIN(sequence_control_set_ptr->sb_size_pix, sequence_control_set_ptr->seq_header.max_frame_height - sb_origin_y);
8376 :
8377 0 : pack2d_src(
8378 0 : input_picture_ptr->buffer_y + input_luma_offset,
8379 0 : input_picture_ptr->stride_y,
8380 0 : input_picture_ptr->buffer_bit_inc_y + input_bit_inc_luma_offset,
8381 0 : input_picture_ptr->stride_bit_inc_y,
8382 0 : (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_y,
8383 0 : context_ptr->input_sample16bit_buffer->stride_y,
8384 : sb_width,
8385 : sb_height);
8386 :
8387 0 : pack2d_src(
8388 0 : input_picture_ptr->buffer_cb + input_cb_offset,
8389 0 : input_picture_ptr->stride_cb,
8390 0 : input_picture_ptr->buffer_bit_inc_cb + input_bit_inc_cb_offset,
8391 0 : input_picture_ptr->stride_bit_inc_cb,
8392 0 : (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cb,
8393 0 : context_ptr->input_sample16bit_buffer->stride_cb,
8394 : sb_width >> 1,
8395 : sb_height >> 1);
8396 :
8397 0 : pack2d_src(
8398 0 : input_picture_ptr->buffer_cr + input_cr_offset,
8399 0 : input_picture_ptr->stride_cr,
8400 0 : input_picture_ptr->buffer_bit_inc_cr + input_bit_inc_cr_offset,
8401 0 : input_picture_ptr->stride_bit_inc_cr,
8402 0 : (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cr,
8403 0 : context_ptr->input_sample16bit_buffer->stride_cr,
8404 : sb_width >> 1,
8405 : sb_height >> 1);
8406 :
8407 0 : Store16bitInputSrc(context_ptr->input_sample16bit_buffer, picture_control_set_ptr, sb_origin_x, sb_origin_y, sb_width, sb_height);
8408 : //input_picture_ptr = context_ptr->input_sample16bit_buffer;
8409 0 : input_picture_ptr = picture_control_set_ptr->input_frame16bit;
8410 : }
8411 :
8412 : //CU Loop
8413 7135 : cuIdx = 0; //index over mdc array
8414 :
8415 : #if LESS_RECTANGULAR_CHECK_LEVEL
8416 7135 : uint64_t sq_cost = 0;
8417 : uint64_t h_cost;
8418 : uint64_t v_cost;
8419 : #endif
8420 :
8421 7135 : uint32_t blk_idx_mds = 0;
8422 7135 : uint32_t d1_blocks_accumlated = 0;
8423 7135 : int skip_next_nsq = 0;
8424 7135 : int skip_next_sq = 0;
8425 7135 : uint32_t next_non_skip_blk_idx_mds = 0;
8426 : uint8_t skip_sub_blocks;
8427 : do {
8428 1822010 : skip_sub_blocks = 0;
8429 1822010 : blk_idx_mds = leaf_data_array[cuIdx].mds_idx;
8430 :
8431 1822010 : const BlockGeom * blk_geom = context_ptr->blk_geom = get_blk_geom_mds(blk_idx_mds);
8432 1822020 : CodingUnit * cu_ptr = context_ptr->cu_ptr = &context_ptr->md_cu_arr_nsq[blk_idx_mds];
8433 :
8434 1822020 : context_ptr->cu_size_log2 = blk_geom->bwidth_log2;
8435 1822020 : context_ptr->cu_origin_x = sb_origin_x + blk_geom->origin_x;
8436 1822020 : context_ptr->cu_origin_y = sb_origin_y + blk_geom->origin_y;
8437 :
8438 1822020 : const EbMdcLeafData * const leafDataPtr = &mdcResultTbPtr->leaf_data_array[cuIdx];
8439 1822020 : context_ptr->sb_sz = BLOCK_SIZE_64;
8440 1822020 : context_ptr->round_origin_x = ((context_ptr->cu_origin_x >> 3) << 3);
8441 1822020 : context_ptr->round_origin_y = ((context_ptr->cu_origin_y >> 3) << 3);
8442 1822020 : context_ptr->sb_origin_x = sb_origin_x;
8443 1822020 : context_ptr->sb_origin_y = sb_origin_y;
8444 1822020 : context_ptr->md_local_cu_unit[blk_idx_mds].tested_cu_flag = EB_TRUE;
8445 1822020 : context_ptr->md_ep_pipe_sb[blk_idx_mds].merge_cost = 0;
8446 1822020 : context_ptr->md_ep_pipe_sb[blk_idx_mds].skip_cost = 0;
8447 :
8448 : #if OBMC_FLAG
8449 1822020 : cu_ptr->av1xd->sb_type = blk_geom->bsize;
8450 : #endif
8451 1822020 : cu_ptr->mds_idx = blk_idx_mds;
8452 1822020 : context_ptr->md_cu_arr_nsq[blk_idx_mds].mdc_split_flag = (uint16_t)leafDataPtr->split_flag;
8453 : #if ADD_SUPPORT_TO_SKIP_PART_N
8454 1822020 : context_ptr->md_cu_arr_nsq[blk_geom->sqi_mds].split_flag = (uint16_t)leafDataPtr->split_flag;
8455 : #endif
8456 1822020 : cu_ptr->split_flag = (uint16_t)leafDataPtr->split_flag; //mdc indicates smallest or non valid CUs with split flag=
8457 1822020 : cu_ptr->qp = context_ptr->qp;
8458 1822020 : cu_ptr->best_d1_blk = blk_idx_mds;
8459 : #if COMBINE_MDC_NSQ_TABLE
8460 1822020 : context_ptr->best_nsq_sahpe1 = leafDataPtr->ol_best_nsq_shape1;
8461 1822020 : context_ptr->best_nsq_sahpe2 = leafDataPtr->ol_best_nsq_shape2;
8462 1822020 : context_ptr->best_nsq_sahpe3 = leafDataPtr->ol_best_nsq_shape3;
8463 1822020 : context_ptr->best_nsq_sahpe4 = leafDataPtr->ol_best_nsq_shape4;
8464 1822020 : context_ptr->best_nsq_sahpe5 = leafDataPtr->ol_best_nsq_shape5;
8465 1822020 : context_ptr->best_nsq_sahpe6 = leafDataPtr->ol_best_nsq_shape6;
8466 1822020 : context_ptr->best_nsq_sahpe7 = leafDataPtr->ol_best_nsq_shape7;
8467 1822020 : context_ptr->best_nsq_sahpe8 = leafDataPtr->ol_best_nsq_shape8;
8468 : #endif
8469 1822020 : if (leafDataPtr->tot_d1_blocks != 1)
8470 : {
8471 : #if ADD_SUPPORT_TO_SKIP_PART_N
8472 : // We need to get the index of the sq_block for each NSQ branch
8473 1527050 : if (d1_first_block) {
8474 : #else
8475 : if (blk_geom->shape == PART_N)
8476 : #endif
8477 114418 : copy_neighbour_arrays( //save a clean neigh in [1], encode uses [0], reload the clean in [0] after done last ns block in a partition
8478 : picture_control_set_ptr,
8479 : context_ptr,
8480 : 0, 1,
8481 : #if ADD_SUPPORT_TO_SKIP_PART_N
8482 114418 : blk_geom->sqi_mds,
8483 : #else
8484 : blk_idx_mds,
8485 : #endif
8486 : sb_origin_x,
8487 : sb_origin_y);
8488 : #if ADD_SUPPORT_TO_SKIP_PART_N
8489 : }
8490 : #endif
8491 : }
8492 :
8493 1822010 : int32_t mi_row = context_ptr->cu_origin_y >> MI_SIZE_LOG2;
8494 1822010 : int32_t mi_col = context_ptr->cu_origin_x >> MI_SIZE_LOG2;
8495 1822010 : int mi_stride = picture_control_set_ptr->parent_pcs_ptr->av1_cm->mi_stride;
8496 1822010 : const int32_t offset = mi_row * mi_stride + mi_col;
8497 1822010 : cu_ptr->av1xd->mi = picture_control_set_ptr->parent_pcs_ptr->av1_cm->pcs_ptr->mi_grid_base + offset;
8498 1822010 : ModeInfo *mi_ptr = *cu_ptr->av1xd->mi;
8499 1822010 : cu_ptr->av1xd->up_available = (mi_row > sb_ptr->tile_info.mi_row_start);
8500 1822010 : cu_ptr->av1xd->left_available = (mi_col > sb_ptr->tile_info.mi_col_start);
8501 1822010 : if (cu_ptr->av1xd->up_available)
8502 1762400 : cu_ptr->av1xd->above_mbmi = &mi_ptr[-mi_stride].mbmi;
8503 : else
8504 59611 : cu_ptr->av1xd->above_mbmi = NULL;
8505 1822010 : if (cu_ptr->av1xd->left_available)
8506 1773510 : cu_ptr->av1xd->left_mbmi = &mi_ptr[-1].mbmi;
8507 : else
8508 48501 : cu_ptr->av1xd->left_mbmi = NULL;
8509 :
8510 1822010 : uint8_t redundant_blk_avail = 0;
8511 : uint16_t redundant_blk_mds;
8512 1822010 : if (all_cu_init)
8513 1712880 : check_redundant_block(blk_geom, context_ptr, &redundant_blk_avail, &redundant_blk_mds);
8514 :
8515 1822080 : if (redundant_blk_avail && context_ptr->redundant_blk)
8516 101020 : {
8517 : // Copy results
8518 101022 : CodingUnit *src_cu = &context_ptr->md_cu_arr_nsq[redundant_blk_mds];
8519 101022 : CodingUnit *dst_cu = cu_ptr;
8520 : #if PAL_SUP
8521 :
8522 101022 : move_cu_data_redund(picture_control_set_ptr, context_ptr,src_cu, dst_cu);
8523 : #else
8524 : move_cu_data_redund(src_cu, dst_cu);
8525 : #endif
8526 101020 : memcpy(&context_ptr->md_local_cu_unit[cu_ptr->mds_idx], &context_ptr->md_local_cu_unit[redundant_blk_mds], sizeof(MdCodingUnit));
8527 :
8528 101020 : if (!context_ptr->hbd_mode_decision) {
8529 101022 : memcpy(dst_cu->neigh_left_recon[0], src_cu->neigh_left_recon[0], 128);
8530 101022 : memcpy(dst_cu->neigh_left_recon[1], src_cu->neigh_left_recon[1], 128);
8531 101022 : memcpy(dst_cu->neigh_left_recon[2], src_cu->neigh_left_recon[2], 128);
8532 101022 : memcpy(dst_cu->neigh_top_recon[0], src_cu->neigh_top_recon[0], 128);
8533 101022 : memcpy(dst_cu->neigh_top_recon[1], src_cu->neigh_top_recon[1], 128);
8534 101022 : memcpy(dst_cu->neigh_top_recon[2], src_cu->neigh_top_recon[2], 128);
8535 : } else {
8536 0 : uint16_t sz = sizeof(uint16_t);
8537 0 : memcpy(dst_cu->neigh_left_recon_16bit[0], src_cu->neigh_left_recon_16bit[0], 128 * sz);
8538 0 : memcpy(dst_cu->neigh_left_recon_16bit[1], src_cu->neigh_left_recon_16bit[1], 128 * sz);
8539 0 : memcpy(dst_cu->neigh_left_recon_16bit[2], src_cu->neigh_left_recon_16bit[2], 128 * sz);
8540 0 : memcpy(dst_cu->neigh_top_recon_16bit[0], src_cu->neigh_top_recon_16bit[0], 128 * sz);
8541 0 : memcpy(dst_cu->neigh_top_recon_16bit[1], src_cu->neigh_top_recon_16bit[1], 128 * sz);
8542 0 : memcpy(dst_cu->neigh_top_recon_16bit[2], src_cu->neigh_top_recon_16bit[2], 128 * sz);
8543 : }
8544 :
8545 101020 : memcpy(&context_ptr->md_ep_pipe_sb[cu_ptr->mds_idx], &context_ptr->md_ep_pipe_sb[redundant_blk_mds], sizeof(MdEncPassCuData));
8546 :
8547 101020 : if (context_ptr->blk_geom->shape == PART_N) {
8548 15708 : uint8_t sq_index = LOG2F(context_ptr->blk_geom->sq_size) - 2;
8549 15708 : context_ptr->parent_sq_type[sq_index] = src_cu->prediction_mode_flag;
8550 15708 : context_ptr->parent_sq_has_coeff[sq_index] = src_cu->block_has_coeff;
8551 15708 : context_ptr->parent_sq_pred_mode[sq_index] = src_cu->pred_mode;
8552 : }
8553 : }
8554 : else
8555 : #if FIX_SKIP_REDUNDANT_BLOCK
8556 : {
8557 : #endif
8558 : // Initialize tx_depth
8559 1721060 : cu_ptr->tx_depth = 0;
8560 : #if ADD_SUPPORT_TO_SKIP_PART_N
8561 1721060 : if (blk_geom->quadi > 0 && d1_block_itr == 0) {
8562 : #else
8563 : if (blk_geom->quadi > 0 && blk_geom->shape == PART_N) {
8564 : #endif
8565 :
8566 299903 : uint32_t blk_mds = context_ptr->blk_geom->sqi_mds;
8567 299903 : uint64_t parent_depth_cost = 0, current_depth_cost = 0;
8568 299903 : SequenceControlSet *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
8569 299903 : uint32_t parent_depth_idx_mds = blk_mds;
8570 :
8571 : // from a given child index, derive the index of the parent
8572 299903 : parent_depth_idx_mds = (context_ptr->blk_geom->sqi_mds - (context_ptr->blk_geom->quadi - 3) * ns_depth_offset[sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128][context_ptr->blk_geom->depth]) -
8573 299903 : parent_depth_offset[sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128][blk_geom->depth];
8574 :
8575 299903 : if (picture_control_set_ptr->slice_type == I_SLICE && parent_depth_idx_mds == 0 && sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128)
8576 0 : parent_depth_cost = MAX_MODE_COST;
8577 : else
8578 299903 : compute_depth_costs_md_skip(
8579 : context_ptr,
8580 : sequence_control_set_ptr,
8581 : parent_depth_idx_mds,
8582 299903 : ns_depth_offset[sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128][context_ptr->blk_geom->depth], &parent_depth_cost, ¤t_depth_cost);
8583 :
8584 299994 : if (!sequence_control_set_ptr->sb_geom[lcuAddr].block_is_allowed[parent_depth_idx_mds])
8585 61833 : parent_depth_cost = MAX_MODE_COST;
8586 :
8587 : // compare the cost of the parent to the cost of the already encoded child + an estimated cost for the remaining child @ the current depth
8588 : // if the total child cost is higher than the parent cost then skip the remaining child @ the current depth
8589 : // when md_exit_th=0 the estimated cost for the remaining child is not taken into account and the action will be lossless compared to no exit
8590 : // MD_EXIT_THSL could be tuned toward a faster encoder but lossy
8591 : #if SPEED_OPT
8592 299994 : if (parent_depth_cost <= current_depth_cost + (current_depth_cost* (4 - context_ptr->blk_geom->quadi)* context_ptr->md_exit_th / context_ptr->blk_geom->quadi / 100)) {
8593 : #else
8594 : if (parent_depth_cost <= current_depth_cost + (current_depth_cost* (4 - context_ptr->blk_geom->quadi)* MD_EXIT_THSL / context_ptr->blk_geom->quadi / 100)) {
8595 : #endif
8596 134299 : skip_next_sq = 1;
8597 134299 : next_non_skip_blk_idx_mds = parent_depth_idx_mds + ns_depth_offset[sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128][context_ptr->blk_geom->depth - 1];
8598 : }
8599 : else
8600 165695 : skip_next_sq = 0;
8601 : }
8602 : // skip until we reach the next block @ the parent block depth
8603 1721150 : if (cu_ptr->mds_idx >= next_non_skip_blk_idx_mds && skip_next_sq == 1)
8604 782 : skip_next_sq = 0;
8605 :
8606 1721150 : if (picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->sb_geom[lcuAddr].block_is_allowed[cu_ptr->mds_idx] && !skip_next_nsq && !skip_next_sq) {
8607 811420 : md_encode_block(
8608 : sequence_control_set_ptr,
8609 : picture_control_set_ptr,
8610 : context_ptr,
8611 : input_picture_ptr,
8612 : ss_mecontext,
8613 : &skip_sub_blocks,
8614 : lcuAddr,
8615 : bestcandidate_buffers);
8616 :
8617 : }
8618 909727 : else if (skip_next_sq) {
8619 409818 : context_ptr->md_local_cu_unit[context_ptr->cu_ptr->mds_idx].cost = (MAX_MODE_COST >> 10);
8620 : }
8621 : else {
8622 : // If the block is out of the boundaries, md is not performed.
8623 : // - For square blocks, since the blocks can be further splitted, they are considered in d2_inter_depth_block_decision with cost of zero.
8624 : // - For non square blocks, since they can not be splitted further the cost is set to a large value (MAX_MODE_COST >> 4) to make sure they are not selected.
8625 : // The value is set to MAX_MODE_COST >> 4 to make sure there is not overflow when adding costs.
8626 499909 : if (context_ptr->blk_geom->shape != PART_N)
8627 425391 : context_ptr->md_local_cu_unit[context_ptr->cu_ptr->mds_idx].cost = (MAX_MODE_COST >> 4);
8628 : else
8629 74518 : context_ptr->md_local_cu_unit[context_ptr->cu_ptr->mds_idx].cost = 0;
8630 : }
8631 : #if FIX_SKIP_REDUNDANT_BLOCK
8632 : }
8633 : #endif
8634 1822060 : skip_next_nsq = 0;
8635 : #if ADD_SUPPORT_TO_SKIP_PART_N
8636 1822060 : if (blk_geom->nsi + 1 == blk_geom->totns) {
8637 924744 : d1_non_square_block_decision(context_ptr, d1_block_itr);
8638 924753 : d1_block_itr++;
8639 : }
8640 : #else
8641 : if (blk_geom->nsi + 1 == blk_geom->totns)
8642 : d1_non_square_block_decision(context_ptr);
8643 : #endif
8644 : #if ADD_SUPPORT_TO_SKIP_PART_N
8645 897316 : else if (d1_block_itr) {
8646 : #else
8647 : else {
8648 : #endif
8649 897317 : uint64_t tot_cost = 0;
8650 897317 : uint32_t first_blk_idx = context_ptr->cu_ptr->mds_idx - (blk_geom->nsi);//index of first block in this partition
8651 2272130 : for (int blk_it = 0; blk_it < blk_geom->nsi + 1; blk_it++)
8652 1374810 : tot_cost += context_ptr->md_local_cu_unit[first_blk_idx + blk_it].cost;
8653 : #if SPEED_OPT
8654 897317 : if ((tot_cost + tot_cost * (blk_geom->totns - (blk_geom->nsi + 1))* context_ptr->md_exit_th / (blk_geom->nsi + 1) / 100) > context_ptr->md_local_cu_unit[context_ptr->blk_geom->sqi_mds].cost)
8655 : #else
8656 : if ((tot_cost + tot_cost * (blk_geom->totns - (blk_geom->nsi + 1))* MD_EXIT_THSL / (blk_geom->nsi + 1) / 100) > context_ptr->md_local_cu_unit[context_ptr->blk_geom->sqi_mds].cost)
8657 : #endif
8658 447529 : skip_next_nsq = 1;
8659 : }
8660 :
8661 : #if LESS_RECTANGULAR_CHECK_LEVEL
8662 1822070 : if (context_ptr->sq_weight != (uint32_t)~0 && blk_geom->bsize > BLOCK_8X8)
8663 987922 : update_skip_next_nsq_for_a_b_shapes(context_ptr, &sq_cost, &h_cost, &v_cost, &skip_next_nsq);
8664 : #endif
8665 :
8666 1822060 : if (blk_geom->shape != PART_N) {
8667 1412650 : if (blk_geom->nsi + 1 < blk_geom->totns)
8668 897332 : md_update_all_neighbour_arrays(
8669 : picture_control_set_ptr,
8670 : context_ptr,
8671 : blk_idx_mds,
8672 : sb_origin_x,
8673 : sb_origin_y);
8674 : else
8675 515314 : copy_neighbour_arrays( //restore [1] in [0] after done last ns block
8676 : picture_control_set_ptr,
8677 : context_ptr,
8678 : 1, 0,
8679 515314 : blk_geom->sqi_mds,
8680 : sb_origin_x,
8681 : sb_origin_y);
8682 : }
8683 :
8684 : #if ADD_SUPPORT_TO_SKIP_PART_N
8685 1822050 : d1_blocks_accumlated = d1_first_block == 1 ? 1 : d1_blocks_accumlated + 1;
8686 : #else
8687 : d1_blocks_accumlated = blk_geom->shape == PART_N ? 1 : d1_blocks_accumlated + 1;
8688 : #endif
8689 :
8690 1822050 : if (d1_blocks_accumlated == leafDataPtr->tot_d1_blocks)
8691 : {
8692 409427 : uint32_t lastCuIndex_mds = d2_inter_depth_block_decision(
8693 : context_ptr,
8694 409427 : blk_geom->sqi_mds,//input is parent square
8695 : sb_ptr,
8696 : lcuAddr,
8697 : sb_origin_x,
8698 : sb_origin_y,
8699 409427 : context_ptr->full_lambda,
8700 : context_ptr->md_rate_estimation_ptr,
8701 : picture_control_set_ptr);
8702 : #if ADD_SUPPORT_TO_SKIP_PART_N
8703 409386 : d1_block_itr = 0;
8704 409386 : d1_first_block = 1;
8705 : #endif
8706 1228180 : context_ptr->coeff_based_skip_atb = picture_control_set_ptr->parent_pcs_ptr->coeff_based_skip_atb &&
8707 409405 : context_ptr->md_local_cu_unit[lastCuIndex_mds].avail_blk_flag &&
8708 818791 : context_ptr->md_cu_arr_nsq[lastCuIndex_mds].block_has_coeff == 0 ? 1 : 0;
8709 :
8710 409386 : if (context_ptr->md_cu_arr_nsq[lastCuIndex_mds].split_flag == EB_FALSE)
8711 : {
8712 313066 : md_update_all_neighbour_arrays_multiple(
8713 : picture_control_set_ptr,
8714 : context_ptr,
8715 313066 : context_ptr->md_cu_arr_nsq[lastCuIndex_mds].best_d1_blk,
8716 : sb_origin_x,
8717 : sb_origin_y);
8718 : }
8719 : }
8720 : #if ADD_SUPPORT_TO_SKIP_PART_N
8721 1412630 : else if (d1_first_block)
8722 114422 : d1_first_block = 0;
8723 : #endif
8724 :
8725 1822070 : if (skip_sub_blocks && leaf_data_array[cuIdx].split_flag) {
8726 0 : cuIdx++;
8727 0 : while (cuIdx < leaf_count) {
8728 0 : const BlockGeom * next_blk_geom = get_blk_geom_mds(leaf_data_array[cuIdx].mds_idx);
8729 0 : if ((next_blk_geom->origin_x < blk_geom->origin_x + blk_geom->bwidth) && (next_blk_geom->origin_y < blk_geom->origin_y + blk_geom->bheight))
8730 0 : cuIdx++;
8731 : else
8732 : break;
8733 : }
8734 : }
8735 : else
8736 1822070 : cuIdx++;
8737 1822070 : } while (cuIdx < leaf_count);// End of CU loop
8738 :
8739 7200 : return return_error;
8740 : }
8741 :
// tab4x4: 256-entry lookup table, written below as 16 rows of 16 values.
// The value stored at (row r, column c) is the bit-interleave of c and r
// (c supplies bits 0,2,4,6 and r supplies bits 1,3,5,7), i.e. the z-order
// index of raster position (c, r) in a 16x16 grid.
// NOTE(review): presumably the grid is the 16x16 array of 4x4 blocks of a
// 64x64 area and the table converts a raster 4x4 index to its z-scan index;
// no user of the table is visible in this part of the file - confirm.
8742 : static uint32_t tab4x4[256] = {
8743 : 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, 85,
8744 : 2, 3, 6, 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83, 86, 87,
8745 : 8, 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88, 89, 92, 93,
8746 : 10, 11, 14, 15, 26, 27, 30, 31, 74, 75, 78, 79, 90, 91, 94, 95,
8747 : 32, 33, 36, 37, 48, 49, 52, 53, 96, 97, 100, 101, 112, 113, 116, 117,
8748 : 34, 35, 38, 39, 50, 51, 54, 55, 98, 99, 102, 103, 114, 115, 118, 119,
8749 : 40, 41, 44, 45, 56, 57, 60, 61, 104, 105, 108, 109, 120, 121, 124, 125,
8750 : 42, 43, 46, 47, 58, 59, 62, 63, 106, 107, 110, 111, 122, 123, 126, 127,
8751 : 128, 129, 132, 133, 144, 145, 148, 149, 192, 193, 196, 197, 208, 209, 212, 213,
8752 : 130, 131, 134, 135, 146, 147, 150, 151, 194, 195, 198, 199, 210, 211, 214, 215,
8753 : 136, 137, 140, 141, 152, 153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221,
8754 : 138, 139, 142, 143, 154, 155, 158, 159, 202, 203, 206, 207, 218, 219, 222, 223,
8755 : 160, 161, 164, 165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245,
8756 : 162, 163, 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247,
8757 : 168, 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253,
8758 : 170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254, 255,
8759 : };
8760 :
// tab8x4: 128-entry lookup table, written below as 16 rows of 8 values.
// With z(c, r) denoting the bit-interleave of c (even bits) and r (odd bits),
// the value stored at (row r, column c) is 2 * z(c, r >> 1) + (r & 1):
// two consecutive entries per z-ordered cell, selected by the row parity.
// NOTE(review): presumably converts the raster index of an 8-wide, 4-high
// block inside a 64x64 area to the order in which the 8x4 distortions are
// stored (top/bottom half of each 8x8, 8x8 blocks in z-order) - confirm
// against the code that indexes this table.
8760 : static uint32_t tab8x4[128] = {
8761 : 0, 2, 8, 10, 32, 34, 40, 42,
8762 : 1, 3, 9, 11, 33, 35, 41, 43,
8763 : 4, 6, 12, 14, 36, 38, 44, 46,
8764 : 5, 7, 13, 15, 37, 39, 45, 47,
8765 : 16, 18, 24, 26, 48, 50, 56, 58,
8766 : 17, 19, 25, 27, 49, 51, 57, 59,
8767 : 20, 22, 28, 30, 52, 54, 60, 62,
8768 : 21, 23, 29, 31, 53, 55, 61, 63,
8769 : 64, 66, 72, 74, 96, 98, 104, 106,
8770 : 65, 67, 73, 75, 97, 99, 105, 107,
8771 : 68, 70, 76, 78, 100, 102, 108, 110,
8772 : 69, 71, 77, 79, 101, 103, 109, 111,
8773 : 80, 82, 88, 90, 112, 114, 120, 122,
8774 : 81, 83, 89, 91, 113, 115, 121, 123,
8775 : 84, 86, 92, 94, 116, 118, 124, 126,
8776 : 85, 87, 93, 95, 117, 119, 125, 127
8777 : };
8779 :
// tab4x8: 128-entry lookup table, written below as 8 rows of 16 values.
// With z(c, r) denoting the bit-interleave of c (even bits) and r (odd bits),
// the value stored at (row r, column c) is 2 * z(c >> 1, r) + (c & 1):
// two consecutive entries per z-ordered cell, selected by the column parity.
// NOTE(review): presumably the 4-wide, 8-high counterpart of tab8x4 (left /
// right half of each 8x8, 8x8 blocks in z-order) - confirm against the code
// that indexes this table.
8779 : static uint32_t tab4x8[128] = {
8780 : 0, 1, 2, 3, 8, 9, 10, 11, 32, 33, 34, 35, 40, 41, 42, 43,
8781 : 4, 5, 6, 7, 12, 13, 14, 15, 36, 37, 38, 39, 44, 45, 46, 47,
8782 : 16, 17, 18, 19, 24, 25, 26, 27, 48, 49, 50, 51, 56, 57, 58, 59,
8783 : 20, 21, 22, 23, 28, 29, 30, 31, 52, 53, 54, 55, 60, 61, 62, 63,
8784 : 64, 65, 66, 67, 72, 73, 74, 75, 96, 97, 98, 99, 104, 105, 106, 107,
8785 : 68, 69, 70, 71, 76, 77, 78, 79, 100, 101, 102, 103, 108, 109, 110, 111,
8786 : 80, 81, 82, 83, 88, 89, 90, 91, 112, 113, 114, 115, 120, 121, 122, 123,
8787 : 84, 85, 86, 87, 92, 93, 94, 95, 116, 117, 118, 119, 124, 125, 126, 127
8788 : };
8790 :
// tab16x4: 64-entry lookup table, written below as 16 rows of 4 values.
// With z(c, r) denoting the bit-interleave of c (even bits) and r (odd bits),
// the value stored at (row r, column c) is 4 * z(c, r >> 2) + (r & 3):
// four consecutive entries per z-ordered cell, selected by the row within
// the cell.
// NOTE(review): presumably converts the raster index of a 16-wide, 4-high
// block inside a 64x64 area to the order in which the 16x4 distortions are
// stored (four strips per 16x16, 16x16 blocks in z-order) - confirm.
8790 : static uint32_t tab16x4[64] = {
8791 : 0 , 4, 16, 20,
8792 : 1 , 5, 17, 21,
8793 : 2 , 6, 18, 22,
8794 : 3 , 7, 19, 23,
8795 : 8 , 12, 24, 28,
8796 : 9 , 13, 25, 29,
8797 : 10, 14, 26, 30,
8798 : 11, 15, 27, 31,
8799 : 32, 36, 48, 52,
8800 : 33, 37, 49, 53,
8801 : 34, 38, 50, 54,
8802 : 35, 39, 51, 55,
8803 : 40, 44, 56, 60,
8804 : 41, 45, 57, 61,
8805 : 42, 46, 58, 62,
8806 : 43, 47, 59, 63
8807 : };
// tab4x16: 64-entry lookup table, written below as 4 rows of 16 values.
// The value stored at (row r, column c) is
//     (c & 3) + 4 * (r & 1) + 8 * (c >> 2) + 32 * (r >> 1).
// NOTE(review): this is NOT the row/column-swapped counterpart of tab16x4.
// That counterpart would be 4 * z(c >> 2, r) + (c & 3) and would start
// 0,1,2,3, 4,5,6,7, 16,... whereas this table starts 0,1,2,3, 8,9,10,11,
// 16,... Whether the difference is intentional cannot be determined from
// this part of the file - verify against the code that indexes the table
// and against the order in which the 4x16 distortions are produced.
8808 : static uint32_t tab4x16[64] = {
8809 : 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27,
8810 : 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31,
8811 : 32, 33, 34, 35, 40, 41, 42, 43, 48, 49, 50, 51, 56, 57, 58, 59,
8812 : 36, 37, 38, 39, 44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63
8813 : };
8815 :
// tab64x16: 4-entry identity lookup table (entry i holds i).
// NOTE(review): presumably one entry per 64-wide, 16-high strip, kept as a
// table so it can be indexed like the other tabWxH tables - confirm.
8815 : static uint32_t tab64x16[4] = {
8816 : 0, 1, 2, 3,
8817 : };
8819 :
// tab16x64: 4-entry identity lookup table (entry i holds i).
// NOTE(review): presumably one entry per 16-wide, 64-high strip, kept as a
// table so it can be indexed like the other tabWxH tables - confirm.
8819 : static uint32_t tab16x64[4] = {
8820 : 0, 1, 2, 3,
8821 : };
8823 :
/***************************************************************
 * in_loop_me_8xN_Nx8_track_best
 *   Store block_dist as the distortion of entry block_index for
 *   the current search position and, when it is strictly lower
 *   than the best distortion recorded so far for that entry,
 *   record it together with curr_mv.
 ***************************************************************/
static void in_loop_me_8xN_Nx8_track_best(
    uint32_t  curr_mv,
    uint32_t  block_index,
    uint32_t  block_dist,
    uint32_t *best_mv,
    uint32_t *best_dist,
    uint32_t *dist)
{
    dist[block_index] = block_dist;

    // Strict '<': on a tie the previously recorded MV is kept.
    if (dist[block_index] < best_dist[block_index]) {
        best_mv[block_index]   = curr_mv;
        best_dist[block_index] = dist[block_index];
    }
}

/***************************************************************
 * in_loop_me_8xN_Nx8_distortion_update
 *   Compute the distortion at a given position and update
 *   the best for the supported 8xN and Nx8 blocks
 *
 *   curr_mv          value written to best_mv_* on improvement
 *   block_4x4_index  index of the LAST of the four consecutive
 *                    dist_4x4 entries that are combined
 *                    (entries index-3 .. index). Must be >= 3:
 *                    the index arithmetic is unsigned.
 *   dist_4x4         (in)  4x4 distortions, read only
 *   dist_8x4/4x8     (out) two entries each, written at
 *                    (index-3)/2 and (index-3)/2 + 1
 *   dist_8x8         (out) one entry, written at (index-3)/4
 *   best_mv_* / best_dist_*
 *                    (in/out) running best per entry, updated
 *                    only when the new distortion is strictly
 *                    smaller
 *
 *   The pairings below (first+second / third+fourth for 8x4,
 *   first+third / second+fourth for 4x8) imply that the four
 *   4x4 entries are ordered top-left, top-right, bottom-left,
 *   bottom-right.
 ***************************************************************/
static void in_loop_me_8xN_Nx8_distortion_update(
    //Inputs
    uint32_t        curr_mv,
    uint32_t        block_4x4_index,
    const uint32_t *dist_4x4,
    //Outputs
    uint32_t       *best_mv_8x4,
    uint32_t       *best_dist_8x4,
    uint32_t       *dist_8x4,
    uint32_t       *best_mv_4x8,
    uint32_t       *best_dist_4x8,
    uint32_t       *dist_4x8,
    uint32_t       *best_mv_8x8,
    uint32_t       *best_dist_8x8,
    uint32_t       *dist_8x8)
{
    // Index of the first of the four 4x4 entries of this 8x8.
    const uint32_t first_4x4              = block_4x4_index - 3;
    const uint32_t first_rec_block_index  = first_4x4 / 2;
    const uint32_t second_rec_block_index = first_rec_block_index + 1;
    const uint32_t square_block_index     = first_4x4 / 4;

    //8x4
    in_loop_me_8xN_Nx8_track_best(
        curr_mv, first_rec_block_index,
        dist_4x4[first_4x4] + dist_4x4[first_4x4 + 1],
        best_mv_8x4, best_dist_8x4, dist_8x4);
    in_loop_me_8xN_Nx8_track_best(
        curr_mv, second_rec_block_index,
        dist_4x4[first_4x4 + 2] + dist_4x4[first_4x4 + 3],
        best_mv_8x4, best_dist_8x4, dist_8x4);

    //4x8
    in_loop_me_8xN_Nx8_track_best(
        curr_mv, first_rec_block_index,
        dist_4x4[first_4x4] + dist_4x4[first_4x4 + 2],
        best_mv_4x8, best_dist_4x8, dist_4x8);
    in_loop_me_8xN_Nx8_track_best(
        curr_mv, second_rec_block_index,
        dist_4x4[first_4x4 + 1] + dist_4x4[first_4x4 + 3],
        best_mv_4x8, best_dist_4x8, dist_4x8);

    //8x8: sum of the two 4x8 halves that were just written
    in_loop_me_8xN_Nx8_track_best(
        curr_mv, square_block_index,
        dist_4x8[first_rec_block_index] + dist_4x8[second_rec_block_index],
        best_mv_8x8, best_dist_8x8, dist_8x8);
}
/***************************************************************
 * in_loop_me_16xN_Nx16_track_best
 *   Store block_dist as the distortion of entry block_index for
 *   the current search position and, when it is strictly lower
 *   than the best distortion recorded so far for that entry,
 *   record it together with curr_mv.
 ***************************************************************/
static void in_loop_me_16xN_Nx16_track_best(
    uint32_t  curr_mv,
    uint32_t  block_index,
    uint32_t  block_dist,
    uint32_t *best_mv,
    uint32_t *best_dist,
    uint32_t *dist)
{
    dist[block_index] = block_dist;

    // Strict '<': on a tie the previously recorded MV is kept.
    if (dist[block_index] < best_dist[block_index]) {
        best_mv[block_index]   = curr_mv;
        best_dist[block_index] = dist[block_index];
    }
}

/***************************************************************
 * in_loop_me_16xN_Nx16_distortion_update
 *   Compute the distortion at a given position and update
 *   the best for the supported 16xN and Nx16 blocks
 *
 *   curr_mv          value written to best_mv_* on improvement
 *   block_8x8_index  index of the LAST of the four consecutive
 *                    dist_8x8 entries that are combined
 *                    (entries index-3 .. index). Must be >= 3.
 *   block_4x4_index  index of the last 4x4 entry of the same
 *                    area; (index-15)/2 is the first of the
 *                    eight dist_8x4 / dist_4x8 entries that are
 *                    read. Must be >= 15.
 *                    (Both lower bounds follow from the unsigned
 *                    index arithmetic.)
 *   dist_8x4, dist_4x8, dist_8x8
 *                    (in)  distortions of the smaller blocks,
 *                    read only
 *   dist_16x4/4x16   (out) four entries each, written at
 *                    block_8x8_index-3 .. block_8x8_index
 *   dist_16x8/8x16   (out) two entries each, written at
 *                    (block_8x8_index-3)/2 and the next one
 *   dist_16x16       (out) one entry at (block_8x8_index-3)/4
 *   best_mv_* / best_dist_*
 *                    (in/out) running best per entry, updated
 *                    only when the new distortion is strictly
 *                    smaller
 ***************************************************************/
static void in_loop_me_16xN_Nx16_distortion_update(
    //Inputs
    uint32_t        curr_mv,
    uint32_t        block_8x8_index,
    uint32_t        block_4x4_index,
    const uint32_t *dist_8x4,
    const uint32_t *dist_4x8,
    const uint32_t *dist_8x8,
    //Outputs
    uint32_t       *best_mv_16x4,
    uint32_t       *best_dist_16x4,
    uint32_t       *dist_16x4,
    uint32_t       *best_mv_16x8,
    uint32_t       *best_dist_16x8,
    uint32_t       *dist_16x8,
    uint32_t       *best_mv_4x16,
    uint32_t       *best_dist_4x16,
    uint32_t       *dist_4x16,
    uint32_t       *best_mv_8x16,
    uint32_t       *best_dist_8x16,
    uint32_t       *dist_8x16,
    uint32_t       *best_mv_16x16,
    uint32_t       *best_dist_16x16,
    uint32_t       *dist_16x16
)
{
    // Index of the first of the four 8x8 entries of this 16x16.
    const uint32_t first_8x8 = block_8x8_index - 3;
    // Index of the first of the eight 8x4 / 4x8 entries of this 16x16.
    const uint32_t start_index = (block_4x4_index - 15) >> 1;

    const uint32_t first_rec_block_index  = first_8x8 / 2;
    const uint32_t second_rec_block_index = first_rec_block_index + 1;
    const uint32_t square_block_index     = first_8x8 / 4;

    //16x4: strip k pairs the 8x4 entries (0,2) (1,3) (4,6) (5,7)
    for (uint32_t strip = 0; strip < 4; strip++) {
        const uint32_t src = start_index + (strip & 1) + 4 * (strip >> 1);

        in_loop_me_16xN_Nx16_track_best(
            curr_mv, first_8x8 + strip,
            dist_8x4[src] + dist_8x4[src + 2],
            best_mv_16x4, best_dist_16x4, dist_16x4);
    }

    //4x16: strip k pairs the 4x8 entries (k, k + 4)
    for (uint32_t strip = 0; strip < 4; strip++) {
        const uint32_t src = start_index + strip;

        in_loop_me_16xN_Nx16_track_best(
            curr_mv, first_8x8 + strip,
            dist_4x8[src] + dist_4x8[src + 4],
            best_mv_4x16, best_dist_4x16, dist_4x16);
    }

    //16x8
    in_loop_me_16xN_Nx16_track_best(
        curr_mv, first_rec_block_index,
        dist_8x8[first_8x8] + dist_8x8[first_8x8 + 1],
        best_mv_16x8, best_dist_16x8, dist_16x8);
    in_loop_me_16xN_Nx16_track_best(
        curr_mv, second_rec_block_index,
        dist_8x8[first_8x8 + 2] + dist_8x8[first_8x8 + 3],
        best_mv_16x8, best_dist_16x8, dist_16x8);

    //8x16
    in_loop_me_16xN_Nx16_track_best(
        curr_mv, first_rec_block_index,
        dist_8x8[first_8x8] + dist_8x8[first_8x8 + 2],
        best_mv_8x16, best_dist_8x16, dist_8x16);
    in_loop_me_16xN_Nx16_track_best(
        curr_mv, second_rec_block_index,
        dist_8x8[first_8x8 + 1] + dist_8x8[first_8x8 + 3],
        best_mv_8x16, best_dist_8x16, dist_8x16);

    //16x16: sum of the two 16x8 halves that were just written
    in_loop_me_16xN_Nx16_track_best(
        curr_mv, square_block_index,
        dist_16x8[first_rec_block_index] + dist_16x8[second_rec_block_index],
        best_mv_16x16, best_dist_16x16, dist_16x16);
}
/***************************************************************
 * in_loop_me_32xN_Nx32_track_best
 *   Store block_dist as the distortion of entry block_index for
 *   the current search position and, when it is strictly lower
 *   than the best distortion recorded so far for that entry,
 *   record it together with curr_mv.
 ***************************************************************/
static void in_loop_me_32xN_Nx32_track_best(
    uint32_t  curr_mv,
    uint32_t  block_index,
    uint32_t  block_dist,
    uint32_t *best_mv,
    uint32_t *best_dist,
    uint32_t *dist)
{
    dist[block_index] = block_dist;

    // Strict '<': on a tie the previously recorded MV is kept.
    if (dist[block_index] < best_dist[block_index]) {
        best_mv[block_index]   = curr_mv;
        best_dist[block_index] = dist[block_index];
    }
}

/***************************************************************
 * in_loop_me_32xN_Nx32_distortion_update
 *   Compute the distortion at a given position and update
 *   the best for the supported 32xN and Nx32 blocks
 *
 *   curr_mv            value written to best_mv_* on improvement
 *   block_16x16_index  index of the LAST of the four consecutive
 *                      dist_16x16 entries that are combined
 *                      (entries index-3 .. index). Must be >= 3.
 *   block_8x8_index    index of the last 8x8 entry of the same
 *                      area; (index-15)/2 is the first of the
 *                      eight dist_16x8 / dist_8x16 entries that
 *                      are read. Must be >= 15.
 *                      (Both lower bounds follow from the
 *                      unsigned index arithmetic.)
 *   dist_16x8, dist_8x16, dist_16x16
 *                      (in)  distortions of the smaller blocks,
 *                      read only
 *   dist_32x8/8x32     (out) four entries each, written at
 *                      block_16x16_index-3 .. block_16x16_index
 *   dist_32x16/16x32   (out) two entries each, written at
 *                      (block_16x16_index-3)/2 and the next one
 *   dist_32x32         (out) one entry at
 *                      (block_16x16_index-3)/4
 *   best_mv_* / best_dist_*
 *                      (in/out) running best per entry, updated
 *                      only when the new distortion is strictly
 *                      smaller
 ***************************************************************/
static void in_loop_me_32xN_Nx32_distortion_update(
    //Inputs
    uint32_t        curr_mv,
    uint32_t        block_16x16_index,
    uint32_t        block_8x8_index,
    const uint32_t *dist_16x8,
    const uint32_t *dist_8x16,
    const uint32_t *dist_16x16,
    //Outputs
    uint32_t       *best_mv_32x8,
    uint32_t       *best_dist_32x8,
    uint32_t       *dist_32x8,
    uint32_t       *best_mv_32x16,
    uint32_t       *best_dist_32x16,
    uint32_t       *dist_32x16,
    uint32_t       *best_mv_8x32,
    uint32_t       *best_dist_8x32,
    uint32_t       *dist_8x32,
    uint32_t       *best_mv_16x32,
    uint32_t       *best_dist_16x32,
    uint32_t       *dist_16x32,
    uint32_t       *best_mv_32x32,
    uint32_t       *best_dist_32x32,
    uint32_t       *dist_32x32
)
{
    // Index of the first of the four 16x16 entries of this 32x32.
    const uint32_t first_16x16 = block_16x16_index - 3;
    // Index of the first of the eight 16x8 / 8x16 entries of this 32x32.
    const uint32_t start_index = (block_8x8_index - 15) >> 1;

    const uint32_t first_rec_block_index  = first_16x16 / 2;
    const uint32_t second_rec_block_index = first_rec_block_index + 1;
    const uint32_t square_block_index     = first_16x16 / 4;

    //32x8: strip k pairs the 16x8 entries (0,2) (1,3) (4,6) (5,7)
    for (uint32_t strip = 0; strip < 4; strip++) {
        const uint32_t src = start_index + (strip & 1) + 4 * (strip >> 1);

        in_loop_me_32xN_Nx32_track_best(
            curr_mv, first_16x16 + strip,
            dist_16x8[src] + dist_16x8[src + 2],
            best_mv_32x8, best_dist_32x8, dist_32x8);
    }

    //8x32: strip k pairs the 8x16 entries (k, k + 4)
    for (uint32_t strip = 0; strip < 4; strip++) {
        const uint32_t src = start_index + strip;

        in_loop_me_32xN_Nx32_track_best(
            curr_mv, first_16x16 + strip,
            dist_8x16[src] + dist_8x16[src + 4],
            best_mv_8x32, best_dist_8x32, dist_8x32);
    }

    //32x16
    in_loop_me_32xN_Nx32_track_best(
        curr_mv, first_rec_block_index,
        dist_16x16[first_16x16] + dist_16x16[first_16x16 + 1],
        best_mv_32x16, best_dist_32x16, dist_32x16);
    in_loop_me_32xN_Nx32_track_best(
        curr_mv, second_rec_block_index,
        dist_16x16[first_16x16 + 2] + dist_16x16[first_16x16 + 3],
        best_mv_32x16, best_dist_32x16, dist_32x16);

    //16x32
    in_loop_me_32xN_Nx32_track_best(
        curr_mv, first_rec_block_index,
        dist_16x16[first_16x16] + dist_16x16[first_16x16 + 2],
        best_mv_16x32, best_dist_16x32, dist_16x32);
    in_loop_me_32xN_Nx32_track_best(
        curr_mv, second_rec_block_index,
        dist_16x16[first_16x16 + 1] + dist_16x16[first_16x16 + 3],
        best_mv_16x32, best_dist_16x32, dist_16x32);

    //32x32: sum of the two 32x16 halves that were just written
    in_loop_me_32xN_Nx32_track_best(
        curr_mv, square_block_index,
        dist_32x16[first_rec_block_index] + dist_32x16[second_rec_block_index],
        best_mv_32x32, best_dist_32x32, dist_32x32);
}
/***************************************************************
 * in_loop_me_64xN_Nx64_sum_and_rank
 *  Store first_dist + second_dist as the distortion of block
 *  `index`, then record curr_mv / that distortion as the best
 *  of block `index` when it is strictly lower than the best
 *  distortion seen so far.
 ***************************************************************/
static void in_loop_me_64xN_Nx64_sum_and_rank(
    uint32_t  curr_mv,
    uint32_t  first_dist,
    uint32_t  second_dist,
    uint32_t  index,
    uint32_t *dist,
    uint32_t *best_dist,
    uint32_t *best_mv)
{
    dist[index] = first_dist + second_dist;

    if (dist[index] < best_dist[index]) {
        best_mv[index]   = curr_mv;
        best_dist[index] = dist[index];
    }
}

/***************************************************************
 * in_loop_me_64xN_Nx64_distortion_update
 *  Compute the distortion at a given position and update
 *  the best for the supported 64xN and Nx64 blocks
 *
 *  curr_mv            : packed MV stored as the new best MV of
 *                       every block whose distortion improves
 *  block_32x32_index  : index of the LAST 32x32 block of the
 *                       64x64 block being processed; entries
 *                       [index - 3 .. index] of dist_32x32 are read
 *  block_16x16_index  : index of the LAST 16x16 block of the same
 *                       64x64 block; (index - 15) >> 1 is the first
 *                       of the 8 dist_32x16 / dist_16x32 entries read
 *  dist_32x16, dist_16x32, dist_32x32
 *                     : inputs, distortions of the smaller blocks
 *  dist_WxH / best_dist_WxH / best_mv_WxH
 *                     : outputs, current distortion, best distortion
 *                       and best MV for the 64x16, 16x64, 64x32,
 *                       32x64 and 64x64 blocks
 *
 *  The 64x64 distortion is the sum of the two 64x32 distortions
 *  written earlier in this same call, so the 64x32 step must stay
 *  ahead of the 64x64 step.
 ***************************************************************/
static void in_loop_me_64xN_Nx64_distortion_update(
    uint32_t  curr_mv,
    uint32_t  block_32x32_index,
    uint32_t  block_16x16_index,
    uint32_t *dist_32x16,
    uint32_t *dist_16x32,
    uint32_t *dist_32x32,
    uint32_t *best_mv_64x16,
    uint32_t *best_dist_64x16,
    uint32_t *dist_64x16,
    uint32_t *best_mv_64x32,
    uint32_t *best_dist_64x32,
    uint32_t *dist_64x32,
    uint32_t *best_mv_16x64,
    uint32_t *best_dist_16x64,
    uint32_t *dist_16x64,
    uint32_t *best_mv_32x64,
    uint32_t *best_dist_32x64,
    uint32_t *dist_32x64,
    uint32_t *best_mv_64x64,
    uint32_t *best_dist_64x64,
    uint32_t *dist_64x64)
{
    uint32_t square_block_index;
    uint32_t first_rec_block_index;
    uint32_t second_rec_block_index;
    uint32_t third_rec_block_index;
    uint32_t fourth_rec_block_index;
    uint32_t start_index;

    //64x16
    first_rec_block_index  = (block_32x32_index - 3);
    second_rec_block_index = first_rec_block_index + 1;
    third_rec_block_index  = second_rec_block_index + 1;
    fourth_rec_block_index = third_rec_block_index + 1;

    // First of the 8 32x16 (and 16x32) entries of this 64x64 block
    start_index = (block_16x16_index - 15) >> 1;

    in_loop_me_64xN_Nx64_sum_and_rank(
        curr_mv, dist_32x16[start_index], dist_32x16[start_index + 2],
        first_rec_block_index, dist_64x16, best_dist_64x16, best_mv_64x16);

    in_loop_me_64xN_Nx64_sum_and_rank(
        curr_mv, dist_32x16[start_index + 1], dist_32x16[start_index + 3],
        second_rec_block_index, dist_64x16, best_dist_64x16, best_mv_64x16);

    in_loop_me_64xN_Nx64_sum_and_rank(
        curr_mv, dist_32x16[start_index + 4], dist_32x16[start_index + 6],
        third_rec_block_index, dist_64x16, best_dist_64x16, best_mv_64x16);

    in_loop_me_64xN_Nx64_sum_and_rank(
        curr_mv, dist_32x16[start_index + 5], dist_32x16[start_index + 7],
        fourth_rec_block_index, dist_64x16, best_dist_64x16, best_mv_64x16);

    //16x64
    in_loop_me_64xN_Nx64_sum_and_rank(
        curr_mv, dist_16x32[start_index], dist_16x32[start_index + 4],
        first_rec_block_index, dist_16x64, best_dist_16x64, best_mv_16x64);

    in_loop_me_64xN_Nx64_sum_and_rank(
        curr_mv, dist_16x32[start_index + 1], dist_16x32[start_index + 5],
        second_rec_block_index, dist_16x64, best_dist_16x64, best_mv_16x64);

    in_loop_me_64xN_Nx64_sum_and_rank(
        curr_mv, dist_16x32[start_index + 2], dist_16x32[start_index + 6],
        third_rec_block_index, dist_16x64, best_dist_16x64, best_mv_16x64);

    in_loop_me_64xN_Nx64_sum_and_rank(
        curr_mv, dist_16x32[start_index + 3], dist_16x32[start_index + 7],
        fourth_rec_block_index, dist_16x64, best_dist_16x64, best_mv_16x64);

    //64x32
    first_rec_block_index  = (block_32x32_index - 3) / 2;
    second_rec_block_index = first_rec_block_index + 1;

    in_loop_me_64xN_Nx64_sum_and_rank(
        curr_mv, dist_32x32[block_32x32_index - 3], dist_32x32[block_32x32_index - 2],
        first_rec_block_index, dist_64x32, best_dist_64x32, best_mv_64x32);

    in_loop_me_64xN_Nx64_sum_and_rank(
        curr_mv, dist_32x32[block_32x32_index - 1], dist_32x32[block_32x32_index],
        second_rec_block_index, dist_64x32, best_dist_64x32, best_mv_64x32);

    //32x64
    in_loop_me_64xN_Nx64_sum_and_rank(
        curr_mv, dist_32x32[block_32x32_index - 3], dist_32x32[block_32x32_index - 1],
        first_rec_block_index, dist_32x64, best_dist_32x64, best_mv_32x64);

    in_loop_me_64xN_Nx64_sum_and_rank(
        curr_mv, dist_32x32[block_32x32_index - 2], dist_32x32[block_32x32_index],
        second_rec_block_index, dist_32x64, best_dist_32x64, best_mv_32x64);

    //64x64
    square_block_index = (block_32x32_index - 3) / 4;

    in_loop_me_64xN_Nx64_sum_and_rank(
        curr_mv, dist_64x32[first_rec_block_index], dist_64x32[second_rec_block_index],
        square_block_index, dist_64x64, best_dist_64x64, best_mv_64x64);
}
9330 :
/***************************************************************
 * in_loop_me_128xN_Nx128_distortion_update
 *  Derive the 128x64, 64x128 and 128x128 distortions at the
 *  current search position from the four 64x64 distortions and
 *  refresh the best distortion / best MV of each of them.
 *
 *  curr_mv            : packed MV stored as the new best MV of
 *                       every block whose distortion improves
 *  block_64x64_index  : index of the LAST 64x64 block; entries
 *                       [index - 3 .. index] of dist_64x64 are read
 *  block_32x32_index, dist_64x32, dist_32x64
 *                     : not used by this function
 *  dist_128x128       : output, points to a single value (not an
 *                       array) receiving the 128x128 distortion
 *
 *  A best entry is replaced only on a strict improvement.
 ***************************************************************/
static void in_loop_me_128xN_Nx128_distortion_update(
    uint32_t  curr_mv,
    uint32_t  block_64x64_index,
    uint32_t  block_32x32_index,
    uint32_t *dist_64x32,
    uint32_t *dist_32x64,
    uint32_t *dist_64x64,
    uint32_t *best_mv_128x64,
    uint32_t *best_dist_128x64,
    uint32_t *dist_128x64,
    uint32_t *best_mv_64x128,
    uint32_t *best_dist_64x128,
    uint32_t *dist_64x128,
    uint32_t *best_mv_128x128,
    uint32_t *best_dist_128x128,
    uint32_t *dist_128x128
)
{
    // Positions of the four 64x64 quadrants inside dist_64x64
    const uint32_t quad0 = block_64x64_index - 3;
    const uint32_t quad1 = block_64x64_index - 2;
    const uint32_t quad2 = block_64x64_index - 1;
    const uint32_t quad3 = block_64x64_index;

    // Output slots of the two rectangular halves and of the full square
    const uint32_t half_a = (block_64x64_index - 3) / 4;
    const uint32_t half_b = half_a + 1;
    const uint32_t full   = (block_64x64_index - 3) / 4;

    (void)block_32x32_index;
    (void)dist_64x32;
    (void)dist_32x64;

    //128x64
    dist_128x64[half_a] = dist_64x64[quad0] + dist_64x64[quad1];
    if (best_dist_128x64[half_a] > dist_128x64[half_a]) {
        best_mv_128x64[half_a]   = curr_mv;
        best_dist_128x64[half_a] = dist_128x64[half_a];
    }

    dist_128x64[half_b] = dist_64x64[quad2] + dist_64x64[quad3];
    if (best_dist_128x64[half_b] > dist_128x64[half_b]) {
        best_mv_128x64[half_b]   = curr_mv;
        best_dist_128x64[half_b] = dist_128x64[half_b];
    }

    //64x128
    dist_64x128[half_a] = dist_64x64[quad0] + dist_64x64[quad2];
    if (best_dist_64x128[half_a] > dist_64x128[half_a]) {
        best_mv_64x128[half_a]   = curr_mv;
        best_dist_64x128[half_a] = dist_64x128[half_a];
    }

    dist_64x128[half_b] = dist_64x64[quad1] + dist_64x64[quad3];
    if (best_dist_64x128[half_b] > dist_64x128[half_b]) {
        best_mv_64x128[half_b]   = curr_mv;
        best_dist_64x128[half_b] = dist_64x128[half_b];
    }

    //128x128 : sum of the two 128x64 halves computed above
    *dist_128x128 = dist_128x64[half_a] + dist_128x64[half_b];
    if (best_dist_128x128[full] > *dist_128x128) {
        best_mv_128x128[full]   = curr_mv;
        best_dist_128x128[full] = *dist_128x128;
    }
}
9403 : /***************************************************************
9404 : * in_loop_me_get_search_point_results_block
9405 : * Compute the distortion at a given position
9406 : ***************************************************************/
9407 :
9408 0 : static void in_loop_me_get_search_point_results_block(
9409 : SsMeContext *context_ptr, // input parameter, ME context Ptr, used to get SB Ptr
9410 : uint32_t list_index, // input parameter, reference list index
9411 : uint32_t ref_index,
9412 : int32_t x_search_index, // input parameter, search region position in the horizontal direction, used to derive xMV
9413 : int32_t y_search_index, // input parameter, search region position in the vertical direction, used to derive yMV
9414 : uint32_t number_of_sb_quad)
9415 : {
9416 0 : uint8_t *src_ptr = context_ptr->sb_buffer;
9417 :
9418 : // NADER
9419 0 : uint8_t *ref_ptr = context_ptr->integer_buffer_ptr[list_index][0] + (ME_FILTER_TAP >> 1) + ((ME_FILTER_TAP >> 1) * context_ptr->interpolated_full_stride[list_index][0]);
9420 0 : uint32_t ref_luma_stride = context_ptr->interpolated_full_stride[list_index][0];
9421 0 : uint32_t curr_mv_1 = (((uint16_t)y_search_index) << 18);
9422 0 : uint16_t curr_mv_2 = (((uint16_t)x_search_index << 2));
9423 0 : uint32_t curr_mv = curr_mv_1 | curr_mv_2;
9424 0 : uint32_t *best_dist_4x4 = context_ptr->p_best_sad4x4;
9425 0 : uint32_t *best_mv_4x4 = context_ptr->p_best_mv4x4;
9426 0 : uint32_t *dist_4x4 = context_ptr->p_sad4x4;
9427 0 : uint32_t *best_dist_8x4 = context_ptr->p_best_sad8x4;
9428 0 : uint32_t *best_mv_8x4 = context_ptr->p_best_mv8x4;
9429 0 : uint32_t *dist_8x4 = context_ptr->p_sad8x4;
9430 0 : uint32_t *best_dist_4x8 = context_ptr->p_best_sad4x8;
9431 0 : uint32_t *best_mv_4x8 = context_ptr->p_best_mv4x8;
9432 0 : uint32_t *dist_4x8 = context_ptr->p_sad4x8;
9433 0 : uint32_t *best_dist_8x8 = context_ptr->p_best_sad8x8;
9434 0 : uint32_t *best_mv_8x8 = context_ptr->p_best_mv8x8;
9435 0 : uint32_t *dist_8x8 = context_ptr->p_sad8x8;
9436 0 : uint32_t *best_dist_16x16 = context_ptr->p_best_sad16x16;
9437 0 : uint32_t *best_mv_16x16 = context_ptr->p_best_mv16x16;
9438 0 : uint32_t *dist_16x16 = context_ptr->p_sad16x16;
9439 0 : uint32_t *best_dist_16x8 = context_ptr->p_best_sad16x8;
9440 0 : uint32_t *best_mv_16x8 = context_ptr->p_best_mv16x8;
9441 0 : uint32_t *dist_16x8 = context_ptr->p_sad16x8;
9442 0 : uint32_t *best_dist_16x4 = context_ptr->p_best_sad16x4;
9443 0 : uint32_t *best_mv_16x4 = context_ptr->p_best_mv16x4;
9444 0 : uint32_t *dist_16x4 = context_ptr->p_sad16x4;
9445 0 : uint32_t *best_dist_8x16 = context_ptr->p_best_sad8x16;
9446 0 : uint32_t *best_mv_8x16 = context_ptr->p_best_mv8x16;
9447 0 : uint32_t *dist_8x16 = context_ptr->p_sad8x16;
9448 0 : uint32_t *best_dist_4x16 = context_ptr->p_best_sad4x16;
9449 0 : uint32_t *best_mv_4x16 = context_ptr->p_best_mv4x16;
9450 0 : uint32_t *dist_4x16 = context_ptr->p_sad4x16;
9451 0 : uint32_t *best_dist_32x8 = context_ptr->p_best_sad32x8;
9452 0 : uint32_t *best_mv_32x8 = context_ptr->p_best_mv32x8;
9453 0 : uint32_t *dist_32x8 = context_ptr->p_sad32x8;
9454 0 : uint32_t *best_dist_32x16 = context_ptr->p_best_sad32x16;
9455 0 : uint32_t *best_mv_32x16 = context_ptr->p_best_mv32x16;
9456 0 : uint32_t *dist_32x16 = context_ptr->p_sad32x16;
9457 0 : uint32_t *best_dist_16x32 = context_ptr->p_best_sad16x32;
9458 0 : uint32_t *best_mv_16x32 = context_ptr->p_best_mv16x32;
9459 0 : uint32_t *dist_16x32 = context_ptr->p_sad16x32;
9460 0 : uint32_t *best_dist_8x32 = context_ptr->p_best_sad8x32;
9461 0 : uint32_t *best_mv_8x32 = context_ptr->p_best_mv8x32;
9462 0 : uint32_t *dist_8x32 = context_ptr->p_sad8x32;
9463 0 : uint32_t *best_dist_32x32 = context_ptr->p_best_sad32x32;
9464 0 : uint32_t *best_mv_32x32 = context_ptr->p_best_mv32x32;
9465 0 : uint32_t *dist_32x32 = context_ptr->p_sad32x32;
9466 0 : uint32_t *best_dist_64x16 = context_ptr->p_best_sad64x16;
9467 0 : uint32_t *best_mv_64x16 = context_ptr->p_best_mv64x16;
9468 0 : uint32_t *dist_64x16 = context_ptr->p_sad64x16;
9469 0 : uint32_t *best_dist_64x32 = context_ptr->p_best_sad64x32;
9470 0 : uint32_t *best_mv_64x32 = context_ptr->p_best_mv64x32;
9471 0 : uint32_t *dist_64x32 = context_ptr->p_sad64x32;
9472 0 : uint32_t *best_dist_32x64 = context_ptr->p_best_sad32x64;
9473 0 : uint32_t *best_mv_32x64 = context_ptr->p_best_mv32x64;
9474 0 : uint32_t *dist_32x64 = context_ptr->p_sad32x64;
9475 0 : uint32_t *best_dist_16x64 = context_ptr->p_best_sad16x64;
9476 0 : uint32_t *best_mv_16x64 = context_ptr->p_best_mv16x64;
9477 0 : uint32_t *dist_16x64 = context_ptr->p_sad16x64;
9478 0 : uint32_t *best_dist_64x64 = context_ptr->p_best_sad64x64;
9479 0 : uint32_t *best_mv_64x64 = context_ptr->p_best_mv64x64;
9480 0 : uint32_t *dist_64x64 = context_ptr->p_sad64x64;
9481 0 : uint32_t *best_dist_128x64 = context_ptr->p_best_sad128x64;
9482 0 : uint32_t *best_mv_128x64 = context_ptr->p_best_mv128x64;
9483 0 : uint32_t *dist_128x64 = context_ptr->p_sad128x64;
9484 0 : uint32_t *best_dist_64x128 = context_ptr->p_best_sad64x128;
9485 0 : uint32_t *best_mv_64x128 = context_ptr->p_best_mv64x128;
9486 0 : uint32_t *dist_64x128 = context_ptr->p_sad64x128;
9487 0 : uint32_t *best_dist_128x128 = context_ptr->p_best_sad128x128;
9488 0 : uint32_t *best_mv_128x128 = context_ptr->p_best_mv128x128;
9489 0 : uint32_t dist_128x128 = context_ptr->p_sad128x128;
9490 0 : const uint32_t src_stride = context_ptr->sb_buffer_stride;
9491 0 : uint32_t block_64x64_index = 0;
9492 0 : uint32_t block_32x32_index = 0;
9493 : uint32_t block_16x16_index;
9494 : uint32_t block_8x8_index;
9495 : uint32_t block_4x4_index;
9496 : uint32_t block_64x64_x;
9497 : uint32_t block_32x32_x;
9498 : uint32_t block_16x16_x;
9499 : uint32_t block_8x8_x;
9500 : uint32_t block_4x4_x;
9501 : uint32_t block_64x64_y;
9502 : uint32_t block_32x32_y;
9503 : uint32_t block_16x16_y;
9504 : uint32_t block_8x8_y;
9505 : uint32_t block_4x4_y;
9506 0 : uint32_t quad_offset = number_of_sb_quad > 1 ? 2 : 1;
9507 :
9508 0 : for (block_64x64_y = 0; block_64x64_y < quad_offset; block_64x64_y++) {
9509 0 : for (block_64x64_x = 0; block_64x64_x < quad_offset; block_64x64_x++) {
9510 0 : block_64x64_index = block_64x64_x + (block_64x64_y * 2);
9511 :
9512 0 : for (block_32x32_y = 0; block_32x32_y < 2; block_32x32_y++) {
9513 0 : for (block_32x32_x = 0; block_32x32_x < 2; block_32x32_x++) {
9514 0 : block_32x32_index = (block_64x64_index * 4) + block_32x32_x + (block_32x32_y * 2);
9515 :
9516 0 : for (block_16x16_y = 0; block_16x16_y < 2; block_16x16_y++) {
9517 0 : for (block_16x16_x = 0; block_16x16_x < 2; block_16x16_x++) {
9518 0 : block_16x16_index = (block_32x32_index * 4) + block_16x16_x + (block_16x16_y * 2);
9519 :
9520 0 : for (block_8x8_y = 0; block_8x8_y < 2; block_8x8_y++) {
9521 0 : for (block_8x8_x = 0; block_8x8_x < 2; block_8x8_x++) {
9522 0 : block_8x8_index = (block_16x16_index * 4) + block_8x8_x + (block_8x8_y * 2);
9523 :
9524 0 : for (block_4x4_y = 0; block_4x4_y < 2; block_4x4_y++) {
9525 0 : for (block_4x4_x = 0; block_4x4_x < 2; block_4x4_x++) {
9526 0 : block_4x4_index = (block_8x8_index * 4) + block_4x4_x + (block_4x4_y * 2);
9527 :
9528 0 : uint32_t block_4x4_addr_y = (block_64x64_y * 64) + (block_32x32_y * 32) + (block_16x16_y * 16) + (block_8x8_y * 8) + (block_4x4_y * 4);
9529 0 : uint32_t block_4x4_addr_x = (block_64x64_x * 64) + (block_32x32_x * 32) + (block_16x16_x * 16) + (block_8x8_x * 8) + (block_4x4_x * 4);
9530 0 : uint32_t block_4x4_addr_src = (block_4x4_addr_y * src_stride) + block_4x4_addr_x;
9531 0 : uint32_t block_4x4_addr_ref = ref_index + ((block_4x4_addr_y * ref_luma_stride) + block_4x4_addr_x);
9532 :
9533 : //4x4
9534 0 : dist_4x4[block_4x4_index] = eb_sad_kernel4x4(
9535 0 : src_ptr + block_4x4_addr_src,
9536 : src_stride,
9537 0 : ref_ptr + block_4x4_addr_ref,
9538 : ref_luma_stride,
9539 : 4,
9540 : 4);
9541 :
9542 0 : if (dist_4x4[block_4x4_index] < best_dist_4x4[block_4x4_index]) {
9543 0 : best_mv_4x4[block_4x4_index] = curr_mv;
9544 0 : best_dist_4x4[block_4x4_index] = dist_4x4[block_4x4_index];
9545 : }
9546 : }
9547 : }
9548 :
9549 : // Nader - Full-pel search for depth 4 blocks
9550 0 : in_loop_me_8xN_Nx8_distortion_update(
9551 : //Inputs
9552 : curr_mv,
9553 : block_4x4_index,
9554 : dist_4x4,
9555 : //Outputs
9556 : best_mv_8x4,
9557 : best_dist_8x4,
9558 : dist_8x4,
9559 : best_mv_4x8,
9560 : best_dist_4x8,
9561 : dist_4x8,
9562 : best_mv_8x8,
9563 : best_dist_8x8,
9564 : dist_8x8);
9565 : }
9566 : }
9567 :
9568 : // Nader - Full-pel search for depth 3 blocks
9569 0 : in_loop_me_16xN_Nx16_distortion_update(
9570 : //Inputs
9571 : curr_mv,
9572 : block_8x8_index,
9573 : block_4x4_index,
9574 : dist_8x4,
9575 : dist_4x8,
9576 : dist_8x8,
9577 : //Outputs
9578 : best_mv_16x4,
9579 : best_dist_16x4,
9580 : dist_16x4,
9581 : best_mv_16x8,
9582 : best_dist_16x8,
9583 : dist_16x8,
9584 : best_mv_4x16,
9585 : best_dist_4x16,
9586 : dist_4x16,
9587 : best_mv_8x16,
9588 : best_dist_8x16,
9589 : dist_8x16,
9590 : best_mv_16x16,
9591 : best_dist_16x16,
9592 : dist_16x16);
9593 : }
9594 : }
9595 :
9596 : // Nader - Full-pel search for depth 2 blocks
9597 0 : in_loop_me_32xN_Nx32_distortion_update(
9598 : //Inputs
9599 : curr_mv,
9600 : block_16x16_index,
9601 : block_8x8_index,
9602 : dist_16x8,
9603 : dist_8x16,
9604 : dist_16x16,
9605 : //Outputs
9606 : best_mv_32x8,
9607 : best_dist_32x8,
9608 : dist_32x8,
9609 : best_mv_32x16,
9610 : best_dist_32x16,
9611 : dist_32x16,
9612 : best_mv_8x32,
9613 : best_dist_8x32,
9614 : dist_8x32,
9615 : best_mv_16x32,
9616 : best_dist_16x32,
9617 : dist_16x32,
9618 : best_mv_32x32,
9619 : best_dist_32x32,
9620 : dist_32x32);
9621 : }
9622 : }
9623 :
9624 : // Nader - Full-pel search for depth 1 blocks
9625 0 : in_loop_me_64xN_Nx64_distortion_update(
9626 : //Inputs
9627 : curr_mv,
9628 : block_32x32_index,
9629 : block_16x16_index,
9630 : dist_32x16,
9631 : dist_16x32,
9632 : dist_32x32,
9633 : //Outputs
9634 : best_mv_64x16,
9635 : best_dist_64x16,
9636 : dist_64x16,
9637 : best_mv_64x32,
9638 : best_dist_64x32,
9639 : dist_64x32,
9640 : best_mv_16x64,
9641 : best_dist_16x64,
9642 : dist_16x64,
9643 : best_mv_32x64,
9644 : best_dist_32x64,
9645 : dist_32x64,
9646 : best_mv_64x64,
9647 : best_dist_64x64,
9648 : dist_64x64);
9649 : }
9650 : }
9651 :
9652 0 : if (number_of_sb_quad > 1) {
9653 : // Nader - Full-pel search for depth 0 blocks
9654 0 : in_loop_me_128xN_Nx128_distortion_update(
9655 : //Inputs
9656 : curr_mv,
9657 : block_64x64_index,
9658 : block_32x32_index,
9659 : dist_64x32,
9660 : dist_32x64,
9661 : dist_64x64,
9662 : //Outputs
9663 : best_mv_128x64,
9664 : best_dist_128x64,
9665 : dist_128x64,
9666 : best_mv_64x128,
9667 : best_dist_64x128,
9668 : dist_64x128,
9669 : best_mv_128x128,
9670 : best_dist_128x128,
9671 : &dist_128x128);
9672 : }
9673 0 : }
9674 :
9675 : /***************************************************************
9676 : * in_loop_me_fullpel_search_sblock
9677 : * perform the full pel search for the whole super-block
9678 : ***************************************************************/
9679 0 : static void in_loop_me_fullpel_search_sblock(
9680 : SsMeContext *context_ptr,
9681 : uint32_t list_index,
9682 : int16_t x_search_area_origin,
9683 : int16_t y_search_area_origin,
9684 : uint32_t search_area_width,
9685 : uint32_t search_area_height,
9686 : uint32_t number_of_sb_quad)
9687 : {
9688 : uint32_t x_search_index, y_search_index;
9689 :
9690 0 : for (y_search_index = 0; y_search_index < search_area_height; y_search_index++) {
9691 0 : for (x_search_index = 0; x_search_index < search_area_width; x_search_index++) {
9692 0 : in_loop_me_get_search_point_results_block(
9693 : context_ptr,
9694 : list_index,
9695 0 : x_search_index + y_search_index * context_ptr->interpolated_full_stride[list_index][0],
9696 0 : (int32_t)x_search_index + x_search_area_origin,
9697 0 : (int32_t)y_search_index + y_search_area_origin,
9698 : number_of_sb_quad);
9699 : }
9700 : }
9701 0 : }
9702 :
9703 0 : static void in_loop_me_context_dctor(EbPtr p)
9704 : {
9705 0 : SsMeContext* obj = (SsMeContext*)p;
9706 : uint32_t listIndex;
9707 : uint32_t refPicIndex;
9708 :
9709 0 : for (listIndex = 0; listIndex < MAX_NUM_OF_REF_PIC_LIST; listIndex++) {
9710 0 : for (refPicIndex = 0; refPicIndex < MAX_REF_IDX; refPicIndex++) {
9711 0 : EB_FREE_ARRAY(obj->integer_buffer[listIndex][refPicIndex]);
9712 0 : EB_FREE_ARRAY(obj->pos_b_buffer[listIndex][refPicIndex]);
9713 0 : EB_FREE_ARRAY(obj->pos_h_buffer[listIndex][refPicIndex]);
9714 0 : EB_FREE_ARRAY(obj->pos_j_buffer[listIndex][refPicIndex]);
9715 : }
9716 : }
9717 :
9718 0 : EB_FREE_ARRAY(obj->avctemp_buffer);
9719 0 : EB_FREE_ALIGNED(obj->sb_buffer);
9720 0 : }
9721 : /***************************************************************
9722 : * in_loop_me_context_ctor
9723 : * in-loop motion estimation construtor
9724 : ***************************************************************/
9725 0 : EbErrorType in_loop_me_context_ctor(
9726 : SsMeContext *object_ptr)
9727 : {
9728 : uint32_t listIndex;
9729 : uint32_t refPicIndex;
9730 :
9731 0 : object_ptr->dctor = in_loop_me_context_dctor;
9732 :
9733 : // Intermediate LCU-sized buffer to retain the input samples
9734 0 : object_ptr->sb_buffer_stride = MAX_SB_SIZE;
9735 :
9736 0 : EB_MALLOC_ALIGNED(object_ptr->sb_buffer, MAX_SB_SIZE * object_ptr->sb_buffer_stride);
9737 :
9738 0 : EB_MEMSET(object_ptr->sb_buffer, 0, sizeof(uint8_t) * MAX_SB_SIZE * object_ptr->sb_buffer_stride);
9739 :
9740 0 : object_ptr->interpolated_stride = MAX_SEARCH_AREA_WIDTH;
9741 :
9742 : // EB_MALLOC(EbBitFraction *, object_ptr->mvd_bits_array, sizeof(EbBitFraction) * NUMBER_OF_MVD_CASES, EB_N_PTR);
9743 : // 15 intermediate buffers to retain the interpolated reference samples
9744 :
9745 : // 0 1 2 3
9746 : // 0 A a b c
9747 : // 1 d e f g
9748 : // 2 h i j k
9749 : // 3 n p q r
9750 :
9751 : // _____________
9752 : // | |
9753 : // --I samples --> |Interpolation|-- O samples -->
9754 : // | ____________|
9755 :
9756 : // Before Interpolation: 2 x 3
9757 : // I I
9758 : // I I
9759 : // I I
9760 :
9761 : // After 1-D Horizontal Interpolation: (2 + 1) x 3 - a, b, and c
9762 : // O I O I O
9763 : // O I O I O
9764 : // O I O I O
9765 :
9766 : // After 1-D Vertical Interpolation: 2 x (3 + 1) - d, h, and n
9767 : // O O
9768 : // I I
9769 : // O O
9770 : // I I
9771 : // O O
9772 : // I I
9773 : // O O
9774 :
9775 : // After 2-D (Horizontal/Vertical) Interpolation: (2 + 1) x (3 + 1) - e, f, g, i, j, k, n, p, q, and r
9776 : // O O O
9777 : // I I
9778 : // O O O
9779 : // I I
9780 : // O O O
9781 : // I I
9782 : // O O O
9783 :
9784 0 : for (listIndex = 0; listIndex < MAX_NUM_OF_REF_PIC_LIST; listIndex++) {
9785 0 : for (refPicIndex = 0; refPicIndex < MAX_REF_IDX; refPicIndex++) {
9786 0 : EB_MALLOC_ARRAY(object_ptr->integer_buffer[listIndex][refPicIndex], object_ptr->interpolated_stride * MAX_SEARCH_AREA_HEIGHT);
9787 :
9788 0 : EB_MALLOC_ARRAY(object_ptr->pos_b_buffer[listIndex][refPicIndex], object_ptr->interpolated_stride * MAX_SEARCH_AREA_HEIGHT);
9789 :
9790 0 : EB_MALLOC_ARRAY(object_ptr->pos_h_buffer[listIndex][refPicIndex], object_ptr->interpolated_stride * MAX_SEARCH_AREA_HEIGHT);
9791 :
9792 0 : EB_MALLOC_ARRAY(object_ptr->pos_j_buffer[listIndex][refPicIndex], object_ptr->interpolated_stride * MAX_SEARCH_AREA_HEIGHT);
9793 : }
9794 : }
9795 :
9796 0 : EB_MALLOC_ARRAY(object_ptr->avctemp_buffer, object_ptr->interpolated_stride * MAX_SEARCH_AREA_HEIGHT);
9797 :
9798 0 : return EB_ErrorNone;
9799 : }
9800 :
9801 : /***************************************************************
9802 : * in_loop_me_interpolate_search_region_avc_style
9803 : * performs AVC-style interpolation for the whole Search Region
9804 : ***************************************************************/
9805 0 : static void in_loop_me_interpolate_search_region_avc_style(
9806 : SsMeContext *context_ptr, // input/output parameter, ME context ptr, used to get/set interpolated search area Ptr
9807 : uint32_t listIndex, // Refrence picture list index
9808 : uint8_t *searchRegionBuffer, // input parameter, search region index, used to point to reference samples
9809 : uint32_t lumaStride, // input parameter, reference Picture stride
9810 : uint32_t search_area_width, // input parameter, search area width
9811 : uint32_t search_area_height, // input parameter, search area height
9812 : uint32_t inputBitDepth) // input parameter, input sample bit depth
9813 : {
9814 : // 0 1 2 3
9815 : // 0 A a b c
9816 : // 1 d e f g
9817 : // 2 h i j k
9818 : // 3 n p q r
9819 :
9820 : // Position Frac-pos Y Frac-pos X Horizontal filter Vertical filter
9821 : // A 0 0 - -
9822 : // a 0 1 F0 -
9823 : // b 0 2 F1 -
9824 : // c 0 3 F2 -
9825 : // d 1 0 - F0
9826 : // e 1 1 F0 F0
9827 : // f 1 2 F1 F0
9828 : // g 1 3 F2 F0
9829 : // h 2 0 - F1
9830 : // i 2 1 F0 F1
9831 : // j 2 2 F1 F1
9832 : // k 2 3 F2 F1
9833 : // n 3 0 - F2
9834 : // p 3 1 F0 F2
9835 : // q 3 2 F1 F2
9836 : // r 3 3 F2 F2
9837 :
9838 : // Start a b c
9839 :
9840 : // The Search area needs to be a multiple of 8 to align with the ASM kernel
9841 : // Also the search area must be oversized by 2 to account for edge conditions
9842 0 : uint32_t searchAreaWidthForAsm = ROUND_UP_MUL_8(search_area_width + 2);
9843 :
9844 : (void)inputBitDepth;
9845 : // Half pel interpolation of the search region using f1 -> pos_b_buffer
9846 0 : if (searchAreaWidthForAsm) {
9847 0 : avc_style_luma_interpolation_filter(
9848 0 : searchRegionBuffer - (ME_FILTER_TAP >> 1) * lumaStride - (ME_FILTER_TAP >> 1) + 1,
9849 : lumaStride,
9850 : context_ptr->pos_b_buffer[listIndex][0],
9851 : context_ptr->interpolated_stride,
9852 : searchAreaWidthForAsm,
9853 : search_area_height + ME_FILTER_TAP,
9854 : context_ptr->avctemp_buffer,
9855 : EB_FALSE,
9856 : 2,
9857 : 2);
9858 : }
9859 :
9860 : // Half pel interpolation of the search region using f1 -> pos_h_buffer
9861 0 : if (searchAreaWidthForAsm) {
9862 0 : avc_style_luma_interpolation_filter(
9863 0 : searchRegionBuffer - (ME_FILTER_TAP >> 1) * lumaStride - 1 + lumaStride,
9864 : lumaStride,
9865 : context_ptr->pos_h_buffer[listIndex][0],
9866 : context_ptr->interpolated_stride,
9867 : searchAreaWidthForAsm,
9868 : search_area_height + 1,
9869 : context_ptr->avctemp_buffer,
9870 : EB_FALSE,
9871 : 2,
9872 : 8);
9873 : }
9874 :
9875 0 : if (searchAreaWidthForAsm) {
9876 : // Half pel interpolation of the search region using f1 -> pos_j_buffer
9877 0 : avc_style_luma_interpolation_filter(
9878 0 : context_ptr->pos_b_buffer[listIndex][0] + context_ptr->interpolated_stride,
9879 : context_ptr->interpolated_stride,
9880 : context_ptr->pos_j_buffer[listIndex][0],
9881 : context_ptr->interpolated_stride,
9882 : searchAreaWidthForAsm,
9883 : search_area_height + 1,
9884 : context_ptr->avctemp_buffer,
9885 : EB_FALSE,
9886 : 2,
9887 : 8);
9888 : }
9889 :
9890 0 : return;
9891 : }
9892 :
9893 : /***************************************************************
9894 : * in_loop_me_halfpel_refinement_block
9895 : * performs Half Pel refinement for one block
9896 : ***************************************************************/
9897 0 : static void in_loop_me_halfpel_refinement_block(
9898 : SequenceControlSet *sequence_control_set_ptr, // input parameter, Sequence control set Ptr
9899 : SsMeContext *context_ptr, // input parameter, ME context Ptr, used to get SB Ptr
9900 : uint32_t block_index_in_sb_buffer, // input parameter, PU origin, used to point to source samples
9901 : uint8_t *pos_b_buffer, // input parameter, position "b" interpolated search area Ptr
9902 : uint8_t *pos_h_buffer, // input parameter, position "h" interpolated search area Ptr
9903 : uint8_t *pos_j_buffer, // input parameter, position "j" interpolated search area Ptr
9904 : uint32_t pu_width, // input parameter, PU width
9905 : uint32_t pu_height, // input parameter, PU height
9906 : int16_t x_search_area_origin, // input parameter, search area origin in the horizontal direction, used to point to reference samples
9907 : int16_t y_search_area_origin, // input parameter, search area origin in the vertical direction, used to point to reference samples
9908 : uint32_t *pBestSad,
9909 : uint32_t *pBestMV,
9910 : uint8_t *psubPelDirection
9911 : )
9912 : {
9913 0 : EncodeContext *encode_context_ptr = sequence_control_set_ptr->encode_context_ptr;
9914 :
9915 : int32_t searchRegionIndex;
9916 0 : uint64_t bestHalfSad = 0;
9917 0 : uint64_t distortionLeftPosition = 0;
9918 0 : uint64_t distortionRightPosition = 0;
9919 0 : uint64_t distortionTopPosition = 0;
9920 0 : uint64_t distortionBottomPosition = 0;
9921 0 : uint64_t distortionTopLeftPosition = 0;
9922 0 : uint64_t distortionTopRightPosition = 0;
9923 0 : uint64_t distortionBottomLeftPosition = 0;
9924 0 : uint64_t distortionBottomRightPosition = 0;
9925 :
9926 : int16_t xMvHalf[8];
9927 : int16_t yMvHalf[8];
9928 :
9929 0 : int16_t x_mv = _MVXT(*pBestMV);
9930 0 : int16_t y_mv = _MVYT(*pBestMV);
9931 0 : int16_t xSearchIndex = (x_mv >> 2) - x_search_area_origin;
9932 0 : int16_t ySearchIndex = (y_mv >> 2) - y_search_area_origin;
9933 :
9934 : (void)sequence_control_set_ptr;
9935 : (void)encode_context_ptr;
9936 :
9937 : //TODO : remove these, and update the MV by just shifts
9938 :
9939 0 : xMvHalf[0] = x_mv - 2; // L position
9940 0 : xMvHalf[1] = x_mv + 2; // R position
9941 0 : xMvHalf[2] = x_mv; // T position
9942 0 : xMvHalf[3] = x_mv; // B position
9943 0 : xMvHalf[4] = x_mv - 2; // TL position
9944 0 : xMvHalf[5] = x_mv + 2; // TR position
9945 0 : xMvHalf[6] = x_mv + 2; // BR position
9946 0 : xMvHalf[7] = x_mv - 2; // BL position
9947 :
9948 0 : yMvHalf[0] = y_mv; // L position
9949 0 : yMvHalf[1] = y_mv; // R position
9950 0 : yMvHalf[2] = y_mv - 2; // T position
9951 0 : yMvHalf[3] = y_mv + 2; // B position
9952 0 : yMvHalf[4] = y_mv - 2; // TL position
9953 0 : yMvHalf[5] = y_mv - 2; // TR position
9954 0 : yMvHalf[6] = y_mv + 2; // BR position
9955 0 : yMvHalf[7] = y_mv + 2; // BL position
9956 :
9957 : // L position
9958 0 : searchRegionIndex = xSearchIndex + (int16_t)context_ptr->interpolated_stride * ySearchIndex;
9959 0 : distortionLeftPosition = (nxm_sad_kernel(&(context_ptr->sb_src_ptr[block_index_in_sb_buffer]), context_ptr->sb_src_stride << 1, &(pos_b_buffer[searchRegionIndex]), context_ptr->interpolated_stride << 1, pu_height >> 1, pu_width)) << 1;
9960 0 : if (distortionLeftPosition < *pBestSad) {
9961 0 : *pBestSad = (uint32_t)distortionLeftPosition;
9962 0 : *pBestMV = ((uint16_t)yMvHalf[0] << 16) | ((uint16_t)xMvHalf[0]);
9963 : }
9964 :
9965 : // R position
9966 0 : searchRegionIndex++;
9967 0 : distortionRightPosition = (nxm_sad_kernel(&(context_ptr->sb_src_ptr[block_index_in_sb_buffer]), context_ptr->sb_src_stride << 1, &(pos_b_buffer[searchRegionIndex]), context_ptr->interpolated_stride << 1, pu_height >> 1, pu_width)) << 1;
9968 :
9969 0 : if (distortionRightPosition < *pBestSad) {
9970 0 : *pBestSad = (uint32_t)distortionRightPosition;
9971 0 : *pBestMV = ((uint16_t)yMvHalf[1] << 16) | ((uint16_t)xMvHalf[1]);
9972 : }
9973 :
9974 : // T position
9975 0 : searchRegionIndex = xSearchIndex + (int16_t)context_ptr->interpolated_stride * ySearchIndex;
9976 0 : distortionTopPosition = (nxm_sad_kernel(&(context_ptr->sb_src_ptr[block_index_in_sb_buffer]), context_ptr->sb_src_stride << 1, &(pos_h_buffer[searchRegionIndex]), context_ptr->interpolated_stride << 1, pu_height >> 1, pu_width)) << 1;
9977 0 : if (distortionTopPosition < *pBestSad) {
9978 0 : *pBestSad = (uint32_t)distortionTopPosition;
9979 0 : *pBestMV = ((uint16_t)yMvHalf[2] << 16) | ((uint16_t)xMvHalf[2]);
9980 : }
9981 :
9982 : // B position
9983 0 : searchRegionIndex += (int16_t)context_ptr->interpolated_stride;
9984 0 : distortionBottomPosition = (nxm_sad_kernel(&(context_ptr->sb_src_ptr[block_index_in_sb_buffer]), context_ptr->sb_src_stride << 1, &(pos_h_buffer[searchRegionIndex]), context_ptr->interpolated_stride << 1, pu_height >> 1, pu_width)) << 1;
9985 0 : if (distortionBottomPosition < *pBestSad) {
9986 0 : *pBestSad = (uint32_t)distortionBottomPosition;
9987 0 : *pBestMV = ((uint16_t)yMvHalf[3] << 16) | ((uint16_t)xMvHalf[3]);
9988 : }
9989 :
9990 : //TL position
9991 0 : searchRegionIndex = xSearchIndex + (int16_t)context_ptr->interpolated_stride * ySearchIndex;
9992 0 : distortionTopLeftPosition = (nxm_sad_kernel(&(context_ptr->sb_src_ptr[block_index_in_sb_buffer]), context_ptr->sb_src_stride << 1, &(pos_j_buffer[searchRegionIndex]), context_ptr->interpolated_stride << 1, pu_height >> 1, pu_width)) << 1;
9993 0 : if (distortionTopLeftPosition < *pBestSad) {
9994 0 : *pBestSad = (uint32_t)distortionTopLeftPosition;
9995 0 : *pBestMV = ((uint16_t)yMvHalf[4] << 16) | ((uint16_t)xMvHalf[4]);
9996 : }
9997 :
9998 : //TR position
9999 0 : searchRegionIndex++;
10000 0 : distortionTopRightPosition = (nxm_sad_kernel(&(context_ptr->sb_src_ptr[block_index_in_sb_buffer]), context_ptr->sb_src_stride << 1, &(pos_j_buffer[searchRegionIndex]), context_ptr->interpolated_stride << 1, pu_height >> 1, pu_width)) << 1;
10001 0 : if (distortionTopRightPosition < *pBestSad) {
10002 0 : *pBestSad = (uint32_t)distortionTopRightPosition;
10003 0 : *pBestMV = ((uint16_t)yMvHalf[5] << 16) | ((uint16_t)xMvHalf[5]);
10004 : }
10005 :
10006 : //BR position
10007 0 : searchRegionIndex += (int16_t)context_ptr->interpolated_stride;
10008 0 : distortionBottomRightPosition = (nxm_sad_kernel(&(context_ptr->sb_src_ptr[block_index_in_sb_buffer]), context_ptr->sb_src_stride << 1, &(pos_j_buffer[searchRegionIndex]), context_ptr->interpolated_stride << 1, pu_height >> 1, pu_width)) << 1;
10009 0 : if (distortionBottomRightPosition < *pBestSad) {
10010 0 : *pBestSad = (uint32_t)distortionBottomRightPosition;
10011 0 : *pBestMV = ((uint16_t)yMvHalf[6] << 16) | ((uint16_t)xMvHalf[6]);
10012 : }
10013 :
10014 : //BL position
10015 0 : searchRegionIndex--;
10016 0 : distortionBottomLeftPosition = (nxm_sad_kernel(&(context_ptr->sb_src_ptr[block_index_in_sb_buffer]), context_ptr->sb_src_stride << 1, &(pos_j_buffer[searchRegionIndex]), context_ptr->interpolated_stride << 1, pu_height >> 1, pu_width)) << 1;
10017 0 : if (distortionBottomLeftPosition < *pBestSad) {
10018 0 : *pBestSad = (uint32_t)distortionBottomLeftPosition;
10019 0 : *pBestMV = ((uint16_t)yMvHalf[7] << 16) | ((uint16_t)xMvHalf[7]);
10020 : }
10021 :
10022 0 : bestHalfSad = MIN(distortionLeftPosition, MIN(distortionRightPosition, MIN(distortionTopPosition, MIN(distortionBottomPosition, MIN(distortionTopLeftPosition, MIN(distortionTopRightPosition, MIN(distortionBottomLeftPosition, distortionBottomRightPosition)))))));
10023 :
10024 0 : if (bestHalfSad == distortionLeftPosition)
10025 0 : *psubPelDirection = LEFT_POSITION;
10026 0 : else if (bestHalfSad == distortionRightPosition)
10027 0 : *psubPelDirection = RIGHT_POSITION;
10028 0 : else if (bestHalfSad == distortionTopPosition)
10029 0 : *psubPelDirection = TOP_POSITION;
10030 0 : else if (bestHalfSad == distortionBottomPosition)
10031 0 : *psubPelDirection = BOTTOM_POSITION;
10032 0 : else if (bestHalfSad == distortionTopLeftPosition)
10033 0 : *psubPelDirection = TOP_LEFT_POSITION;
10034 0 : else if (bestHalfSad == distortionTopRightPosition)
10035 0 : *psubPelDirection = TOP_RIGHT_POSITION;
10036 0 : else if (bestHalfSad == distortionBottomLeftPosition)
10037 0 : *psubPelDirection = BOTTOM_LEFT_POSITION;
10038 0 : else if (bestHalfSad == distortionBottomRightPosition)
10039 0 : *psubPelDirection = BOTTOM_RIGHT_POSITION;
10040 0 : return;
10041 : }
10042 :
10043 : /***************************************************************
10044 : * in_loop_me_halfpel_search_sblock
10045 : * performs Half Pel refinement
10046 : ***************************************************************/
10047 0 : void in_loop_me_halfpel_search_sblock(
10048 : SequenceControlSet *sequence_control_set_ptr, // input parameter, Sequence control set Ptr
10049 : SsMeContext *context_ptr, // input/output parameter, ME context Ptr, used to get/update ME results
10050 : uint8_t *pos_b_buffer, // input parameter, position "b" interpolated search area Ptr
10051 : uint8_t *pos_h_buffer, // input parameter, position "h" interpolated search area Ptr
10052 : uint8_t *pos_j_buffer, // input parameter, position "j" interpolated search area Ptr
10053 : int16_t x_search_area_origin, // input parameter, search area origin in the horizontal direction, used to point to reference samples
10054 : int16_t y_search_area_origin) // input parameter, search area origin in the vertical direction, used to point to reference samples
10055 : {
10056 : uint32_t idx;
10057 : uint32_t block_index;
10058 : uint32_t block_shift_x;
10059 : uint32_t block_shift_y;
10060 : uint32_t block_index_in_sb_buffer;
10061 : uint32_t posb_buffer_index;
10062 : uint32_t posh_buffer_index;
10063 : uint32_t posj_buffer_index;
10064 :
10065 0 : uint32_t block_offset = 0;
10066 0 : uint32_t x_offset = 0;
10067 0 : uint32_t y_offset = 0;
10068 0 : uint32_t quad_index = 0;
10069 0 : uint32_t number_of_sb_quad = context_ptr->sb_size == BLOCK_128X128 ? 4 : 1;
10070 :
10071 : // 4x4 [256 4x4 blocks]
10072 :
10073 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
10074 0 : for (block_index = 0; block_index < 256; ++block_index) {
10075 0 : block_offset = (quad_index * 256);
10076 0 : x_offset = (quad_index & 0x01) << 6;
10077 0 : y_offset = (quad_index >> 1) << 6;
10078 0 : idx = tab4x4[block_index] + block_offset;
10079 0 : block_shift_x = ((block_index & 0xf) << 2) + x_offset;
10080 0 : block_shift_y = ((block_index >> 4) << 2) + y_offset;
10081 :
10082 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10083 :
10084 0 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10085 0 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10086 0 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10087 :
10088 0 : in_loop_me_halfpel_refinement_block(
10089 : sequence_control_set_ptr,
10090 : context_ptr,
10091 : block_index_in_sb_buffer,
10092 : &(pos_b_buffer[posb_buffer_index]),
10093 : &(pos_h_buffer[posh_buffer_index]),
10094 : &(pos_j_buffer[posj_buffer_index]),
10095 : 4,
10096 : 4,
10097 : x_search_area_origin,
10098 : y_search_area_origin,
10099 0 : &context_ptr->p_best_sad4x4[idx],
10100 0 : &context_ptr->p_best_mv4x4[idx],
10101 : &context_ptr->psub_pel_direction4x4[idx]);
10102 : }
10103 : }
10104 :
10105 : // 8x4 [128 8x4 blocks]
10106 :
10107 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
10108 0 : for (block_index = 0; block_index < 128; ++block_index) {
10109 0 : block_offset = (quad_index * 128);
10110 0 : x_offset = (quad_index & 0x01) << 6;
10111 0 : y_offset = (quad_index >> 1) << 6;
10112 0 : idx = tab8x4[block_index] + block_offset;
10113 0 : block_shift_x = ((block_index & 0x07) << 3) + x_offset;;
10114 0 : block_shift_y = ((block_index >> 3) << 2) + y_offset;;
10115 :
10116 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10117 :
10118 0 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10119 0 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10120 0 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10121 :
10122 0 : in_loop_me_halfpel_refinement_block(
10123 : sequence_control_set_ptr,
10124 : context_ptr,
10125 : block_index_in_sb_buffer,
10126 : &(pos_b_buffer[posb_buffer_index]),
10127 : &(pos_h_buffer[posh_buffer_index]),
10128 : &(pos_j_buffer[posj_buffer_index]),
10129 : 8,
10130 : 4,
10131 : x_search_area_origin,
10132 : y_search_area_origin,
10133 0 : &context_ptr->p_best_sad8x4[idx],
10134 0 : &context_ptr->p_best_mv8x4[idx],
10135 : &context_ptr->psub_pel_direction8x4[idx]);
10136 : }
10137 : }
10138 :
10139 : // 4x8 [128 4x8 blocks]
10140 :
10141 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
10142 0 : for (block_index = 0; block_index < 128; ++block_index) {
10143 0 : block_offset = (quad_index * 128);
10144 0 : x_offset = (quad_index & 0x01) << 6;
10145 0 : y_offset = (quad_index >> 1) << 6;
10146 0 : idx = tab4x8[block_index] + block_offset;
10147 0 : block_shift_x = ((block_index & 0xf) << 2) + x_offset;
10148 0 : block_shift_y = ((block_index >> 4) << 3) + y_offset;
10149 :
10150 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10151 :
10152 0 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10153 0 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10154 0 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10155 :
10156 0 : in_loop_me_halfpel_refinement_block(
10157 : sequence_control_set_ptr,
10158 : context_ptr,
10159 : block_index_in_sb_buffer,
10160 : &(pos_b_buffer[posb_buffer_index]),
10161 : &(pos_h_buffer[posh_buffer_index]),
10162 : &(pos_j_buffer[posj_buffer_index]),
10163 : 4,
10164 : 8,
10165 : x_search_area_origin,
10166 : y_search_area_origin,
10167 0 : &context_ptr->p_best_sad4x8[idx],
10168 0 : &context_ptr->p_best_mv4x8[idx],
10169 : &context_ptr->psub_pel_direction4x8[idx]);
10170 : }
10171 : }
10172 :
10173 : // 8x8 [64 8x8 blocks]
10174 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
10175 0 : for (block_index = 0; block_index < 64; ++block_index) {
10176 0 : block_offset = (quad_index * 64);
10177 0 : x_offset = (quad_index & 0x01) << 6;
10178 0 : y_offset = (quad_index >> 1) << 6;
10179 0 : idx = tab8x8[block_index] + block_offset;
10180 0 : block_shift_x = ((block_index & 0x07) << 3) + x_offset;
10181 0 : block_shift_y = ((block_index >> 3) << 3) + y_offset;
10182 :
10183 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10184 :
10185 0 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10186 0 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10187 0 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10188 :
10189 0 : in_loop_me_halfpel_refinement_block(
10190 : sequence_control_set_ptr,
10191 : context_ptr,
10192 : block_index_in_sb_buffer,
10193 : &(pos_b_buffer[posb_buffer_index]),
10194 : &(pos_h_buffer[posh_buffer_index]),
10195 : &(pos_j_buffer[posj_buffer_index]),
10196 : 8,
10197 : 8,
10198 : x_search_area_origin,
10199 : y_search_area_origin,
10200 0 : &context_ptr->p_best_sad8x8[idx],
10201 0 : &context_ptr->p_best_mv8x8[idx],
10202 : &context_ptr->psub_pel_direction8x8[idx]);
10203 : }
10204 : }
10205 :
10206 : // 16x8 [32 partitions]
10207 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
10208 0 : for (block_index = 0; block_index < 32; ++block_index) {
10209 0 : block_offset = (quad_index * 32);
10210 0 : x_offset = (quad_index & 0x01) << 6;
10211 0 : y_offset = (quad_index >> 1) << 6;
10212 0 : idx = tab16x8[block_index] + block_offset;
10213 0 : block_shift_x = ((block_index & 0x03) << 4) + x_offset;
10214 0 : block_shift_y = ((block_index >> 2) << 3) + y_offset;
10215 :
10216 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10217 :
10218 0 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10219 0 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10220 0 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10221 :
10222 0 : in_loop_me_halfpel_refinement_block(
10223 : sequence_control_set_ptr,
10224 : context_ptr,
10225 : block_index_in_sb_buffer,
10226 : &(pos_b_buffer[posb_buffer_index]),
10227 : &(pos_h_buffer[posh_buffer_index]),
10228 : &(pos_j_buffer[posj_buffer_index]),
10229 : 16,
10230 : 8,
10231 : x_search_area_origin,
10232 : y_search_area_origin,
10233 0 : &context_ptr->p_best_sad16x8[idx],
10234 0 : &context_ptr->p_best_mv16x8[idx],
10235 : &context_ptr->psub_pel_direction16x8[idx]);
10236 : }
10237 : }
10238 :
10239 : // 8x16 [32 partitions]
10240 :
10241 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
10242 0 : for (block_index = 0; block_index < 32; ++block_index) {
10243 0 : block_offset = (quad_index * 32);
10244 0 : x_offset = (quad_index & 0x01) << 6;
10245 0 : y_offset = (quad_index >> 1) << 6;
10246 0 : idx = tab8x16[block_index] + block_offset;
10247 0 : block_shift_x = ((block_index & 0x07) << 3) + x_offset;
10248 0 : block_shift_y = ((block_index >> 3) << 4) + y_offset;
10249 :
10250 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10251 :
10252 0 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10253 0 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10254 0 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10255 :
10256 0 : in_loop_me_halfpel_refinement_block(
10257 : sequence_control_set_ptr,
10258 : context_ptr,
10259 : block_index_in_sb_buffer,
10260 : &(pos_b_buffer[posb_buffer_index]),
10261 : &(pos_h_buffer[posh_buffer_index]),
10262 : &(pos_j_buffer[posj_buffer_index]),
10263 : 8,
10264 : 16,
10265 : x_search_area_origin,
10266 : y_search_area_origin,
10267 0 : &context_ptr->p_best_sad8x16[idx],
10268 0 : &context_ptr->p_best_mv8x16[idx],
10269 : &context_ptr->psub_pel_direction8x16[idx]);
10270 : }
10271 : }
10272 :
10273 : // 32x8 [16 partitions]
10274 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
10275 0 : for (block_index = 0; block_index < 16; ++block_index) {
10276 0 : block_offset = (quad_index * 16);
10277 0 : x_offset = (quad_index & 0x01) << 6;
10278 0 : y_offset = (quad_index >> 1) << 6;
10279 0 : idx = tab32x8[block_index] + block_offset;
10280 0 : block_shift_x = ((block_index & 0x01) << 5) + x_offset;
10281 0 : block_shift_y = ((block_index >> 1) << 3) + y_offset;
10282 :
10283 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10284 :
10285 0 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10286 0 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10287 0 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10288 :
10289 0 : in_loop_me_halfpel_refinement_block(
10290 : sequence_control_set_ptr,
10291 : context_ptr,
10292 : block_index_in_sb_buffer,
10293 : &(pos_b_buffer[posb_buffer_index]),
10294 : &(pos_h_buffer[posh_buffer_index]),
10295 : &(pos_j_buffer[posj_buffer_index]),
10296 : 32,
10297 : 8,
10298 : x_search_area_origin,
10299 : y_search_area_origin,
10300 0 : &context_ptr->p_best_sad32x8[idx],
10301 0 : &context_ptr->p_best_mv32x8[idx],
10302 : &context_ptr->psub_pel_direction32x8[idx]);
10303 : }
10304 : }
10305 :
10306 : // 8x32 [16 partitions]
10307 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
10308 0 : for (block_index = 0; block_index < 16; ++block_index) {
10309 0 : block_offset = (quad_index * 16);
10310 0 : idx = tab8x32[block_index] + block_offset;
10311 0 : x_offset = (quad_index & 0x01) << 6;
10312 0 : y_offset = (quad_index >> 1) << 6;
10313 0 : block_shift_x = ((block_index & 0x07) << 3) + x_offset;
10314 0 : block_shift_y = ((block_index >> 3) << 5) + y_offset;
10315 :
10316 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10317 :
10318 0 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10319 0 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10320 0 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10321 :
10322 0 : in_loop_me_halfpel_refinement_block(
10323 : sequence_control_set_ptr,
10324 : context_ptr,
10325 : block_index_in_sb_buffer,
10326 : &(pos_b_buffer[posb_buffer_index]),
10327 : &(pos_h_buffer[posh_buffer_index]),
10328 : &(pos_j_buffer[posj_buffer_index]),
10329 : 8,
10330 : 32,
10331 : x_search_area_origin,
10332 : y_search_area_origin,
10333 0 : &context_ptr->p_best_sad8x32[idx],
10334 0 : &context_ptr->p_best_mv8x32[idx],
10335 : &context_ptr->psub_pel_direction8x32[idx]);
10336 : }
10337 : }
10338 :
10339 : // 16x16 [16 partitions]
10340 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
10341 0 : for (block_index = 0; block_index < 16; ++block_index) {
10342 0 : block_offset = (quad_index * 16);
10343 0 : x_offset = (quad_index & 0x01) << 6;
10344 0 : y_offset = (quad_index >> 1) << 6;
10345 0 : idx = tab16x16[block_index] + block_offset;
10346 0 : block_shift_x = ((block_index & 0x03) << 4) + x_offset;
10347 0 : block_shift_y = ((block_index >> 2) << 4) + y_offset;
10348 :
10349 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10350 :
10351 0 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10352 0 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10353 0 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10354 :
10355 0 : in_loop_me_halfpel_refinement_block(
10356 : sequence_control_set_ptr,
10357 : context_ptr,
10358 : block_index_in_sb_buffer,
10359 : &(pos_b_buffer[posb_buffer_index]),
10360 : &(pos_h_buffer[posh_buffer_index]),
10361 : &(pos_j_buffer[posj_buffer_index]),
10362 : 16,
10363 : 16,
10364 : x_search_area_origin,
10365 : y_search_area_origin,
10366 0 : &context_ptr->p_best_sad16x16[idx],
10367 0 : &context_ptr->p_best_mv16x16[idx],
10368 : &context_ptr->psub_pel_direction16x16[idx]);
10369 : }
10370 : }
10371 :
10372 : // 32x16 [8 partitions]
10373 :
10374 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
10375 0 : for (block_index = 0; block_index < 8; ++block_index) {
10376 0 : block_offset = (quad_index * 8);
10377 0 : x_offset = (quad_index & 0x01) << 6;
10378 0 : y_offset = (quad_index >> 1) << 6;
10379 0 : idx = tab32x16[block_index] + block_offset;
10380 0 : block_shift_x = ((block_index & 0x01) << 5) + x_offset;
10381 0 : block_shift_y = ((block_index >> 1) << 4) + y_offset;
10382 :
10383 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10384 :
10385 0 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10386 0 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10387 0 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10388 :
10389 0 : in_loop_me_halfpel_refinement_block(
10390 : sequence_control_set_ptr,
10391 : context_ptr,
10392 : block_index_in_sb_buffer,
10393 : &(pos_b_buffer[posb_buffer_index]),
10394 : &(pos_h_buffer[posh_buffer_index]),
10395 : &(pos_j_buffer[posj_buffer_index]),
10396 : 32,
10397 : 16,
10398 : x_search_area_origin,
10399 : y_search_area_origin,
10400 0 : &context_ptr->p_best_sad32x16[idx],
10401 0 : &context_ptr->p_best_mv32x16[idx],
10402 : &context_ptr->psub_pel_direction32x16[idx]);
10403 : }
10404 : }
10405 :
10406 : // 16x32 [8 partitions]
10407 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
10408 0 : for (block_index = 0; block_index < 8; ++block_index) {
10409 0 : block_offset = (quad_index * 8);
10410 0 : x_offset = (quad_index & 0x01) << 6;
10411 0 : y_offset = (quad_index >> 1) << 6;
10412 0 : idx = tab16x32[block_index] + block_offset;
10413 0 : block_shift_x = ((block_index & 0x03) << 4) + x_offset;
10414 0 : block_shift_y = ((block_index >> 2) << 5) + y_offset;
10415 :
10416 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10417 :
10418 0 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10419 0 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10420 0 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10421 :
10422 0 : in_loop_me_halfpel_refinement_block(
10423 : sequence_control_set_ptr,
10424 : context_ptr,
10425 : block_index_in_sb_buffer,
10426 : &(pos_b_buffer[posb_buffer_index]),
10427 : &(pos_h_buffer[posh_buffer_index]),
10428 : &(pos_j_buffer[posj_buffer_index]),
10429 : 16,
10430 : 32,
10431 : x_search_area_origin,
10432 : y_search_area_origin,
10433 0 : &context_ptr->p_best_sad16x32[idx],
10434 0 : &context_ptr->p_best_mv16x32[idx],
10435 : &context_ptr->psub_pel_direction16x32[idx]);
10436 : }
10437 : }
10438 :
10439 : // 32x32 [4 partitions]
10440 :
10441 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
10442 0 : for (block_index = 0; block_index < 4; ++block_index) {
10443 0 : block_offset = (quad_index * 4);
10444 0 : x_offset = (quad_index & 0x01) << 6;
10445 0 : y_offset = (quad_index >> 1) << 6;
10446 0 : idx = tab32x32[block_index] + block_offset;
10447 0 : block_shift_x = ((block_index & 0x01) << 5) + x_offset;
10448 0 : block_shift_y = ((block_index >> 1) << 5) + y_offset;
10449 :
10450 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10451 :
10452 0 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10453 0 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10454 0 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10455 :
10456 0 : in_loop_me_halfpel_refinement_block(
10457 : sequence_control_set_ptr,
10458 : context_ptr,
10459 : block_index_in_sb_buffer,
10460 : &(pos_b_buffer[posb_buffer_index]),
10461 : &(pos_h_buffer[posh_buffer_index]),
10462 : &(pos_j_buffer[posj_buffer_index]),
10463 : 32,
10464 : 32,
10465 : x_search_area_origin,
10466 : y_search_area_origin,
10467 0 : &context_ptr->p_best_sad32x32[idx],
10468 0 : &context_ptr->p_best_mv32x32[idx],
10469 : &context_ptr->psub_pel_direction32x32[idx]);
10470 : }
10471 : }
10472 :
10473 : // 64x32 [2 partitions]
10474 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
10475 0 : for (block_index = 0; block_index < 2; ++block_index) {
10476 0 : block_offset = (quad_index * 2);
10477 0 : x_offset = (quad_index & 0x01) << 6;
10478 0 : y_offset = (quad_index >> 1) << 6;
10479 0 : idx = tab64x32[block_index] + block_offset;
10480 0 : block_shift_x = x_offset;
10481 0 : block_shift_y = (block_index << 5) + y_offset;
10482 :
10483 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10484 :
10485 0 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10486 0 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10487 0 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10488 :
10489 0 : in_loop_me_halfpel_refinement_block(
10490 : sequence_control_set_ptr,
10491 : context_ptr,
10492 : block_index_in_sb_buffer,
10493 : &(pos_b_buffer[posb_buffer_index]),
10494 : &(pos_h_buffer[posh_buffer_index]),
10495 : &(pos_j_buffer[posj_buffer_index]),
10496 : 64,
10497 : 32,
10498 : x_search_area_origin,
10499 : y_search_area_origin,
10500 0 : &context_ptr->p_best_sad64x32[idx],
10501 0 : &context_ptr->p_best_mv64x32[idx],
10502 : &context_ptr->psub_pel_direction64x32[idx]);
10503 : }
10504 : }
10505 :
10506 : // 32x64 [2 partitions]
10507 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
10508 0 : for (block_index = 0; block_index < 2; ++block_index) {
10509 0 : block_offset = (quad_index * 2);
10510 0 : x_offset = (quad_index & 0x01) << 6;
10511 0 : y_offset = (quad_index >> 1) << 6;
10512 0 : idx = tab32x64[block_index] + block_offset;
10513 0 : block_shift_x = (block_index << 5) + x_offset;
10514 0 : block_shift_y = y_offset;
10515 :
10516 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10517 :
10518 0 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10519 0 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10520 0 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10521 :
10522 0 : in_loop_me_halfpel_refinement_block(
10523 : sequence_control_set_ptr,
10524 : context_ptr,
10525 : block_index_in_sb_buffer,
10526 : &(pos_b_buffer[posb_buffer_index]),
10527 : &(pos_h_buffer[posh_buffer_index]),
10528 : &(pos_j_buffer[posj_buffer_index]),
10529 : 32,
10530 : 64,
10531 : x_search_area_origin,
10532 : y_search_area_origin,
10533 0 : &context_ptr->p_best_sad32x64[idx],
10534 0 : &context_ptr->p_best_mv32x64[idx],
10535 : &context_ptr->psub_pel_direction32x64[idx]);
10536 : }
10537 : }
10538 :
10539 : // 64x64 [1 partition]
10540 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
10541 0 : idx = quad_index;
10542 0 : x_offset = (quad_index & 0x01) << 6;
10543 0 : y_offset = (quad_index >> 1) << 6;
10544 0 : block_shift_x = x_offset;
10545 0 : block_shift_y = y_offset;
10546 :
10547 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10548 :
10549 0 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10550 0 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10551 0 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10552 :
10553 0 : in_loop_me_halfpel_refinement_block(
10554 : sequence_control_set_ptr,
10555 : context_ptr,
10556 : block_index_in_sb_buffer,
10557 : &(pos_b_buffer[posb_buffer_index]),
10558 : &(pos_h_buffer[posh_buffer_index]),
10559 : &(pos_j_buffer[posj_buffer_index]),
10560 : 64,
10561 : 64,
10562 : x_search_area_origin,
10563 : y_search_area_origin,
10564 0 : &context_ptr->p_best_sad64x64[idx],
10565 0 : &context_ptr->p_best_mv64x64[idx],
10566 : &context_ptr->psub_pel_direction64x64[idx]);
10567 : }
10568 :
10569 : if (0) {
10570 : // 128x64 [2 partitions]
10571 : for (block_index = 0; block_index < 2; ++block_index) {
10572 : block_shift_x = 0;
10573 : block_shift_y = block_index << 6;
10574 :
10575 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10576 :
10577 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10578 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10579 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10580 :
10581 : in_loop_me_halfpel_refinement_block(
10582 : sequence_control_set_ptr,
10583 : context_ptr,
10584 : block_index_in_sb_buffer,
10585 : &(pos_b_buffer[posb_buffer_index]),
10586 : &(pos_h_buffer[posh_buffer_index]),
10587 : &(pos_j_buffer[posj_buffer_index]),
10588 : 128,
10589 : 64,
10590 : x_search_area_origin,
10591 : y_search_area_origin,
10592 : &context_ptr->p_best_sad128x64[block_index],
10593 : &context_ptr->p_best_mv128x64[block_index],
10594 : &context_ptr->psub_pel_direction128x64[block_index]);
10595 : }
10596 :
10597 : // 64x128 [2 partitions]
10598 : for (block_index = 0; block_index < 2; ++block_index) {
10599 : block_shift_x = block_index << 6;
10600 : block_shift_y = 0;
10601 :
10602 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10603 :
10604 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10605 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10606 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10607 :
10608 : in_loop_me_halfpel_refinement_block(
10609 : sequence_control_set_ptr,
10610 : context_ptr,
10611 : block_index_in_sb_buffer,
10612 : &(pos_b_buffer[posb_buffer_index]),
10613 : &(pos_h_buffer[posh_buffer_index]),
10614 : &(pos_j_buffer[posj_buffer_index]),
10615 : 64,
10616 : 128,
10617 : x_search_area_origin,
10618 : y_search_area_origin,
10619 : &context_ptr->p_best_sad64x128[block_index],
10620 : &context_ptr->p_best_mv64x128[block_index],
10621 : &context_ptr->psub_pel_direction64x128[block_index]);
10622 : }
10623 :
10624 : // 128x128 [1 partition]
10625 : {
10626 : block_index = 0;
10627 : block_shift_x = 0;
10628 : block_shift_y = 0;
10629 :
10630 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10631 :
10632 : posb_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10633 : posh_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10634 : posj_buffer_index = block_shift_x + block_shift_y * context_ptr->interpolated_stride;
10635 :
10636 : in_loop_me_halfpel_refinement_block(
10637 : sequence_control_set_ptr,
10638 : context_ptr,
10639 : block_index_in_sb_buffer,
10640 : &(pos_b_buffer[posb_buffer_index]),
10641 : &(pos_h_buffer[posh_buffer_index]),
10642 : &(pos_j_buffer[posj_buffer_index]),
10643 : 128,
10644 : 128,
10645 : x_search_area_origin,
10646 : y_search_area_origin,
10647 : &context_ptr->p_best_sad128x128[block_index],
10648 : &context_ptr->p_best_mv128x128[block_index],
10649 : &context_ptr->psub_pel_direction128x128);
10650 : }
10651 : }
10652 0 : return;
10653 : }
10654 :
10655 : /***************************************************************
10656 : * in_loop_me_quarterpel_refinement_on_the_fly_block
10657 : * performs Quarter Pel refinement for each block
10658 : ***************************************************************/
10659 0 : static void in_loop_me_quarterpel_refinement_on_the_fly_block(
10660 : SsMeContext *context_ptr, // [IN] ME context Ptr, used to get SB Ptr
10661 : uint32_t block_index_in_sb_buffer, // [IN] PU origin, used to point to source samples
10662 : uint8_t **buf1, // [IN]
10663 : uint32_t *buf1Stride,
10664 : uint8_t **buf2, // [IN]
10665 : uint32_t *buf2Stride,
10666 : uint32_t pu_width, // [IN] PU width
10667 : uint32_t pu_height, // [IN] PU height
10668 : int16_t x_search_area_origin, // [IN] search area origin in the horizontal direction, used to point to reference samples
10669 : int16_t y_search_area_origin, // [IN] search area origin in the vertical direction, used to point to reference samples
10670 : uint32_t *pBestSad,
10671 : uint32_t *pBestMV,
10672 : uint8_t sub_pel_direction)
10673 : {
10674 0 : int16_t x_mv = _MVXT(*pBestMV);
10675 0 : int16_t y_mv = _MVYT(*pBestMV);
10676 :
10677 0 : int16_t xSearchIndex = ((x_mv + 2) >> 2) - x_search_area_origin;
10678 0 : int16_t ySearchIndex = ((y_mv + 2) >> 2) - y_search_area_origin;
10679 :
10680 : uint64_t dist;
10681 :
10682 : EbBool validTL, validT, validTR, validR, validBR, validB, validBL, validL;
10683 :
10684 : int16_t xMvQuarter[8];
10685 : int16_t yMvQuarter[8];
10686 0 : int32_t searchRegionIndex1 = 0;
10687 0 : int32_t searchRegionIndex2 = 0;
10688 :
10689 0 : if ((y_mv & 2) + ((x_mv & 2) >> 1)) {
10690 0 : validTL = (EbBool)(sub_pel_direction == RIGHT_POSITION || sub_pel_direction == BOTTOM_RIGHT_POSITION || sub_pel_direction == BOTTOM_POSITION);
10691 0 : validT = (EbBool)(sub_pel_direction == BOTTOM_RIGHT_POSITION || sub_pel_direction == BOTTOM_POSITION || sub_pel_direction == BOTTOM_LEFT_POSITION);
10692 0 : validTR = (EbBool)(sub_pel_direction == BOTTOM_POSITION || sub_pel_direction == BOTTOM_LEFT_POSITION || sub_pel_direction == LEFT_POSITION);
10693 0 : validR = (EbBool)(sub_pel_direction == BOTTOM_LEFT_POSITION || sub_pel_direction == LEFT_POSITION || sub_pel_direction == TOP_LEFT_POSITION);
10694 0 : validBR = (EbBool)(sub_pel_direction == LEFT_POSITION || sub_pel_direction == TOP_LEFT_POSITION || sub_pel_direction == TOP_POSITION);
10695 0 : validB = (EbBool)(sub_pel_direction == TOP_LEFT_POSITION || sub_pel_direction == TOP_POSITION || sub_pel_direction == TOP_RIGHT_POSITION);
10696 0 : validBL = (EbBool)(sub_pel_direction == TOP_POSITION || sub_pel_direction == TOP_RIGHT_POSITION || sub_pel_direction == RIGHT_POSITION);
10697 0 : validL = (EbBool)(sub_pel_direction == TOP_RIGHT_POSITION || sub_pel_direction == RIGHT_POSITION || sub_pel_direction == BOTTOM_RIGHT_POSITION);
10698 : }
10699 : else {
10700 0 : validTL = (EbBool)(sub_pel_direction == LEFT_POSITION || sub_pel_direction == TOP_LEFT_POSITION || sub_pel_direction == TOP_POSITION);
10701 0 : validT = (EbBool)(sub_pel_direction == TOP_LEFT_POSITION || sub_pel_direction == TOP_POSITION || sub_pel_direction == TOP_RIGHT_POSITION);
10702 0 : validTR = (EbBool)(sub_pel_direction == TOP_POSITION || sub_pel_direction == TOP_RIGHT_POSITION || sub_pel_direction == RIGHT_POSITION);
10703 0 : validR = (EbBool)(sub_pel_direction == TOP_RIGHT_POSITION || sub_pel_direction == RIGHT_POSITION || sub_pel_direction == BOTTOM_RIGHT_POSITION);
10704 0 : validBR = (EbBool)(sub_pel_direction == RIGHT_POSITION || sub_pel_direction == BOTTOM_RIGHT_POSITION || sub_pel_direction == BOTTOM_POSITION);
10705 0 : validB = (EbBool)(sub_pel_direction == BOTTOM_RIGHT_POSITION || sub_pel_direction == BOTTOM_POSITION || sub_pel_direction == BOTTOM_LEFT_POSITION);
10706 0 : validBL = (EbBool)(sub_pel_direction == BOTTOM_POSITION || sub_pel_direction == BOTTOM_LEFT_POSITION || sub_pel_direction == LEFT_POSITION);
10707 0 : validL = (EbBool)(sub_pel_direction == BOTTOM_LEFT_POSITION || sub_pel_direction == LEFT_POSITION || sub_pel_direction == TOP_LEFT_POSITION);
10708 : }
10709 :
10710 0 : xMvQuarter[0] = x_mv - 1; // L position
10711 0 : xMvQuarter[1] = x_mv + 1; // R position
10712 0 : xMvQuarter[2] = x_mv; // T position
10713 0 : xMvQuarter[3] = x_mv; // B position
10714 0 : xMvQuarter[4] = x_mv - 1; // TL position
10715 0 : xMvQuarter[5] = x_mv + 1; // TR position
10716 0 : xMvQuarter[6] = x_mv + 1; // BR position
10717 0 : xMvQuarter[7] = x_mv - 1; // BL position
10718 :
10719 0 : yMvQuarter[0] = y_mv; // L position
10720 0 : yMvQuarter[1] = y_mv; // R position
10721 0 : yMvQuarter[2] = y_mv - 1; // T position
10722 0 : yMvQuarter[3] = y_mv + 1; // B position
10723 0 : yMvQuarter[4] = y_mv - 1; // TL position
10724 0 : yMvQuarter[5] = y_mv - 1; // TR position
10725 0 : yMvQuarter[6] = y_mv + 1; // BR position
10726 0 : yMvQuarter[7] = y_mv + 1; // BL position
10727 :
10728 : // L position
10729 0 : if (validL) {
10730 0 : searchRegionIndex1 = (int32_t)xSearchIndex + (int32_t)buf1Stride[0] * (int32_t)ySearchIndex;
10731 0 : searchRegionIndex2 = (int32_t)xSearchIndex + (int32_t)buf2Stride[0] * (int32_t)ySearchIndex;
10732 :
10733 0 : dist = nxm_sad_avg_kernel(&(context_ptr->sb_buffer[block_index_in_sb_buffer]), context_ptr->sb_src_stride << 1, buf1[0] + searchRegionIndex1, buf1Stride[0] << 1, buf2[0] + searchRegionIndex2, buf2Stride[0] << 1, pu_height >> 1, pu_width);
10734 :
10735 0 : dist = dist << 1;
10736 :
10737 0 : if (dist < *pBestSad) {
10738 0 : *pBestSad = (uint32_t)dist;
10739 0 : *pBestMV = ((uint16_t)yMvQuarter[0] << 16) | ((uint16_t)xMvQuarter[0]);
10740 : }
10741 : }
10742 :
10743 : // R positions
10744 0 : if (validR) {
10745 0 : searchRegionIndex1 = (int32_t)xSearchIndex + (int32_t)buf1Stride[1] * (int32_t)ySearchIndex;
10746 0 : searchRegionIndex2 = (int32_t)xSearchIndex + (int32_t)buf2Stride[1] * (int32_t)ySearchIndex;
10747 0 : dist = nxm_sad_avg_kernel(&(context_ptr->sb_buffer[block_index_in_sb_buffer]), context_ptr->sb_src_stride << 1, buf1[1] + searchRegionIndex1, buf1Stride[1] << 1, buf2[1] + searchRegionIndex2, buf2Stride[1] << 1, pu_height >> 1, pu_width);
10748 0 : dist = dist << 1;
10749 :
10750 0 : if (dist < *pBestSad) {
10751 0 : *pBestSad = (uint32_t)dist;
10752 0 : *pBestMV = ((uint16_t)yMvQuarter[1] << 16) | ((uint16_t)xMvQuarter[1]);
10753 : }
10754 : }
10755 :
10756 : // T position
10757 0 : if (validT) {
10758 0 : searchRegionIndex1 = (int32_t)xSearchIndex + (int32_t)buf1Stride[2] * (int32_t)ySearchIndex;
10759 0 : searchRegionIndex2 = (int32_t)xSearchIndex + (int32_t)buf2Stride[2] * (int32_t)ySearchIndex;
10760 :
10761 0 : dist = nxm_sad_avg_kernel(&(context_ptr->sb_buffer[block_index_in_sb_buffer]), context_ptr->sb_src_stride << 1, buf1[2] + searchRegionIndex1, buf1Stride[2] << 1, buf2[2] + searchRegionIndex2, buf2Stride[2] << 1, pu_height >> 1, pu_width);
10762 0 : dist = dist << 1;
10763 :
10764 0 : if (dist < *pBestSad) {
10765 0 : *pBestSad = (uint32_t)dist;
10766 0 : *pBestMV = ((uint16_t)yMvQuarter[2] << 16) | ((uint16_t)xMvQuarter[2]);
10767 : }
10768 : }
10769 :
10770 : // B position
10771 0 : if (validB) {
10772 0 : searchRegionIndex1 = (int32_t)xSearchIndex + (int32_t)buf1Stride[3] * (int32_t)ySearchIndex;
10773 0 : searchRegionIndex2 = (int32_t)xSearchIndex + (int32_t)buf2Stride[3] * (int32_t)ySearchIndex;
10774 :
10775 0 : dist = nxm_sad_avg_kernel(&(context_ptr->sb_buffer[block_index_in_sb_buffer]), context_ptr->sb_src_stride << 1, buf1[3] + searchRegionIndex1, buf1Stride[3] << 1, buf2[3] + searchRegionIndex2, buf2Stride[3] << 1, pu_height >> 1, pu_width);
10776 0 : dist = dist << 1;
10777 :
10778 0 : if (dist < *pBestSad) {
10779 0 : *pBestSad = (uint32_t)dist;
10780 0 : *pBestMV = ((uint16_t)yMvQuarter[3] << 16) | ((uint16_t)xMvQuarter[3]);
10781 : }
10782 : }
10783 :
10784 : //TL position
10785 0 : if (validTL) {
10786 0 : searchRegionIndex1 = (int32_t)xSearchIndex + (int32_t)buf1Stride[4] * (int32_t)ySearchIndex;
10787 0 : searchRegionIndex2 = (int32_t)xSearchIndex + (int32_t)buf2Stride[4] * (int32_t)ySearchIndex;
10788 :
10789 0 : dist = nxm_sad_avg_kernel(&(context_ptr->sb_buffer[block_index_in_sb_buffer]), context_ptr->sb_src_stride << 1, buf1[4] + searchRegionIndex1, buf1Stride[4] << 1, buf2[4] + searchRegionIndex2, buf2Stride[4] << 1, pu_height >> 1, pu_width);
10790 0 : dist = dist << 1;
10791 :
10792 0 : if (dist < *pBestSad) {
10793 0 : *pBestSad = (uint32_t)dist;
10794 0 : *pBestMV = ((uint16_t)yMvQuarter[4] << 16) | ((uint16_t)xMvQuarter[4]);
10795 : }
10796 : }
10797 :
10798 : //TR position
10799 0 : if (validTR) {
10800 0 : searchRegionIndex1 = (int32_t)xSearchIndex + (int32_t)buf1Stride[5] * (int32_t)ySearchIndex;
10801 0 : searchRegionIndex2 = (int32_t)xSearchIndex + (int32_t)buf2Stride[5] * (int32_t)ySearchIndex;
10802 :
10803 0 : dist = nxm_sad_avg_kernel(&(context_ptr->sb_buffer[block_index_in_sb_buffer]), context_ptr->sb_src_stride << 1, buf1[5] + searchRegionIndex1, buf1Stride[5] << 1, buf2[5] + searchRegionIndex2, buf2Stride[5] << 1, pu_height >> 1, pu_width);
10804 0 : dist = dist << 1;
10805 :
10806 0 : if (dist < *pBestSad) {
10807 0 : *pBestSad = (uint32_t)dist;
10808 0 : *pBestMV = ((uint16_t)yMvQuarter[5] << 16) | ((uint16_t)xMvQuarter[5]);
10809 : }
10810 : }
10811 :
10812 : //BR position
10813 0 : if (validBR) {
10814 0 : searchRegionIndex1 = (int32_t)xSearchIndex + (int32_t)buf1Stride[6] * (int32_t)ySearchIndex;
10815 0 : searchRegionIndex2 = (int32_t)xSearchIndex + (int32_t)buf2Stride[6] * (int32_t)ySearchIndex;
10816 :
10817 0 : dist = nxm_sad_avg_kernel(&(context_ptr->sb_buffer[block_index_in_sb_buffer]), context_ptr->sb_src_stride << 1, buf1[6] + searchRegionIndex1, buf1Stride[6] << 1, buf2[6] + searchRegionIndex2, buf2Stride[6] << 1, pu_height >> 1, pu_width);
10818 0 : dist = dist << 1;
10819 :
10820 0 : if (dist < *pBestSad) {
10821 0 : *pBestSad = (uint32_t)dist;
10822 0 : *pBestMV = ((uint16_t)yMvQuarter[6] << 16) | ((uint16_t)xMvQuarter[6]);
10823 : }
10824 : }
10825 :
10826 : //BL position
10827 0 : if (validBL) {
10828 0 : searchRegionIndex1 = (int32_t)xSearchIndex + (int32_t)buf1Stride[7] * (int32_t)ySearchIndex;
10829 0 : searchRegionIndex2 = (int32_t)xSearchIndex + (int32_t)buf2Stride[7] * (int32_t)ySearchIndex;
10830 :
10831 0 : dist = nxm_sad_avg_kernel(&(context_ptr->sb_buffer[block_index_in_sb_buffer]), context_ptr->sb_src_stride << 1, buf1[7] + searchRegionIndex1, buf1Stride[7] << 1, buf2[7] + searchRegionIndex2, buf2Stride[7] << 1, pu_height >> 1, pu_width);
10832 0 : dist = dist << 1;
10833 :
10834 0 : if (dist < *pBestSad) {
10835 0 : *pBestSad = (uint32_t)dist;
10836 0 : *pBestMV = ((uint16_t)yMvQuarter[7] << 16) | ((uint16_t)xMvQuarter[7]);
10837 : }
10838 : }
10839 :
10840 0 : return;
10841 : }
10842 :
10843 : /***************************************************************
10844 : * set_quarterpel_refinement_inputs_on_the_fly_block
10845 : * determine the 2 half pel buffers to perform the averaging
10846 : * for Quarter Pel Refinement
10847 : ***************************************************************/
10848 0 : static void set_quarterpel_refinement_inputs_on_the_fly_block(
10849 : uint8_t *pos_Full, //[IN] points to A
10850 : uint32_t FullStride, //[IN]
10851 : uint8_t *pos_b, //[IN] points to b
10852 : uint8_t *pos_h, //[IN] points to h
10853 : uint8_t *pos_j, //[IN] points to j
10854 : uint32_t Stride, //[IN]
10855 : int16_t x_mv, //[IN]
10856 : int16_t y_mv, //[IN]
10857 : uint8_t **buf1, //[OUT]
10858 : uint32_t *buf1Stride, //[OUT]
10859 : uint8_t **buf2, //[OUT]
10860 : uint32_t *buf2Stride //[OUT]
10861 : )
10862 : {
10863 0 : uint32_t quarterPelRefinementMethod = (y_mv & 2) + ((x_mv & 2) >> 1);
10864 :
10865 : //for each one of the 8 postions, we need to determine the 2 half pel buffers to do averaging
10866 :
10867 : // A a b c
10868 : // d e f g
10869 : // h i j k
10870 : // n p q r
10871 :
10872 0 : switch (quarterPelRefinementMethod) {
10873 0 : case EB_QUARTER_IN_FULL:
10874 :
10875 0 : /*c=b+A*/ buf1[0] = pos_b; buf1Stride[0] = Stride; buf2[0] = pos_Full; buf2Stride[0] = FullStride;
10876 0 : /*a=A+b*/ buf1[1] = pos_Full; buf1Stride[1] = FullStride; buf2[1] = pos_b + 1; buf2Stride[1] = Stride;
10877 0 : /*n=h+A*/ buf1[2] = pos_h; buf1Stride[2] = Stride; buf2[2] = pos_Full; buf2Stride[2] = FullStride;
10878 0 : /*d=A+h*/ buf1[3] = pos_Full; buf1Stride[3] = FullStride; buf2[3] = pos_h + Stride; buf2Stride[3] = Stride;
10879 0 : /*r=b+h*/ buf1[4] = pos_b; buf1Stride[4] = Stride; buf2[4] = pos_h; buf2Stride[4] = Stride;
10880 0 : /*p=h+b*/ buf1[5] = pos_h; buf1Stride[5] = Stride; buf2[5] = pos_b + 1; buf2Stride[5] = Stride;
10881 0 : /*e=h+b*/ buf1[6] = pos_h + Stride; buf1Stride[6] = Stride; buf2[6] = pos_b + 1; buf2Stride[6] = Stride;
10882 0 : /*g=b+h*/ buf1[7] = pos_b; buf1Stride[7] = Stride; buf2[7] = pos_h + Stride; buf2Stride[7] = Stride;
10883 :
10884 0 : break;
10885 :
10886 0 : case EB_QUARTER_IN_HALF_HORIZONTAL:
10887 :
10888 0 : /*a=A+b*/ buf1[0] = pos_Full - 1; buf1Stride[0] = FullStride; buf2[0] = pos_b; buf2Stride[0] = Stride;
10889 0 : /*c=b+A*/ buf1[1] = pos_b; buf1Stride[1] = Stride; buf2[1] = pos_Full; buf2Stride[1] = FullStride;
10890 0 : /*q=j+b*/ buf1[2] = pos_j; buf1Stride[2] = Stride; buf2[2] = pos_b; buf2Stride[2] = Stride;
10891 0 : /*f=b+j*/ buf1[3] = pos_b; buf1Stride[3] = Stride; buf2[3] = pos_j + Stride; buf2Stride[3] = Stride;
10892 0 : /*p=h+b*/ buf1[4] = pos_h - 1; buf1Stride[4] = Stride; buf2[4] = pos_b; buf2Stride[4] = Stride;
10893 0 : /*r=b+h*/ buf1[5] = pos_b; buf1Stride[5] = Stride; buf2[5] = pos_h; buf2Stride[5] = Stride;
10894 0 : /*g=b+h*/ buf1[6] = pos_b; buf1Stride[6] = Stride; buf2[6] = pos_h + Stride; buf2Stride[6] = Stride;
10895 0 : /*e=h+b*/ buf1[7] = pos_h - 1 + Stride; buf1Stride[7] = Stride; buf2[7] = pos_b; buf2Stride[7] = Stride;
10896 :
10897 0 : break;
10898 :
10899 0 : case EB_QUARTER_IN_HALF_VERTICAL:
10900 :
10901 0 : /*k=j+h*/buf1[0] = pos_j; buf1Stride[0] = Stride; buf2[0] = pos_h; buf2Stride[0] = Stride;
10902 0 : /*i=h+j*/buf1[1] = pos_h; buf1Stride[1] = Stride; buf2[1] = pos_j + 1; buf2Stride[1] = Stride;
10903 0 : /*d=A+h*/buf1[2] = pos_Full - FullStride; buf1Stride[2] = FullStride; buf2[2] = pos_h; buf2Stride[2] = Stride;
10904 0 : /*n=h+A*/buf1[3] = pos_h; buf1Stride[3] = Stride; buf2[3] = pos_Full; buf2Stride[3] = FullStride;
10905 0 : /*g=b+h*/buf1[4] = pos_b - Stride; buf1Stride[4] = Stride; buf2[4] = pos_h; buf2Stride[4] = Stride;
10906 0 : /*e=h+b*/buf1[5] = pos_h; buf1Stride[5] = Stride; buf2[5] = pos_b + 1 - Stride; buf2Stride[5] = Stride;
10907 0 : /*p=h+b*/buf1[6] = pos_h; buf1Stride[6] = Stride; buf2[6] = pos_b + 1; buf2Stride[6] = Stride;
10908 0 : /*r=b+h*/buf1[7] = pos_b; buf1Stride[7] = Stride; buf2[7] = pos_h; buf2Stride[7] = Stride;
10909 :
10910 0 : break;
10911 :
10912 0 : case EB_QUARTER_IN_HALF_DIAGONAL:
10913 :
10914 0 : /*i=h+j*/buf1[0] = pos_h - 1; buf1Stride[0] = Stride; buf2[0] = pos_j; buf2Stride[0] = Stride;
10915 0 : /*k=j+h*/buf1[1] = pos_j; buf1Stride[1] = Stride; buf2[1] = pos_h; buf2Stride[1] = Stride;
10916 0 : /*f=b+j*/buf1[2] = pos_b - Stride; buf1Stride[2] = Stride; buf2[2] = pos_j; buf2Stride[2] = Stride;
10917 0 : /*q=j+b*/buf1[3] = pos_j; buf1Stride[3] = Stride; buf2[3] = pos_b; buf2Stride[3] = Stride;
10918 0 : /*e=h+b*/buf1[4] = pos_h - 1; buf1Stride[4] = Stride; buf2[4] = pos_b - Stride; buf2Stride[4] = Stride;
10919 0 : /*g=b+h*/buf1[5] = pos_b - Stride; buf1Stride[5] = Stride; buf2[5] = pos_h; buf2Stride[5] = Stride;
10920 0 : /*r=b+h*/buf1[6] = pos_b; buf1Stride[6] = Stride; buf2[6] = pos_h; buf2Stride[6] = Stride;
10921 0 : /*p=h+b*/buf1[7] = pos_h - 1; buf1Stride[7] = Stride; buf2[7] = pos_b; buf2Stride[7] = Stride;
10922 :
10923 0 : break;
10924 :
10925 0 : default:
10926 0 : break;
10927 : }
10928 :
10929 0 : return;
10930 : }
10931 :
10932 : /***************************************************************
10933 : * in_loop_me_quarterpel_search_sblock
10934 : * perform the quarter-pel refinement for the whole super-block
10935 : ***************************************************************/
10936 0 : static void in_loop_me_quarterpel_search_sblock(
10937 : SsMeContext *context_ptr, //[IN/OUT] ME context Ptr, used to get/update ME results
10938 : uint8_t *pos_Full, //[IN]
10939 : uint32_t full_stride, //[IN]
10940 : uint8_t *pos_b, //[IN]
10941 : uint8_t *pos_h, //[IN]
10942 : uint8_t *pos_j, //[IN]
10943 : int16_t x_search_area_origin, //[IN] search area origin in the horizontal direction, used to point to reference samples
10944 : int16_t y_search_area_origin) //[IN] search area origin in the vertical direction, used to point to reference samples
10945 : {
10946 : uint32_t block_index;
10947 :
10948 : uint32_t block_shift_x;
10949 : uint32_t block_shift_y;
10950 :
10951 : uint32_t block_index_in_sb_buffer;
10952 :
10953 : //for each one of the 8 positions, we need to determine the 2 buffers to do averaging
10954 : uint8_t *buf1[8];
10955 : uint8_t *buf2[8];
10956 :
10957 : uint32_t buf1Stride[8];
10958 : uint32_t buf2Stride[8];
10959 :
10960 : int16_t x_mv, y_mv;
10961 : uint32_t nidx;
10962 :
10963 0 : uint32_t quad_index = 0;
10964 0 : uint32_t block_offset = 0;
10965 0 : uint32_t x_offset = 0;
10966 0 : uint32_t y_offset = 0;
10967 0 : uint32_t number_of_sb_quad = context_ptr->sb_size == BLOCK_128X128 ? 4 : 1;
10968 :
10969 : // 4x4 [256 partitions]
10970 :
10971 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
10972 0 : for (block_index = 0; block_index < 256; ++block_index) {
10973 0 : block_offset = (quad_index * 256);
10974 0 : x_offset = (quad_index & 0x01) << 6;
10975 0 : y_offset = (quad_index >> 1) << 6;
10976 0 : nidx = tab4x4[block_index] + block_offset;
10977 0 : block_shift_x = ((block_index & 0xf) << 2) + x_offset;
10978 0 : block_shift_y = ((block_index >> 4) << 2) + y_offset;
10979 :
10980 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
10981 :
10982 0 : x_mv = _MVXT(context_ptr->p_best_mv4x4[nidx]);
10983 0 : y_mv = _MVYT(context_ptr->p_best_mv4x4[nidx]);
10984 :
10985 0 : set_quarterpel_refinement_inputs_on_the_fly_block(
10986 : pos_Full,
10987 : full_stride,
10988 : pos_b,
10989 : pos_h,
10990 : pos_j,
10991 : context_ptr->interpolated_stride,
10992 : x_mv,
10993 : y_mv,
10994 : buf1, buf1Stride,
10995 : buf2, buf2Stride);
10996 :
10997 0 : buf1[0] = buf1[0] + block_shift_x + block_shift_y * buf1Stride[0]; buf2[0] = buf2[0] + block_shift_x + block_shift_y * buf2Stride[0];
10998 0 : buf1[1] = buf1[1] + block_shift_x + block_shift_y * buf1Stride[1]; buf2[1] = buf2[1] + block_shift_x + block_shift_y * buf2Stride[1];
10999 0 : buf1[2] = buf1[2] + block_shift_x + block_shift_y * buf1Stride[2]; buf2[2] = buf2[2] + block_shift_x + block_shift_y * buf2Stride[2];
11000 0 : buf1[3] = buf1[3] + block_shift_x + block_shift_y * buf1Stride[3]; buf2[3] = buf2[3] + block_shift_x + block_shift_y * buf2Stride[3];
11001 0 : buf1[4] = buf1[4] + block_shift_x + block_shift_y * buf1Stride[4]; buf2[4] = buf2[4] + block_shift_x + block_shift_y * buf2Stride[4];
11002 0 : buf1[5] = buf1[5] + block_shift_x + block_shift_y * buf1Stride[5]; buf2[5] = buf2[5] + block_shift_x + block_shift_y * buf2Stride[5];
11003 0 : buf1[6] = buf1[6] + block_shift_x + block_shift_y * buf1Stride[6]; buf2[6] = buf2[6] + block_shift_x + block_shift_y * buf2Stride[6];
11004 0 : buf1[7] = buf1[7] + block_shift_x + block_shift_y * buf1Stride[7]; buf2[7] = buf2[7] + block_shift_x + block_shift_y * buf2Stride[7];
11005 :
11006 0 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11007 : context_ptr,
11008 : block_index_in_sb_buffer,
11009 : buf1, buf1Stride,
11010 : buf2, buf2Stride,
11011 : 4, 4,
11012 : x_search_area_origin,
11013 : y_search_area_origin,
11014 0 : &context_ptr->p_best_sad4x4[nidx],
11015 0 : &context_ptr->p_best_mv4x4[nidx],
11016 0 : context_ptr->psub_pel_direction4x4[nidx]);
11017 : }
11018 : }
11019 :
11020 : // 8x4 [128 8x4 blocks]
11021 :
11022 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
11023 0 : for (block_index = 0; block_index < 128; ++block_index) {
11024 0 : block_offset = (quad_index * 128);
11025 0 : x_offset = (quad_index & 0x01) << 6;
11026 0 : y_offset = (quad_index >> 1) << 6;
11027 0 : nidx = tab8x4[block_index] + block_offset;
11028 0 : block_shift_x = ((block_index & 0x07) << 3) + x_offset;
11029 0 : block_shift_y = ((block_index >> 3) << 2) + y_offset;
11030 :
11031 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
11032 :
11033 0 : x_mv = _MVXT(context_ptr->p_best_mv8x4[nidx]);
11034 0 : y_mv = _MVYT(context_ptr->p_best_mv8x4[nidx]);
11035 :
11036 0 : set_quarterpel_refinement_inputs_on_the_fly_block(
11037 : pos_Full,
11038 : full_stride,
11039 : pos_b,
11040 : pos_h,
11041 : pos_j,
11042 : context_ptr->interpolated_stride,
11043 : x_mv,
11044 : y_mv,
11045 : buf1, buf1Stride,
11046 : buf2, buf2Stride);
11047 :
11048 0 : buf1[0] = buf1[0] + block_shift_x + block_shift_y * buf1Stride[0]; buf2[0] = buf2[0] + block_shift_x + block_shift_y * buf2Stride[0];
11049 0 : buf1[1] = buf1[1] + block_shift_x + block_shift_y * buf1Stride[1]; buf2[1] = buf2[1] + block_shift_x + block_shift_y * buf2Stride[1];
11050 0 : buf1[2] = buf1[2] + block_shift_x + block_shift_y * buf1Stride[2]; buf2[2] = buf2[2] + block_shift_x + block_shift_y * buf2Stride[2];
11051 0 : buf1[3] = buf1[3] + block_shift_x + block_shift_y * buf1Stride[3]; buf2[3] = buf2[3] + block_shift_x + block_shift_y * buf2Stride[3];
11052 0 : buf1[4] = buf1[4] + block_shift_x + block_shift_y * buf1Stride[4]; buf2[4] = buf2[4] + block_shift_x + block_shift_y * buf2Stride[4];
11053 0 : buf1[5] = buf1[5] + block_shift_x + block_shift_y * buf1Stride[5]; buf2[5] = buf2[5] + block_shift_x + block_shift_y * buf2Stride[5];
11054 0 : buf1[6] = buf1[6] + block_shift_x + block_shift_y * buf1Stride[6]; buf2[6] = buf2[6] + block_shift_x + block_shift_y * buf2Stride[6];
11055 0 : buf1[7] = buf1[7] + block_shift_x + block_shift_y * buf1Stride[7]; buf2[7] = buf2[7] + block_shift_x + block_shift_y * buf2Stride[7];
11056 :
11057 0 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11058 : context_ptr,
11059 : block_index_in_sb_buffer,
11060 : buf1, buf1Stride,
11061 : buf2, buf2Stride,
11062 : 8, 4,
11063 : x_search_area_origin,
11064 : y_search_area_origin,
11065 0 : &context_ptr->p_best_sad8x4[nidx],
11066 0 : &context_ptr->p_best_mv8x4[nidx],
11067 0 : context_ptr->psub_pel_direction8x4[nidx]);
11068 : }
11069 : }
11070 :
11071 : // 4x8 [128 4x8 blocks]
11072 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
11073 0 : for (block_index = 0; block_index < 128; ++block_index) {
11074 0 : block_offset = (quad_index * 128);
11075 0 : x_offset = (quad_index & 0x01) << 6;
11076 0 : y_offset = (quad_index >> 1) << 6;
11077 0 : nidx = tab4x8[block_index] + block_offset;
11078 0 : block_shift_x = ((block_index & 0xf) << 2) + x_offset;
11079 0 : block_shift_y = ((block_index >> 4) << 3) + y_offset;
11080 :
11081 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
11082 :
11083 0 : x_mv = _MVXT(context_ptr->p_best_mv4x8[nidx]);
11084 0 : y_mv = _MVYT(context_ptr->p_best_mv4x8[nidx]);
11085 :
11086 0 : set_quarterpel_refinement_inputs_on_the_fly_block(
11087 : pos_Full,
11088 : full_stride,
11089 : pos_b,
11090 : pos_h,
11091 : pos_j,
11092 : context_ptr->interpolated_stride,
11093 : x_mv,
11094 : y_mv,
11095 : buf1, buf1Stride,
11096 : buf2, buf2Stride);
11097 :
11098 0 : buf1[0] = buf1[0] + block_shift_x + block_shift_y * buf1Stride[0]; buf2[0] = buf2[0] + block_shift_x + block_shift_y * buf2Stride[0];
11099 0 : buf1[1] = buf1[1] + block_shift_x + block_shift_y * buf1Stride[1]; buf2[1] = buf2[1] + block_shift_x + block_shift_y * buf2Stride[1];
11100 0 : buf1[2] = buf1[2] + block_shift_x + block_shift_y * buf1Stride[2]; buf2[2] = buf2[2] + block_shift_x + block_shift_y * buf2Stride[2];
11101 0 : buf1[3] = buf1[3] + block_shift_x + block_shift_y * buf1Stride[3]; buf2[3] = buf2[3] + block_shift_x + block_shift_y * buf2Stride[3];
11102 0 : buf1[4] = buf1[4] + block_shift_x + block_shift_y * buf1Stride[4]; buf2[4] = buf2[4] + block_shift_x + block_shift_y * buf2Stride[4];
11103 0 : buf1[5] = buf1[5] + block_shift_x + block_shift_y * buf1Stride[5]; buf2[5] = buf2[5] + block_shift_x + block_shift_y * buf2Stride[5];
11104 0 : buf1[6] = buf1[6] + block_shift_x + block_shift_y * buf1Stride[6]; buf2[6] = buf2[6] + block_shift_x + block_shift_y * buf2Stride[6];
11105 0 : buf1[7] = buf1[7] + block_shift_x + block_shift_y * buf1Stride[7]; buf2[7] = buf2[7] + block_shift_x + block_shift_y * buf2Stride[7];
11106 :
11107 0 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11108 : context_ptr,
11109 : block_index_in_sb_buffer,
11110 : buf1, buf1Stride,
11111 : buf2, buf2Stride,
11112 : 4, 8,
11113 : x_search_area_origin,
11114 : y_search_area_origin,
11115 0 : &context_ptr->p_best_sad4x8[nidx],
11116 0 : &context_ptr->p_best_mv4x8[nidx],
11117 0 : context_ptr->psub_pel_direction4x8[nidx]);
11118 : }
11119 : }
11120 :
11121 : // 8x8 [64 8x8 blocks]
11122 :
11123 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
11124 0 : for (block_index = 0; block_index < 64; ++block_index) {
11125 0 : block_offset = (quad_index * 64);
11126 0 : x_offset = (quad_index & 0x01) << 6;
11127 0 : y_offset = (quad_index >> 1) << 6;
11128 0 : nidx = tab8x8[block_index] + block_offset;
11129 0 : block_shift_x = ((block_index & 0x07) << 3) + x_offset;
11130 0 : block_shift_y = ((block_index >> 3) << 3) + y_offset;
11131 :
11132 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
11133 :
11134 0 : x_mv = _MVXT(context_ptr->p_best_mv8x8[nidx]);
11135 0 : y_mv = _MVYT(context_ptr->p_best_mv8x8[nidx]);
11136 :
11137 0 : set_quarterpel_refinement_inputs_on_the_fly_block(
11138 : pos_Full,
11139 : full_stride,
11140 : pos_b,
11141 : pos_h,
11142 : pos_j,
11143 : context_ptr->interpolated_stride,
11144 : x_mv,
11145 : y_mv,
11146 : buf1, buf1Stride,
11147 : buf2, buf2Stride);
11148 :
11149 0 : buf1[0] = buf1[0] + block_shift_x + block_shift_y * buf1Stride[0]; buf2[0] = buf2[0] + block_shift_x + block_shift_y * buf2Stride[0];
11150 0 : buf1[1] = buf1[1] + block_shift_x + block_shift_y * buf1Stride[1]; buf2[1] = buf2[1] + block_shift_x + block_shift_y * buf2Stride[1];
11151 0 : buf1[2] = buf1[2] + block_shift_x + block_shift_y * buf1Stride[2]; buf2[2] = buf2[2] + block_shift_x + block_shift_y * buf2Stride[2];
11152 0 : buf1[3] = buf1[3] + block_shift_x + block_shift_y * buf1Stride[3]; buf2[3] = buf2[3] + block_shift_x + block_shift_y * buf2Stride[3];
11153 0 : buf1[4] = buf1[4] + block_shift_x + block_shift_y * buf1Stride[4]; buf2[4] = buf2[4] + block_shift_x + block_shift_y * buf2Stride[4];
11154 0 : buf1[5] = buf1[5] + block_shift_x + block_shift_y * buf1Stride[5]; buf2[5] = buf2[5] + block_shift_x + block_shift_y * buf2Stride[5];
11155 0 : buf1[6] = buf1[6] + block_shift_x + block_shift_y * buf1Stride[6]; buf2[6] = buf2[6] + block_shift_x + block_shift_y * buf2Stride[6];
11156 0 : buf1[7] = buf1[7] + block_shift_x + block_shift_y * buf1Stride[7]; buf2[7] = buf2[7] + block_shift_x + block_shift_y * buf2Stride[7];
11157 :
11158 0 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11159 : context_ptr,
11160 : block_index_in_sb_buffer,
11161 : buf1, buf1Stride,
11162 : buf2, buf2Stride,
11163 : 8, 8,
11164 : x_search_area_origin,
11165 : y_search_area_origin,
11166 0 : &context_ptr->p_best_sad8x8[nidx],
11167 0 : &context_ptr->p_best_mv8x8[nidx],
11168 0 : context_ptr->psub_pel_direction8x8[nidx]);
11169 : }
11170 : }
11171 :
11172 : // 16x8 [32 partitions]
11173 :
11174 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
11175 0 : for (block_index = 0; block_index < 32; ++block_index) {
11176 0 : block_offset = (quad_index * 32);
11177 0 : x_offset = (quad_index & 0x01) << 6;
11178 0 : y_offset = (quad_index >> 1) << 6;
11179 0 : nidx = tab16x8[block_index] + block_offset;
11180 0 : block_shift_x = ((block_index & 0x03) << 4) + x_offset;
11181 0 : block_shift_y = ((block_index >> 2) << 3) + y_offset;
11182 :
11183 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
11184 :
11185 0 : x_mv = _MVXT(context_ptr->p_best_mv16x8[nidx]);
11186 0 : y_mv = _MVYT(context_ptr->p_best_mv16x8[nidx]);
11187 :
11188 0 : set_quarterpel_refinement_inputs_on_the_fly_block(
11189 : pos_Full,
11190 : full_stride,
11191 : pos_b,
11192 : pos_h,
11193 : pos_j,
11194 : context_ptr->interpolated_stride,
11195 : x_mv,
11196 : y_mv,
11197 : buf1, buf1Stride,
11198 : buf2, buf2Stride);
11199 :
11200 0 : buf1[0] = buf1[0] + block_shift_x + block_shift_y * buf1Stride[0]; buf2[0] = buf2[0] + block_shift_x + block_shift_y * buf2Stride[0];
11201 0 : buf1[1] = buf1[1] + block_shift_x + block_shift_y * buf1Stride[1]; buf2[1] = buf2[1] + block_shift_x + block_shift_y * buf2Stride[1];
11202 0 : buf1[2] = buf1[2] + block_shift_x + block_shift_y * buf1Stride[2]; buf2[2] = buf2[2] + block_shift_x + block_shift_y * buf2Stride[2];
11203 0 : buf1[3] = buf1[3] + block_shift_x + block_shift_y * buf1Stride[3]; buf2[3] = buf2[3] + block_shift_x + block_shift_y * buf2Stride[3];
11204 0 : buf1[4] = buf1[4] + block_shift_x + block_shift_y * buf1Stride[4]; buf2[4] = buf2[4] + block_shift_x + block_shift_y * buf2Stride[4];
11205 0 : buf1[5] = buf1[5] + block_shift_x + block_shift_y * buf1Stride[5]; buf2[5] = buf2[5] + block_shift_x + block_shift_y * buf2Stride[5];
11206 0 : buf1[6] = buf1[6] + block_shift_x + block_shift_y * buf1Stride[6]; buf2[6] = buf2[6] + block_shift_x + block_shift_y * buf2Stride[6];
11207 0 : buf1[7] = buf1[7] + block_shift_x + block_shift_y * buf1Stride[7]; buf2[7] = buf2[7] + block_shift_x + block_shift_y * buf2Stride[7];
11208 :
11209 0 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11210 : context_ptr,
11211 : block_index_in_sb_buffer,
11212 : buf1, buf1Stride,
11213 : buf2, buf2Stride,
11214 : 16, 8,
11215 : x_search_area_origin,
11216 : y_search_area_origin,
11217 0 : &context_ptr->p_best_sad16x8[nidx],
11218 0 : &context_ptr->p_best_mv16x8[nidx],
11219 0 : context_ptr->psub_pel_direction16x8[nidx]);
11220 : }
11221 : }
11222 :
11223 : // 8x16 [32 partitions]
11224 :
11225 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
11226 0 : for (block_index = 0; block_index < 32; ++block_index) {
11227 0 : block_offset = (quad_index * 32);
11228 0 : x_offset = (quad_index & 0x01) << 6;
11229 0 : y_offset = (quad_index >> 1) << 6;
11230 0 : nidx = tab8x16[block_index] + block_offset;
11231 0 : block_shift_x = ((block_index & 0x07) << 3) + x_offset;
11232 0 : block_shift_y = ((block_index >> 3) << 4) + y_offset;
11233 :
11234 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
11235 :
11236 0 : x_mv = _MVXT(context_ptr->p_best_mv8x16[nidx]);
11237 0 : y_mv = _MVYT(context_ptr->p_best_mv8x16[nidx]);
11238 :
11239 0 : set_quarterpel_refinement_inputs_on_the_fly_block(
11240 : pos_Full,
11241 : full_stride,
11242 : pos_b,
11243 : pos_h,
11244 : pos_j,
11245 : context_ptr->interpolated_stride,
11246 : x_mv,
11247 : y_mv,
11248 : buf1, buf1Stride,
11249 : buf2, buf2Stride);
11250 :
11251 0 : buf1[0] = buf1[0] + block_shift_x + block_shift_y * buf1Stride[0]; buf2[0] = buf2[0] + block_shift_x + block_shift_y * buf2Stride[0];
11252 0 : buf1[1] = buf1[1] + block_shift_x + block_shift_y * buf1Stride[1]; buf2[1] = buf2[1] + block_shift_x + block_shift_y * buf2Stride[1];
11253 0 : buf1[2] = buf1[2] + block_shift_x + block_shift_y * buf1Stride[2]; buf2[2] = buf2[2] + block_shift_x + block_shift_y * buf2Stride[2];
11254 0 : buf1[3] = buf1[3] + block_shift_x + block_shift_y * buf1Stride[3]; buf2[3] = buf2[3] + block_shift_x + block_shift_y * buf2Stride[3];
11255 0 : buf1[4] = buf1[4] + block_shift_x + block_shift_y * buf1Stride[4]; buf2[4] = buf2[4] + block_shift_x + block_shift_y * buf2Stride[4];
11256 0 : buf1[5] = buf1[5] + block_shift_x + block_shift_y * buf1Stride[5]; buf2[5] = buf2[5] + block_shift_x + block_shift_y * buf2Stride[5];
11257 0 : buf1[6] = buf1[6] + block_shift_x + block_shift_y * buf1Stride[6]; buf2[6] = buf2[6] + block_shift_x + block_shift_y * buf2Stride[6];
11258 0 : buf1[7] = buf1[7] + block_shift_x + block_shift_y * buf1Stride[7]; buf2[7] = buf2[7] + block_shift_x + block_shift_y * buf2Stride[7];
11259 :
11260 0 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11261 : context_ptr,
11262 : block_index_in_sb_buffer,
11263 : buf1, buf1Stride,
11264 : buf2, buf2Stride,
11265 : 8, 16,
11266 : x_search_area_origin,
11267 : y_search_area_origin,
11268 0 : &context_ptr->p_best_sad8x16[nidx],
11269 0 : &context_ptr->p_best_mv8x16[nidx],
11270 0 : context_ptr->psub_pel_direction8x16[nidx]);
11271 : }
11272 : }
11273 :
11274 : // 32x8 [16 partitions]
11275 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
11276 0 : for (block_index = 0; block_index < 16; ++block_index) {
11277 0 : block_offset = (quad_index * 16);
11278 0 : x_offset = (quad_index & 0x01) << 6;
11279 0 : y_offset = (quad_index >> 1) << 6;
11280 0 : nidx = tab32x8[block_index] + block_offset;
11281 0 : block_shift_x = ((block_index & 0x01) << 5) + x_offset;
11282 0 : block_shift_y = ((block_index >> 1) << 3) + y_offset;
11283 :
11284 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
11285 :
11286 0 : x_mv = _MVXT(context_ptr->p_best_mv32x8[nidx]);
11287 0 : y_mv = _MVYT(context_ptr->p_best_mv32x8[nidx]);
11288 :
11289 0 : set_quarterpel_refinement_inputs_on_the_fly_block(
11290 : pos_Full,
11291 : full_stride,
11292 : pos_b,
11293 : pos_h,
11294 : pos_j,
11295 : context_ptr->interpolated_stride,
11296 : x_mv,
11297 : y_mv,
11298 : buf1, buf1Stride,
11299 : buf2, buf2Stride);
11300 :
11301 0 : buf1[0] = buf1[0] + block_shift_x + block_shift_y * buf1Stride[0]; buf2[0] = buf2[0] + block_shift_x + block_shift_y * buf2Stride[0];
11302 0 : buf1[1] = buf1[1] + block_shift_x + block_shift_y * buf1Stride[1]; buf2[1] = buf2[1] + block_shift_x + block_shift_y * buf2Stride[1];
11303 0 : buf1[2] = buf1[2] + block_shift_x + block_shift_y * buf1Stride[2]; buf2[2] = buf2[2] + block_shift_x + block_shift_y * buf2Stride[2];
11304 0 : buf1[3] = buf1[3] + block_shift_x + block_shift_y * buf1Stride[3]; buf2[3] = buf2[3] + block_shift_x + block_shift_y * buf2Stride[3];
11305 0 : buf1[4] = buf1[4] + block_shift_x + block_shift_y * buf1Stride[4]; buf2[4] = buf2[4] + block_shift_x + block_shift_y * buf2Stride[4];
11306 0 : buf1[5] = buf1[5] + block_shift_x + block_shift_y * buf1Stride[5]; buf2[5] = buf2[5] + block_shift_x + block_shift_y * buf2Stride[5];
11307 0 : buf1[6] = buf1[6] + block_shift_x + block_shift_y * buf1Stride[6]; buf2[6] = buf2[6] + block_shift_x + block_shift_y * buf2Stride[6];
11308 0 : buf1[7] = buf1[7] + block_shift_x + block_shift_y * buf1Stride[7]; buf2[7] = buf2[7] + block_shift_x + block_shift_y * buf2Stride[7];
11309 :
11310 0 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11311 : context_ptr,
11312 : block_index_in_sb_buffer,
11313 : buf1, buf1Stride,
11314 : buf2, buf2Stride,
11315 : 32, 8,
11316 : x_search_area_origin,
11317 : y_search_area_origin,
11318 0 : &context_ptr->p_best_sad32x8[nidx],
11319 0 : &context_ptr->p_best_mv32x8[nidx],
11320 0 : context_ptr->psub_pel_direction32x8[nidx]);
11321 : }
11322 : }
11323 :
11324 : // 8x32 [16 partitions]
11325 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
11326 0 : for (block_index = 0; block_index < 16; ++block_index) {
11327 0 : block_offset = (quad_index * 16);
11328 0 : x_offset = (quad_index & 0x01) << 6;
11329 0 : y_offset = (quad_index >> 1) << 6;
11330 0 : nidx = tab8x32[block_index] + block_offset;
11331 0 : block_shift_x = ((block_index & 0x07) << 3) + x_offset;
11332 0 : block_shift_y = ((block_index >> 3) << 5) + y_offset;
11333 :
11334 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
11335 :
11336 0 : x_mv = _MVXT(context_ptr->p_best_mv8x32[nidx]);
11337 0 : y_mv = _MVYT(context_ptr->p_best_mv8x32[nidx]);
11338 :
11339 0 : set_quarterpel_refinement_inputs_on_the_fly_block(
11340 : pos_Full,
11341 : full_stride,
11342 : pos_b,
11343 : pos_h,
11344 : pos_j,
11345 : context_ptr->interpolated_stride,
11346 : x_mv,
11347 : y_mv,
11348 : buf1, buf1Stride,
11349 : buf2, buf2Stride);
11350 :
11351 0 : buf1[0] = buf1[0] + block_shift_x + block_shift_y * buf1Stride[0]; buf2[0] = buf2[0] + block_shift_x + block_shift_y * buf2Stride[0];
11352 0 : buf1[1] = buf1[1] + block_shift_x + block_shift_y * buf1Stride[1]; buf2[1] = buf2[1] + block_shift_x + block_shift_y * buf2Stride[1];
11353 0 : buf1[2] = buf1[2] + block_shift_x + block_shift_y * buf1Stride[2]; buf2[2] = buf2[2] + block_shift_x + block_shift_y * buf2Stride[2];
11354 0 : buf1[3] = buf1[3] + block_shift_x + block_shift_y * buf1Stride[3]; buf2[3] = buf2[3] + block_shift_x + block_shift_y * buf2Stride[3];
11355 0 : buf1[4] = buf1[4] + block_shift_x + block_shift_y * buf1Stride[4]; buf2[4] = buf2[4] + block_shift_x + block_shift_y * buf2Stride[4];
11356 0 : buf1[5] = buf1[5] + block_shift_x + block_shift_y * buf1Stride[5]; buf2[5] = buf2[5] + block_shift_x + block_shift_y * buf2Stride[5];
11357 0 : buf1[6] = buf1[6] + block_shift_x + block_shift_y * buf1Stride[6]; buf2[6] = buf2[6] + block_shift_x + block_shift_y * buf2Stride[6];
11358 0 : buf1[7] = buf1[7] + block_shift_x + block_shift_y * buf1Stride[7]; buf2[7] = buf2[7] + block_shift_x + block_shift_y * buf2Stride[7];
11359 :
11360 0 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11361 : context_ptr,
11362 : block_index_in_sb_buffer,
11363 : buf1, buf1Stride,
11364 : buf2, buf2Stride,
11365 : 8, 32,
11366 : x_search_area_origin,
11367 : y_search_area_origin,
11368 0 : &context_ptr->p_best_sad8x32[nidx],
11369 0 : &context_ptr->p_best_mv8x32[nidx],
11370 0 : context_ptr->psub_pel_direction8x32[nidx]);
11371 : }
11372 : }
11373 :
11374 : // 16x16 [16 partitions]
11375 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
11376 0 : for (block_index = 0; block_index < 16; ++block_index) {
11377 0 : block_offset = (quad_index * 16);
11378 0 : x_offset = (quad_index & 0x01) << 6;
11379 0 : y_offset = (quad_index >> 1) << 6;
11380 0 : nidx = tab16x16[block_index] + block_offset;
11381 0 : block_shift_x = ((block_index & 0x03) << 4) + x_offset;
11382 0 : block_shift_y = ((block_index >> 2) << 4) + y_offset;
11383 :
11384 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
11385 :
11386 0 : x_mv = _MVXT(context_ptr->p_best_mv16x16[nidx]);
11387 0 : y_mv = _MVYT(context_ptr->p_best_mv16x16[nidx]);
11388 :
11389 0 : set_quarterpel_refinement_inputs_on_the_fly_block(
11390 : pos_Full,
11391 : full_stride,
11392 : pos_b,
11393 : pos_h,
11394 : pos_j,
11395 : context_ptr->interpolated_stride,
11396 : x_mv,
11397 : y_mv,
11398 : buf1, buf1Stride,
11399 : buf2, buf2Stride);
11400 :
11401 0 : buf1[0] = buf1[0] + block_shift_x + block_shift_y * buf1Stride[0]; buf2[0] = buf2[0] + block_shift_x + block_shift_y * buf2Stride[0];
11402 0 : buf1[1] = buf1[1] + block_shift_x + block_shift_y * buf1Stride[1]; buf2[1] = buf2[1] + block_shift_x + block_shift_y * buf2Stride[1];
11403 0 : buf1[2] = buf1[2] + block_shift_x + block_shift_y * buf1Stride[2]; buf2[2] = buf2[2] + block_shift_x + block_shift_y * buf2Stride[2];
11404 0 : buf1[3] = buf1[3] + block_shift_x + block_shift_y * buf1Stride[3]; buf2[3] = buf2[3] + block_shift_x + block_shift_y * buf2Stride[3];
11405 0 : buf1[4] = buf1[4] + block_shift_x + block_shift_y * buf1Stride[4]; buf2[4] = buf2[4] + block_shift_x + block_shift_y * buf2Stride[4];
11406 0 : buf1[5] = buf1[5] + block_shift_x + block_shift_y * buf1Stride[5]; buf2[5] = buf2[5] + block_shift_x + block_shift_y * buf2Stride[5];
11407 0 : buf1[6] = buf1[6] + block_shift_x + block_shift_y * buf1Stride[6]; buf2[6] = buf2[6] + block_shift_x + block_shift_y * buf2Stride[6];
11408 0 : buf1[7] = buf1[7] + block_shift_x + block_shift_y * buf1Stride[7]; buf2[7] = buf2[7] + block_shift_x + block_shift_y * buf2Stride[7];
11409 :
11410 0 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11411 : context_ptr,
11412 : block_index_in_sb_buffer,
11413 : buf1, buf1Stride,
11414 : buf2, buf2Stride,
11415 : 16, 16,
11416 : x_search_area_origin,
11417 : y_search_area_origin,
11418 0 : &context_ptr->p_best_sad16x16[nidx],
11419 0 : &context_ptr->p_best_mv16x16[nidx],
11420 0 : context_ptr->psub_pel_direction16x16[nidx]);
11421 : }
11422 : }
11423 :
11424 : // 32x16 [8 partitions]
11425 :
11426 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
11427 0 : for (block_index = 0; block_index < 8; ++block_index) {
11428 0 : block_offset = (quad_index * 8);
11429 0 : x_offset = (quad_index & 0x01) << 6;
11430 0 : y_offset = (quad_index >> 1) << 6;
11431 0 : nidx = tab32x16[block_index] + block_offset;
11432 0 : block_shift_x = ((block_index & 0x01) << 5) + x_offset;
11433 0 : block_shift_y = ((block_index >> 1) << 4) + y_offset;
11434 :
11435 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
11436 :
11437 0 : x_mv = _MVXT(context_ptr->p_best_mv32x16[nidx]);
11438 0 : y_mv = _MVYT(context_ptr->p_best_mv32x16[nidx]);
11439 :
11440 0 : set_quarterpel_refinement_inputs_on_the_fly_block(
11441 : pos_Full,
11442 : full_stride,
11443 : pos_b,
11444 : pos_h,
11445 : pos_j,
11446 : context_ptr->interpolated_stride,
11447 : x_mv,
11448 : y_mv,
11449 : buf1, buf1Stride,
11450 : buf2, buf2Stride);
11451 :
11452 0 : buf1[0] = buf1[0] + block_shift_x + block_shift_y * buf1Stride[0]; buf2[0] = buf2[0] + block_shift_x + block_shift_y * buf2Stride[0];
11453 0 : buf1[1] = buf1[1] + block_shift_x + block_shift_y * buf1Stride[1]; buf2[1] = buf2[1] + block_shift_x + block_shift_y * buf2Stride[1];
11454 0 : buf1[2] = buf1[2] + block_shift_x + block_shift_y * buf1Stride[2]; buf2[2] = buf2[2] + block_shift_x + block_shift_y * buf2Stride[2];
11455 0 : buf1[3] = buf1[3] + block_shift_x + block_shift_y * buf1Stride[3]; buf2[3] = buf2[3] + block_shift_x + block_shift_y * buf2Stride[3];
11456 0 : buf1[4] = buf1[4] + block_shift_x + block_shift_y * buf1Stride[4]; buf2[4] = buf2[4] + block_shift_x + block_shift_y * buf2Stride[4];
11457 0 : buf1[5] = buf1[5] + block_shift_x + block_shift_y * buf1Stride[5]; buf2[5] = buf2[5] + block_shift_x + block_shift_y * buf2Stride[5];
11458 0 : buf1[6] = buf1[6] + block_shift_x + block_shift_y * buf1Stride[6]; buf2[6] = buf2[6] + block_shift_x + block_shift_y * buf2Stride[6];
11459 0 : buf1[7] = buf1[7] + block_shift_x + block_shift_y * buf1Stride[7]; buf2[7] = buf2[7] + block_shift_x + block_shift_y * buf2Stride[7];
11460 :
11461 0 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11462 : context_ptr,
11463 : block_index_in_sb_buffer,
11464 : buf1, buf1Stride,
11465 : buf2, buf2Stride,
11466 : 32, 16,
11467 : x_search_area_origin,
11468 : y_search_area_origin,
11469 0 : &context_ptr->p_best_sad32x16[nidx],
11470 0 : &context_ptr->p_best_mv32x16[nidx],
11471 0 : context_ptr->psub_pel_direction32x16[nidx]);
11472 : }
11473 : }
11474 :
11475 : // 16x32 [8 partitions]
11476 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
11477 0 : for (block_index = 0; block_index < 8; ++block_index) {
11478 0 : block_offset = (quad_index * 8);
11479 0 : x_offset = (quad_index & 0x01) << 6;
11480 0 : y_offset = (quad_index >> 1) << 6;
11481 0 : nidx = tab16x32[block_index] + block_offset;
11482 0 : block_shift_x = ((block_index & 0x03) << 4) + x_offset;
11483 0 : block_shift_y = ((block_index >> 2) << 5) + y_offset;
11484 :
11485 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
11486 :
11487 0 : x_mv = _MVXT(context_ptr->p_best_mv16x32[nidx]);
11488 0 : y_mv = _MVYT(context_ptr->p_best_mv16x32[nidx]);
11489 :
11490 0 : set_quarterpel_refinement_inputs_on_the_fly_block(
11491 : pos_Full,
11492 : full_stride,
11493 : pos_b,
11494 : pos_h,
11495 : pos_j,
11496 : context_ptr->interpolated_stride,
11497 : x_mv,
11498 : y_mv,
11499 : buf1, buf1Stride,
11500 : buf2, buf2Stride);
11501 :
11502 0 : buf1[0] = buf1[0] + block_shift_x + block_shift_y * buf1Stride[0]; buf2[0] = buf2[0] + block_shift_x + block_shift_y * buf2Stride[0];
11503 0 : buf1[1] = buf1[1] + block_shift_x + block_shift_y * buf1Stride[1]; buf2[1] = buf2[1] + block_shift_x + block_shift_y * buf2Stride[1];
11504 0 : buf1[2] = buf1[2] + block_shift_x + block_shift_y * buf1Stride[2]; buf2[2] = buf2[2] + block_shift_x + block_shift_y * buf2Stride[2];
11505 0 : buf1[3] = buf1[3] + block_shift_x + block_shift_y * buf1Stride[3]; buf2[3] = buf2[3] + block_shift_x + block_shift_y * buf2Stride[3];
11506 0 : buf1[4] = buf1[4] + block_shift_x + block_shift_y * buf1Stride[4]; buf2[4] = buf2[4] + block_shift_x + block_shift_y * buf2Stride[4];
11507 0 : buf1[5] = buf1[5] + block_shift_x + block_shift_y * buf1Stride[5]; buf2[5] = buf2[5] + block_shift_x + block_shift_y * buf2Stride[5];
11508 0 : buf1[6] = buf1[6] + block_shift_x + block_shift_y * buf1Stride[6]; buf2[6] = buf2[6] + block_shift_x + block_shift_y * buf2Stride[6];
11509 0 : buf1[7] = buf1[7] + block_shift_x + block_shift_y * buf1Stride[7]; buf2[7] = buf2[7] + block_shift_x + block_shift_y * buf2Stride[7];
11510 :
11511 0 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11512 : context_ptr,
11513 : block_index_in_sb_buffer,
11514 : buf1, buf1Stride,
11515 : buf2, buf2Stride,
11516 : 16, 32,
11517 : x_search_area_origin,
11518 : y_search_area_origin,
11519 0 : &context_ptr->p_best_sad16x32[nidx],
11520 0 : &context_ptr->p_best_mv16x32[nidx],
11521 0 : context_ptr->psub_pel_direction16x32[nidx]);
11522 : }
11523 : }
11524 :
11525 : // 32x32 [4 partitions]
11526 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
11527 0 : for (block_index = 0; block_index < 4; ++block_index) {
11528 0 : block_offset = (quad_index * 4);
11529 0 : x_offset = (quad_index & 0x01) << 6;
11530 0 : y_offset = (quad_index >> 1) << 6;
11531 0 : nidx = tab32x32[block_index] + block_offset;
11532 0 : block_shift_x = ((block_index & 0x01) << 5) + x_offset;
11533 0 : block_shift_y = ((block_index >> 1)) + y_offset;
11534 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
11535 :
11536 0 : x_mv = _MVXT(context_ptr->p_best_mv32x32[nidx]);
11537 0 : y_mv = _MVYT(context_ptr->p_best_mv32x32[nidx]);
11538 :
11539 0 : set_quarterpel_refinement_inputs_on_the_fly_block(
11540 : pos_Full,
11541 : full_stride,
11542 : pos_b,
11543 : pos_h,
11544 : pos_j,
11545 : context_ptr->interpolated_stride,
11546 : x_mv,
11547 : y_mv,
11548 : buf1, buf1Stride,
11549 : buf2, buf2Stride);
11550 :
11551 0 : buf1[0] = buf1[0] + block_shift_x + block_shift_y * buf1Stride[0]; buf2[0] = buf2[0] + block_shift_x + block_shift_y * buf2Stride[0];
11552 0 : buf1[1] = buf1[1] + block_shift_x + block_shift_y * buf1Stride[1]; buf2[1] = buf2[1] + block_shift_x + block_shift_y * buf2Stride[1];
11553 0 : buf1[2] = buf1[2] + block_shift_x + block_shift_y * buf1Stride[2]; buf2[2] = buf2[2] + block_shift_x + block_shift_y * buf2Stride[2];
11554 0 : buf1[3] = buf1[3] + block_shift_x + block_shift_y * buf1Stride[3]; buf2[3] = buf2[3] + block_shift_x + block_shift_y * buf2Stride[3];
11555 0 : buf1[4] = buf1[4] + block_shift_x + block_shift_y * buf1Stride[4]; buf2[4] = buf2[4] + block_shift_x + block_shift_y * buf2Stride[4];
11556 0 : buf1[5] = buf1[5] + block_shift_x + block_shift_y * buf1Stride[5]; buf2[5] = buf2[5] + block_shift_x + block_shift_y * buf2Stride[5];
11557 0 : buf1[6] = buf1[6] + block_shift_x + block_shift_y * buf1Stride[6]; buf2[6] = buf2[6] + block_shift_x + block_shift_y * buf2Stride[6];
11558 0 : buf1[7] = buf1[7] + block_shift_x + block_shift_y * buf1Stride[7]; buf2[7] = buf2[7] + block_shift_x + block_shift_y * buf2Stride[7];
11559 :
11560 0 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11561 : context_ptr,
11562 : block_index_in_sb_buffer,
11563 : buf1, buf1Stride,
11564 : buf2, buf2Stride,
11565 : 32, 32,
11566 : x_search_area_origin,
11567 : y_search_area_origin,
11568 0 : &context_ptr->p_best_sad32x32[nidx],
11569 0 : &context_ptr->p_best_mv32x32[nidx],
11570 0 : context_ptr->psub_pel_direction32x32[nidx]);
11571 : }
11572 : }
11573 :
11574 : // 64x32 [2 partitions]
11575 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
11576 0 : for (block_index = 0; block_index < 2; ++block_index) {
11577 0 : block_offset = (quad_index * 2);
11578 0 : x_offset = (quad_index & 0x01) << 6;
11579 0 : y_offset = (quad_index >> 1) << 6;
11580 0 : nidx = tab64x32[block_index] + block_offset;
11581 0 : block_shift_x = x_offset;
11582 0 : block_shift_y = (block_index << 5) + y_offset;
11583 :
11584 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
11585 :
11586 0 : x_mv = _MVXT(context_ptr->p_best_mv64x32[nidx]);
11587 0 : y_mv = _MVYT(context_ptr->p_best_mv64x32[nidx]);
11588 :
11589 0 : set_quarterpel_refinement_inputs_on_the_fly_block(
11590 : pos_Full,
11591 : full_stride,
11592 : pos_b,
11593 : pos_h,
11594 : pos_j,
11595 : context_ptr->interpolated_stride,
11596 : x_mv,
11597 : y_mv,
11598 : buf1, buf1Stride,
11599 : buf2, buf2Stride);
11600 :
11601 0 : buf1[0] = buf1[0] + block_shift_x + block_shift_y * buf1Stride[0]; buf2[0] = buf2[0] + block_shift_x + block_shift_y * buf2Stride[0];
11602 0 : buf1[1] = buf1[1] + block_shift_x + block_shift_y * buf1Stride[1]; buf2[1] = buf2[1] + block_shift_x + block_shift_y * buf2Stride[1];
11603 0 : buf1[2] = buf1[2] + block_shift_x + block_shift_y * buf1Stride[2]; buf2[2] = buf2[2] + block_shift_x + block_shift_y * buf2Stride[2];
11604 0 : buf1[3] = buf1[3] + block_shift_x + block_shift_y * buf1Stride[3]; buf2[3] = buf2[3] + block_shift_x + block_shift_y * buf2Stride[3];
11605 0 : buf1[4] = buf1[4] + block_shift_x + block_shift_y * buf1Stride[4]; buf2[4] = buf2[4] + block_shift_x + block_shift_y * buf2Stride[4];
11606 0 : buf1[5] = buf1[5] + block_shift_x + block_shift_y * buf1Stride[5]; buf2[5] = buf2[5] + block_shift_x + block_shift_y * buf2Stride[5];
11607 0 : buf1[6] = buf1[6] + block_shift_x + block_shift_y * buf1Stride[6]; buf2[6] = buf2[6] + block_shift_x + block_shift_y * buf2Stride[6];
11608 0 : buf1[7] = buf1[7] + block_shift_x + block_shift_y * buf1Stride[7]; buf2[7] = buf2[7] + block_shift_x + block_shift_y * buf2Stride[7];
11609 :
11610 0 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11611 : context_ptr,
11612 : block_index_in_sb_buffer,
11613 : buf1, buf1Stride,
11614 : buf2, buf2Stride,
11615 : 64, 32,
11616 : x_search_area_origin,
11617 : y_search_area_origin,
11618 0 : &context_ptr->p_best_sad64x32[nidx],
11619 0 : &context_ptr->p_best_mv64x32[nidx],
11620 0 : context_ptr->psub_pel_direction64x32[nidx]);
11621 : }
11622 : }
11623 :
11624 : // 32x64 [2 partitions]
11625 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
11626 0 : for (block_index = 0; block_index < 2; ++block_index) {
11627 0 : block_offset = (quad_index * 2);
11628 0 : x_offset = (quad_index & 0x01) << 6;
11629 0 : y_offset = (quad_index >> 1) << 6;
11630 0 : nidx = tab32x64[block_index] + block_offset;
11631 0 : block_shift_x = (block_index << 5) + x_offset;
11632 0 : block_shift_y = y_offset;
11633 :
11634 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
11635 :
11636 0 : x_mv = _MVXT(context_ptr->p_best_mv32x64[nidx]);
11637 0 : y_mv = _MVYT(context_ptr->p_best_mv32x64[nidx]);
11638 :
11639 0 : set_quarterpel_refinement_inputs_on_the_fly_block(
11640 : pos_Full,
11641 : full_stride,
11642 : pos_b,
11643 : pos_h,
11644 : pos_j,
11645 : context_ptr->interpolated_stride,
11646 : x_mv,
11647 : y_mv,
11648 : buf1, buf1Stride,
11649 : buf2, buf2Stride);
11650 :
11651 0 : buf1[0] = buf1[0] + block_shift_x + block_shift_y * buf1Stride[0]; buf2[0] = buf2[0] + block_shift_x + block_shift_y * buf2Stride[0];
11652 0 : buf1[1] = buf1[1] + block_shift_x + block_shift_y * buf1Stride[1]; buf2[1] = buf2[1] + block_shift_x + block_shift_y * buf2Stride[1];
11653 0 : buf1[2] = buf1[2] + block_shift_x + block_shift_y * buf1Stride[2]; buf2[2] = buf2[2] + block_shift_x + block_shift_y * buf2Stride[2];
11654 0 : buf1[3] = buf1[3] + block_shift_x + block_shift_y * buf1Stride[3]; buf2[3] = buf2[3] + block_shift_x + block_shift_y * buf2Stride[3];
11655 0 : buf1[4] = buf1[4] + block_shift_x + block_shift_y * buf1Stride[4]; buf2[4] = buf2[4] + block_shift_x + block_shift_y * buf2Stride[4];
11656 0 : buf1[5] = buf1[5] + block_shift_x + block_shift_y * buf1Stride[5]; buf2[5] = buf2[5] + block_shift_x + block_shift_y * buf2Stride[5];
11657 0 : buf1[6] = buf1[6] + block_shift_x + block_shift_y * buf1Stride[6]; buf2[6] = buf2[6] + block_shift_x + block_shift_y * buf2Stride[6];
11658 0 : buf1[7] = buf1[7] + block_shift_x + block_shift_y * buf1Stride[7]; buf2[7] = buf2[7] + block_shift_x + block_shift_y * buf2Stride[7];
11659 :
11660 0 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11661 : context_ptr,
11662 : block_index_in_sb_buffer,
11663 : buf1, buf1Stride,
11664 : buf2, buf2Stride,
11665 : 32, 64,
11666 : x_search_area_origin,
11667 : y_search_area_origin,
11668 0 : &context_ptr->p_best_sad32x64[nidx],
11669 0 : &context_ptr->p_best_mv32x64[nidx],
11670 0 : context_ptr->psub_pel_direction32x64[nidx]);
11671 : }
11672 : }
11673 :
11674 : // 64x64 [1 partitions]
11675 0 : for (quad_index = 0; quad_index < number_of_sb_quad; quad_index++) {
11676 0 : block_index = 0;
11677 :
11678 0 : block_offset = quad_index;
11679 0 : x_offset = (quad_index & 0x01) << 6;
11680 0 : y_offset = (quad_index >> 1) << 6;
11681 0 : nidx = block_offset;
11682 0 : block_shift_x = x_offset;
11683 0 : block_shift_y = y_offset;
11684 :
11685 0 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
11686 :
11687 0 : x_mv = _MVXT(context_ptr->p_best_mv64x64[nidx]);
11688 0 : y_mv = _MVYT(context_ptr->p_best_mv64x64[nidx]);
11689 :
11690 0 : set_quarterpel_refinement_inputs_on_the_fly_block(
11691 : pos_Full,
11692 : full_stride,
11693 : pos_b,
11694 : pos_h,
11695 : pos_j,
11696 : context_ptr->interpolated_stride,
11697 : x_mv,
11698 : y_mv,
11699 : buf1, buf1Stride,
11700 : buf2, buf2Stride);
11701 :
11702 0 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11703 : context_ptr,
11704 : block_index_in_sb_buffer,
11705 : buf1, buf1Stride,
11706 : buf2, buf2Stride,
11707 : 64, 64,
11708 : x_search_area_origin,
11709 : y_search_area_origin,
11710 0 : &context_ptr->p_best_sad64x64[nidx],
11711 0 : &context_ptr->p_best_mv64x64[nidx],
11712 0 : context_ptr->psub_pel_direction64x64[nidx]);
11713 : }
11714 :
11715 : if (0) {
11716 : // 128x64 [2 partitions]
11717 : for (block_index = 0; block_index < 2; ++block_index) {
11718 : block_index = 0;
11719 :
11720 : block_shift_x = 0;
11721 : block_shift_y = block_index << 6;
11722 :
11723 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
11724 :
11725 : x_mv = _MVXT(context_ptr->p_best_mv128x64[block_index]);
11726 : y_mv = _MVYT(context_ptr->p_best_mv128x64[block_index]);
11727 :
11728 : set_quarterpel_refinement_inputs_on_the_fly_block(
11729 : pos_Full,
11730 : full_stride,
11731 : pos_b,
11732 : pos_h,
11733 : pos_j,
11734 : context_ptr->interpolated_stride,
11735 : x_mv,
11736 : y_mv,
11737 : buf1, buf1Stride,
11738 : buf2, buf2Stride);
11739 :
11740 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11741 : context_ptr,
11742 : block_index_in_sb_buffer,
11743 : buf1, buf1Stride,
11744 : buf2, buf2Stride,
11745 : 128, 64,
11746 : x_search_area_origin,
11747 : y_search_area_origin,
11748 : &context_ptr->p_best_sad128x64[block_index],
11749 : &context_ptr->p_best_mv128x64[block_index],
11750 : context_ptr->psub_pel_direction128x64[block_index]);
11751 : }
11752 : // 64x128 [2 partitions]
11753 : for (block_index = 0; block_index < 2; ++block_index) {
11754 : block_index = 0;
11755 :
11756 : block_shift_x = block_index << 6;
11757 : block_shift_y = 0;
11758 :
11759 : block_index_in_sb_buffer = block_shift_x + block_shift_y * context_ptr->sb_src_stride;
11760 :
11761 : x_mv = _MVXT(context_ptr->p_best_mv64x128[block_index]);
11762 : y_mv = _MVYT(context_ptr->p_best_mv64x128[block_index]);
11763 :
11764 : set_quarterpel_refinement_inputs_on_the_fly_block(
11765 : pos_Full,
11766 : full_stride,
11767 : pos_b,
11768 : pos_h,
11769 : pos_j,
11770 : context_ptr->interpolated_stride,
11771 : x_mv,
11772 : y_mv,
11773 : buf1, buf1Stride,
11774 : buf2, buf2Stride);
11775 :
11776 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11777 : context_ptr,
11778 : block_index_in_sb_buffer,
11779 : buf1, buf1Stride,
11780 : buf2, buf2Stride,
11781 : 64, 128,
11782 : x_search_area_origin,
11783 : y_search_area_origin,
11784 : &context_ptr->p_best_sad64x128[block_index],
11785 : &context_ptr->p_best_mv64x128[block_index],
11786 : context_ptr->psub_pel_direction64x128[block_index]);
11787 : }
11788 : // 128x128 [1 partitions]
11789 : block_index = 0;
11790 :
11791 : block_shift_x = 0;
11792 : block_shift_y = 0;
11793 :
11794 : block_index_in_sb_buffer = 0;
11795 :
11796 : x_mv = _MVXT(context_ptr->p_best_mv128x128[block_index]);
11797 : y_mv = _MVYT(context_ptr->p_best_mv128x128[block_index]);
11798 :
11799 : set_quarterpel_refinement_inputs_on_the_fly_block(
11800 : pos_Full,
11801 : full_stride,
11802 : pos_b,
11803 : pos_h,
11804 : pos_j,
11805 : context_ptr->interpolated_stride,
11806 : x_mv,
11807 : y_mv,
11808 : buf1, buf1Stride,
11809 : buf2, buf2Stride);
11810 :
11811 : in_loop_me_quarterpel_refinement_on_the_fly_block(
11812 : context_ptr,
11813 : block_index_in_sb_buffer,
11814 : buf1, buf1Stride,
11815 : buf2, buf2Stride,
11816 : 128, 128,
11817 : x_search_area_origin,
11818 : y_search_area_origin,
11819 : &context_ptr->p_best_sad128x128[block_index],
11820 : &context_ptr->p_best_mv128x128[block_index],
11821 : context_ptr->psub_pel_direction128x128);
11822 : }
11823 0 : return;
11824 : }
11825 : // Compile-time bounds used to size the in-loop ME search-area buffer.
11826 : #define MAX_SEARCH_POINT_WIDTH 128
11827 : #define MAX_SEARCH_POINT_HEIGHT 128
11828 : // Per-dimension extent: MAX_SB_SIZE + search-point bound + ME_FILTER_TAP margin.
11829 : #define MAX_TATAL_SEARCH_AREA_WIDTH (MAX_SB_SIZE + MAX_SEARCH_POINT_WIDTH + ME_FILTER_TAP)
11830 : #define MAX_TATAL_SEARCH_AREA_HEIGHT (MAX_SB_SIZE + MAX_SEARCH_POINT_HEIGHT + ME_FILTER_TAP)
11831 : // Total element count; parenthesized so the product survives use inside a larger expression.
11832 : #define MAX_SEARCH_AREA_SIZE (MAX_TATAL_SEARCH_AREA_WIDTH * MAX_TATAL_SEARCH_AREA_HEIGHT)
11833 : /***************************************************************
11834 : * in_loop_motion_estimation_sblock
11835 : * perform the full-pel search for the whole super-block
11836 : * on the reference reconstructed pictures
11837 : ***************************************************************/
11838 0 : EB_EXTERN EbErrorType in_loop_motion_estimation_sblock(
11839 : PictureControlSet *picture_control_set_ptr, // input parameter, Picture Control Set Ptr
11840 : uint32_t sb_origin_x, // input parameter, SB Origin X
11841 : uint32_t sb_origin_y, // input parameter, SB Origin X
11842 : int16_t x_mv_l0,
11843 : int16_t y_mv_l0,
11844 : int16_t x_mv_l1,
11845 : int16_t y_mv_l1,
11846 : SsMeContext *context_ptr) // input parameter, ME Context Ptr, used to store decimated/interpolated LCU/SR
11847 :
11848 : {
11849 0 : EbErrorType return_error = EB_ErrorNone;
11850 :
11851 0 : SequenceControlSet *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
11852 :
11853 : int16_t xTopLeftSearchRegion;
11854 : int16_t yTopLeftSearchRegion;
11855 : uint32_t searchRegionIndex;
11856 0 : int16_t picture_width = (int16_t)((SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr)->seq_header.max_frame_width;
11857 0 : int16_t picture_height = (int16_t)((SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr)->seq_header.max_frame_height;
11858 :
11859 0 : int16_t padWidth = (int16_t)BLOCK_SIZE_64 - 1;
11860 0 : int16_t padHeight = (int16_t)BLOCK_SIZE_64 - 1;
11861 : int16_t search_area_width;
11862 : int16_t search_area_height;
11863 : int16_t x_search_area_origin;
11864 : int16_t y_search_area_origin;
11865 0 : int16_t origin_x = (int16_t)sb_origin_x;
11866 0 : int16_t origin_y = (int16_t)sb_origin_y;
11867 :
11868 0 : uint8_t refPicIndex = 0;
11869 : // Final ME Search Center
11870 0 : int16_t x_search_center = 0;
11871 0 : int16_t y_search_center = 0;
11872 :
11873 : uint32_t numOfListToSearch;
11874 : uint32_t listIndex;
11875 : EbPictureBufferDesc *refPicPtr;
11876 : EbReferenceObject *referenceObject;
11877 :
11878 0 : uint32_t number_of_sb_quad = sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128 ? 4 : 1;
11879 0 : context_ptr->sb_size = sequence_control_set_ptr->seq_header.sb_size;
11880 0 : context_ptr->sb_side = sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128 ? 128 : 64;
11881 :
11882 0 : const uint32_t start_idx_8x8 = 256 * number_of_sb_quad;
11883 0 : const uint32_t start_idx_16x16 = 320 * number_of_sb_quad;
11884 0 : const uint32_t start_idx_32x32 = 336 * number_of_sb_quad;
11885 0 : const uint32_t start_idx_64x64 = 340 * number_of_sb_quad;
11886 0 : const uint32_t start_idx_8x4 = 341 * number_of_sb_quad;
11887 0 : const uint32_t start_idx_4x8 = 469 * number_of_sb_quad;
11888 0 : const uint32_t start_idx_4x16 = 597 * number_of_sb_quad;
11889 0 : const uint32_t start_idx_16x4 = 661 * number_of_sb_quad;
11890 0 : const uint32_t start_idx_16x8 = 725 * number_of_sb_quad;
11891 0 : const uint32_t start_idx_8x16 = 757 * number_of_sb_quad;
11892 0 : const uint32_t start_idx_32x8 = 789 * number_of_sb_quad;
11893 0 : const uint32_t start_idx_8x32 = 805 * number_of_sb_quad;
11894 0 : const uint32_t start_idx_32x16 = 821 * number_of_sb_quad;
11895 0 : const uint32_t start_idx_16x32 = 829 * number_of_sb_quad;
11896 0 : const uint32_t start_idx_64x16 = 837 * number_of_sb_quad;
11897 0 : const uint32_t start_idx_16x64 = 841 * number_of_sb_quad;
11898 0 : const uint32_t start_idx_64x32 = 845 * number_of_sb_quad;
11899 0 : const uint32_t start_idx_32x64 = 847 * number_of_sb_quad;
11900 0 : const uint32_t start_idx_128x64 = 849 * number_of_sb_quad;
11901 :
11902 0 : context_ptr->fractional_search_method = SSD_SEARCH; // all in-loop
11903 :
11904 0 : numOfListToSearch = (picture_control_set_ptr->slice_type == P_SLICE) ? (uint32_t)REF_LIST_0 : (uint32_t)REF_LIST_1;
11905 :
11906 : // Uni-Prediction motion estimation loop
11907 : // List Loop
11908 0 : for (listIndex = REF_LIST_0; listIndex <= numOfListToSearch; ++listIndex) {
11909 0 : EbBool is16bit = (EbBool)(sequence_control_set_ptr->static_config.encoder_bit_depth > EB_8BIT);
11910 0 : referenceObject = (EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[listIndex][0]->object_ptr;
11911 0 : refPicPtr = is16bit ? (EbPictureBufferDesc*)referenceObject->reference_picture16bit : (EbPictureBufferDesc*)referenceObject->reference_picture;
11912 0 : search_area_width = (int16_t)MIN(context_ptr->search_area_width, 127);
11913 0 : search_area_height = (int16_t)MIN(context_ptr->search_area_height, 127);
11914 0 : x_search_center = listIndex == REF_LIST_0 ? x_mv_l0 : x_mv_l1;
11915 0 : y_search_center = listIndex == REF_LIST_0 ? y_mv_l0 : y_mv_l1;
11916 :
11917 0 : x_search_area_origin = x_search_center - (search_area_width >> 1);
11918 0 : y_search_area_origin = y_search_center - (search_area_height >> 1);
11919 :
11920 : // Correct the left edge of the Search Area if it is not on the reference Picture
11921 0 : x_search_area_origin = ((origin_x + x_search_area_origin) < -padWidth) ?
11922 0 : -padWidth - origin_x :
11923 : x_search_area_origin;
11924 :
11925 0 : search_area_width = ((origin_x + x_search_area_origin) < -padWidth) ?
11926 0 : search_area_width - (-padWidth - (origin_x + x_search_area_origin)) :
11927 : search_area_width;
11928 :
11929 : // Correct the right edge of the Search Area if its not on the reference Picture
11930 0 : x_search_area_origin = ((origin_x + x_search_area_origin) > picture_width - 1) ?
11931 0 : x_search_area_origin - ((origin_x + x_search_area_origin) - (picture_width - 1)) :
11932 : x_search_area_origin;
11933 :
11934 : // //check whether the needed search area is coverd by the reference picture and adjust its origin to satisfy the condition if not.
11935 0 : if (sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128) {
11936 0 : int32_t righ_sa_pos_x = refPicPtr->origin_x + origin_x + x_search_area_origin + search_area_width + (context_ptr->sb_side - 1) + (ME_FILTER_TAP >> 1);
11937 0 : int32_t righ_ref_pos_x = picture_width - 1 + (2 * refPicPtr->origin_x);
11938 :
11939 0 : x_search_area_origin = righ_sa_pos_x > righ_ref_pos_x ? x_search_area_origin - (righ_sa_pos_x - righ_ref_pos_x) : x_search_area_origin;
11940 :
11941 0 : int32_t bottom_sa_pos_x = refPicPtr->origin_y + origin_y + y_search_area_origin + search_area_height + (context_ptr->sb_side - 1) + (ME_FILTER_TAP >> 1);
11942 0 : int32_t bottom_ref_pos_x = picture_height - 1 + (2 * refPicPtr->origin_y);
11943 :
11944 0 : y_search_area_origin = bottom_sa_pos_x > bottom_ref_pos_x ? y_search_area_origin - (bottom_sa_pos_x - bottom_ref_pos_x) : y_search_area_origin;
11945 : }
11946 :
11947 0 : search_area_width = ((origin_x + x_search_area_origin + search_area_width) > picture_width) ?
11948 0 : MAX(1, search_area_width - ((origin_x + x_search_area_origin + search_area_width) - picture_width)) :
11949 : search_area_width;
11950 :
11951 : // Correct the top edge of the Search Area if it is not on the reference Picture
11952 0 : y_search_area_origin = ((origin_y + y_search_area_origin) < -padHeight) ?
11953 0 : -padHeight - origin_y :
11954 : y_search_area_origin;
11955 :
11956 0 : search_area_height = ((origin_y + y_search_area_origin) < -padHeight) ?
11957 0 : search_area_height - (-padHeight - (origin_y + y_search_area_origin)) :
11958 : search_area_height;
11959 :
11960 : // Correct the bottom edge of the Search Area if its not on the reference Picture
11961 0 : y_search_area_origin = ((origin_y + y_search_area_origin) > picture_height - 1) ?
11962 0 : y_search_area_origin - ((origin_y + y_search_area_origin) - (picture_height - 1)) :
11963 : y_search_area_origin;
11964 :
11965 0 : search_area_height = (origin_y + y_search_area_origin + search_area_height > picture_height) ?
11966 0 : MAX(1, search_area_height - ((origin_y + y_search_area_origin + search_area_height) - picture_height)) :
11967 : search_area_height;
11968 :
11969 0 : context_ptr->x_search_area_origin[listIndex][0] = x_search_area_origin;
11970 0 : context_ptr->y_search_area_origin[listIndex][0] = y_search_area_origin;
11971 :
11972 0 : xTopLeftSearchRegion = (int16_t)(refPicPtr->origin_x + sb_origin_x) - (ME_FILTER_TAP >> 1) + x_search_area_origin;
11973 0 : yTopLeftSearchRegion = (int16_t)(refPicPtr->origin_y + sb_origin_y) - (ME_FILTER_TAP >> 1) + y_search_area_origin;
11974 0 : searchRegionIndex = (xTopLeftSearchRegion)+(yTopLeftSearchRegion)* refPicPtr->stride_y;
11975 :
11976 : // Umpack the reference for 16bit reference picture.
11977 0 : if (is16bit) {
11978 0 : uint16_t *ptr16 = (uint16_t *)refPicPtr->buffer_y + searchRegionIndex;
11979 :
11980 : uint8_t searchAreaBuffer[MAX_SEARCH_AREA_SIZE];
11981 :
11982 0 : extract8_bitdata_safe_sub(
11983 : ptr16,
11984 0 : refPicPtr->stride_y,
11985 : searchAreaBuffer,
11986 : MAX_TATAL_SEARCH_AREA_WIDTH,
11987 0 : search_area_width + context_ptr->sb_side + ME_FILTER_TAP,
11988 0 : search_area_height + context_ptr->sb_side + ME_FILTER_TAP,
11989 : EB_FALSE);
11990 :
11991 0 : context_ptr->integer_buffer_ptr[listIndex][0] = &(searchAreaBuffer[0]);
11992 0 : context_ptr->interpolated_full_stride[listIndex][0] = MAX_TATAL_SEARCH_AREA_WIDTH;
11993 : }
11994 : else {
11995 0 : context_ptr->integer_buffer_ptr[listIndex][0] = &(refPicPtr->buffer_y[searchRegionIndex]);
11996 0 : context_ptr->interpolated_full_stride[listIndex][0] = refPicPtr->stride_y;
11997 : }
11998 :
11999 : // Move to the top left of the search region
12000 0 : xTopLeftSearchRegion = (int16_t)(refPicPtr->origin_x + sb_origin_x) + x_search_area_origin;
12001 0 : yTopLeftSearchRegion = (int16_t)(refPicPtr->origin_y + sb_origin_y) + y_search_area_origin;
12002 0 : searchRegionIndex = xTopLeftSearchRegion + yTopLeftSearchRegion * refPicPtr->stride_y;
12003 :
12004 : //849 * 4 + 5 block are supported
12005 0 : initialize_buffer_32bits(context_ptr->p_sb_best_sad[listIndex][refPicIndex], (MAX_SS_ME_PU_COUNT / 4), 1, MAX_SAD_VALUE);
12006 :
12007 0 : context_ptr->p_best_sad4x4 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][0]);
12008 0 : context_ptr->p_best_mv4x4 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][0]);
12009 :
12010 0 : context_ptr->p_best_sad8x8 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][256 * number_of_sb_quad]);
12011 0 : context_ptr->p_best_mv8x8 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][256 * number_of_sb_quad]);
12012 :
12013 0 : context_ptr->p_best_sad16x16 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][320 * number_of_sb_quad]);
12014 0 : context_ptr->p_best_mv16x16 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][320 * number_of_sb_quad]);
12015 :
12016 0 : context_ptr->p_best_sad32x32 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][336 * number_of_sb_quad]);
12017 0 : context_ptr->p_best_mv32x32 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][336 * number_of_sb_quad]);
12018 :
12019 0 : context_ptr->p_best_sad64x64 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][340 * number_of_sb_quad]);
12020 0 : context_ptr->p_best_mv64x64 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][340 * number_of_sb_quad]);
12021 :
12022 0 : context_ptr->p_best_sad8x4 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][341 * number_of_sb_quad]);
12023 0 : context_ptr->p_best_mv8x4 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][341 * number_of_sb_quad]);
12024 :
12025 0 : context_ptr->p_best_sad4x8 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][469 * number_of_sb_quad]);
12026 0 : context_ptr->p_best_mv4x8 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][469 * number_of_sb_quad]);
12027 :
12028 0 : context_ptr->p_best_sad4x16 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][597 * number_of_sb_quad]);
12029 0 : context_ptr->p_best_mv4x16 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][597 * number_of_sb_quad]);
12030 :
12031 0 : context_ptr->p_best_sad16x4 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][661 * number_of_sb_quad]);
12032 0 : context_ptr->p_best_mv16x4 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][661 * number_of_sb_quad]);
12033 :
12034 0 : context_ptr->p_best_sad16x8 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][725 * number_of_sb_quad]);
12035 0 : context_ptr->p_best_mv16x8 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][725 * number_of_sb_quad]);
12036 :
12037 0 : context_ptr->p_best_sad8x16 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][757 * number_of_sb_quad]);
12038 0 : context_ptr->p_best_mv8x16 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][757 * number_of_sb_quad]);
12039 :
12040 0 : context_ptr->p_best_sad32x8 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][789 * number_of_sb_quad]);
12041 0 : context_ptr->p_best_mv32x8 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][789 * number_of_sb_quad]);
12042 :
12043 0 : context_ptr->p_best_sad8x32 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][805 * number_of_sb_quad]);
12044 0 : context_ptr->p_best_mv8x32 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][805 * number_of_sb_quad]);
12045 :
12046 0 : context_ptr->p_best_sad32x16 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][821 * number_of_sb_quad]);
12047 0 : context_ptr->p_best_mv32x16 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][821 * number_of_sb_quad]);
12048 :
12049 0 : context_ptr->p_best_sad16x32 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][829 * number_of_sb_quad]);
12050 0 : context_ptr->p_best_mv16x32 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][829 * number_of_sb_quad]);
12051 :
12052 0 : context_ptr->p_best_sad64x16 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][837 * number_of_sb_quad]);
12053 0 : context_ptr->p_best_mv64x16 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][837 * number_of_sb_quad]);
12054 :
12055 0 : context_ptr->p_best_sad16x64 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][841 * number_of_sb_quad]);
12056 0 : context_ptr->p_best_mv16x64 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][841 * number_of_sb_quad]);
12057 :
12058 0 : context_ptr->p_best_sad64x32 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][845 * number_of_sb_quad]);
12059 0 : context_ptr->p_best_mv64x32 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][845 * number_of_sb_quad]);
12060 :
12061 0 : context_ptr->p_best_sad32x64 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][847 * number_of_sb_quad]);
12062 0 : context_ptr->p_best_mv32x64 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][847 * number_of_sb_quad]);
12063 :
12064 0 : if (sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128) {
12065 0 : context_ptr->p_best_sad128x64 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][849 * number_of_sb_quad]);
12066 0 : context_ptr->p_best_mv128x64 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][849 * number_of_sb_quad]);
12067 :
12068 0 : context_ptr->p_best_sad64x128 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][(849 * number_of_sb_quad) + 2]);
12069 0 : context_ptr->p_best_mv64x128 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][(849 * number_of_sb_quad) + 2]);
12070 :
12071 0 : context_ptr->p_best_sad128x128 = &(context_ptr->p_sb_best_sad[listIndex][refPicIndex][(849 * number_of_sb_quad) + 4]);
12072 0 : context_ptr->p_best_mv128x128 = &(context_ptr->p_sb_best_mv[listIndex][refPicIndex][(849 * number_of_sb_quad) + 4]);
12073 : }
12074 :
12075 0 : in_loop_me_fullpel_search_sblock(
12076 : context_ptr,
12077 : listIndex,
12078 : x_search_area_origin,
12079 : y_search_area_origin,
12080 : search_area_width,
12081 : search_area_height,
12082 : number_of_sb_quad);
12083 :
12084 0 : if (context_ptr->use_subpel_flag == 1) {
12085 : // Move to the top left of the search region
12086 0 : xTopLeftSearchRegion = (int16_t)(refPicPtr->origin_x + sb_origin_x) + x_search_area_origin;
12087 0 : yTopLeftSearchRegion = (int16_t)(refPicPtr->origin_y + sb_origin_y) + y_search_area_origin;
12088 0 : searchRegionIndex = xTopLeftSearchRegion + yTopLeftSearchRegion * refPicPtr->stride_y;
12089 :
12090 : // Interpolate the search region for Half-Pel Refinements
12091 : // H - AVC Style
12092 :
12093 0 : in_loop_me_interpolate_search_region_avc_style(
12094 : context_ptr,
12095 : listIndex,
12096 0 : context_ptr->integer_buffer_ptr[listIndex][0] + (ME_FILTER_TAP >> 1) + ((ME_FILTER_TAP >> 1) * context_ptr->interpolated_full_stride[listIndex][0]),
12097 : context_ptr->interpolated_full_stride[listIndex][0],
12098 0 : (uint32_t)search_area_width + (context_ptr->sb_side - 1),
12099 0 : (uint32_t)search_area_height + (context_ptr->sb_side - 1),
12100 : 8);
12101 :
12102 : // Half-Pel Refinement [8 search positions]
12103 0 : in_loop_me_halfpel_search_sblock(
12104 : sequence_control_set_ptr,
12105 : context_ptr,
12106 0 : &(context_ptr->pos_b_buffer[listIndex][0][(ME_FILTER_TAP >> 1) * context_ptr->interpolated_stride]),
12107 0 : &(context_ptr->pos_h_buffer[listIndex][0][1]),
12108 : &(context_ptr->pos_j_buffer[listIndex][0][0]),
12109 : x_search_area_origin,
12110 : y_search_area_origin);
12111 :
12112 : // Quarter-Pel Refinement [8 search positions]
12113 0 : in_loop_me_quarterpel_search_sblock(
12114 : context_ptr,
12115 0 : context_ptr->integer_buffer_ptr[listIndex][0] + (ME_FILTER_TAP >> 1) + ((ME_FILTER_TAP >> 1) * context_ptr->interpolated_full_stride[listIndex][0]),
12116 : context_ptr->interpolated_full_stride[listIndex][0],
12117 0 : &(context_ptr->pos_b_buffer[listIndex][0][(ME_FILTER_TAP >> 1) * context_ptr->interpolated_stride]), //points to b position of the figure above
12118 0 : &(context_ptr->pos_h_buffer[listIndex][0][1]), //points to h position of the figure above
12119 : &(context_ptr->pos_j_buffer[listIndex][0][0]), //points to j position of the figure above
12120 : x_search_area_origin,
12121 : y_search_area_origin);
12122 : }
12123 : }
12124 :
12125 : // Nader - Bipred candidate can be generated here if needed.
12126 0 : uint32_t max_number_of_block_in_sb = sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128 ? MAX_SS_ME_PU_COUNT : 849;
12127 :
12128 0 : for (listIndex = REF_LIST_0; listIndex <= numOfListToSearch; ++listIndex) {
12129 : uint32_t block_index;
12130 : uint32_t block_offset;
12131 : uint32_t nidx;
12132 0 : uint32_t candidate_cnt = 0;
12133 :
12134 0 : for (block_index = 0; block_index < max_number_of_block_in_sb; ++block_index) {
12135 : //4x4
12136 0 : if (block_index < start_idx_8x8) {
12137 0 : block_offset = (block_index / 256) * 256;
12138 0 : nidx = tab4x4[block_index - block_offset] + block_offset;
12139 : } //8x8
12140 0 : else if (block_index < start_idx_16x16) {
12141 0 : block_offset = ((block_index - start_idx_8x8) / 64) * 64;
12142 0 : nidx = tab8x8[block_index - start_idx_8x8 - block_offset] + block_offset + start_idx_8x8;
12143 : }//16x16
12144 0 : else if (block_index < start_idx_32x32) {
12145 0 : block_offset = ((block_index - start_idx_16x16) / 16) * 16;
12146 0 : nidx = tab16x16[block_index - start_idx_16x16 - block_offset] + block_offset + start_idx_16x16;
12147 : }//32x32
12148 0 : else if (block_index < start_idx_64x64) {
12149 0 : block_offset = ((block_index - start_idx_32x32) / 4) * 4;
12150 0 : nidx = tab32x32[block_index - start_idx_32x32 - block_offset] + block_offset + start_idx_32x32;
12151 : } //64x64
12152 0 : else if (block_index < start_idx_8x4) {
12153 0 : block_offset = (block_index - start_idx_64x64);
12154 0 : nidx = block_offset + start_idx_64x64;
12155 : } //8x4
12156 0 : else if (block_index < start_idx_4x8) {
12157 0 : block_offset = ((block_index - start_idx_8x4) / 128) * 128;
12158 0 : nidx = tab8x4[block_index - start_idx_8x4 - block_offset] + block_offset + start_idx_8x4;
12159 : }//4x8
12160 0 : else if (block_index < start_idx_4x16) {
12161 0 : block_offset = ((block_index - start_idx_4x8) / 128) * 128;
12162 0 : nidx = tab4x8[block_index - start_idx_4x8 - block_offset] + block_offset + start_idx_4x8;
12163 : }//4x16
12164 0 : else if (block_index < start_idx_16x4) {
12165 0 : block_offset = ((block_index - start_idx_4x16) / 64) * 64;
12166 0 : nidx = tab4x16[block_index - start_idx_4x16 - block_offset] + block_offset + start_idx_4x16;
12167 : }//16x4
12168 0 : else if (block_index < start_idx_16x8) {
12169 0 : block_offset = ((block_index - start_idx_16x4) / 64) * 64;
12170 0 : nidx = tab16x4[block_index - start_idx_16x4 - block_offset] + block_offset + start_idx_16x4;
12171 : }//16x8
12172 0 : else if (block_index < start_idx_8x16) {
12173 0 : block_offset = ((block_index - start_idx_16x8) / 32) * 32;
12174 0 : nidx = tab16x8[block_index - start_idx_16x8 - block_offset] + block_offset + start_idx_16x8;
12175 : }//8x16
12176 0 : else if (block_index < start_idx_32x8) {
12177 0 : block_offset = ((block_index - start_idx_8x16) / 32) * 32;
12178 0 : nidx = tab8x16[block_index - start_idx_8x16 - block_offset] + block_offset + start_idx_8x16;
12179 : }//32x8
12180 0 : else if (block_index < start_idx_8x32) {
12181 0 : block_offset = ((block_index - start_idx_32x8) / 16) * 16;
12182 0 : nidx = tab32x8[block_index - start_idx_32x8 - block_offset] + block_offset + start_idx_32x8;
12183 : }//8x32
12184 0 : else if (block_index < start_idx_32x16) {
12185 0 : block_offset = ((block_index - start_idx_8x32) / 16) * 16;
12186 0 : nidx = tab8x32[block_index - start_idx_8x32 - block_offset] + block_offset + start_idx_8x32;
12187 : }//32x16
12188 0 : else if (block_index < start_idx_16x32) {
12189 0 : block_offset = ((block_index - start_idx_32x16) / 8) * 8;
12190 0 : nidx = tab32x16[block_index - start_idx_32x16 - block_offset] + block_offset + start_idx_32x16;
12191 : }//16x32
12192 0 : else if (block_index < start_idx_64x16) {
12193 0 : block_offset = ((block_index - start_idx_16x32) / 8) * 8;
12194 0 : nidx = tab16x32[block_index - start_idx_16x32 - block_offset] + block_offset + start_idx_16x32;
12195 : }//64x16
12196 0 : else if (block_index < start_idx_16x64) {
12197 0 : block_offset = ((block_index - start_idx_64x16) / 4) * 4;
12198 0 : nidx = tab64x16[block_index - start_idx_64x16 - block_offset] + block_offset + start_idx_64x16;
12199 : }//16x64
12200 0 : else if (block_index < start_idx_64x32) {
12201 0 : block_offset = ((block_index - start_idx_16x64) / 4) * 4;
12202 0 : nidx = tab16x64[block_index - start_idx_16x64 - block_offset] + block_offset + start_idx_16x64;
12203 : }//64x32
12204 0 : else if (block_index < start_idx_32x64) {
12205 0 : block_offset = ((block_index - start_idx_64x32) / 2) * 2;
12206 0 : nidx = tab64x32[block_index - start_idx_64x32 - block_offset] + block_offset + start_idx_64x32;
12207 : }//32x64
12208 0 : else if (block_index < start_idx_128x64) {
12209 0 : block_offset = ((block_index - start_idx_32x64) / 2) * 2;
12210 0 : nidx = tab32x64[block_index - start_idx_32x64 - block_offset] + block_offset + start_idx_32x64;
12211 : }//128x64, //64x128 and 128x128
12212 : else
12213 0 : nidx = block_index;
12214 0 : context_ptr->inloop_me_mv[0][0][candidate_cnt][0] = _MVXT(context_ptr->p_sb_best_mv[0][0][nidx]);
12215 0 : context_ptr->inloop_me_mv[0][0][candidate_cnt][1] = _MVYT(context_ptr->p_sb_best_mv[0][0][nidx]);
12216 0 : context_ptr->inloop_me_mv[1][0][candidate_cnt][0] = _MVXT(context_ptr->p_sb_best_mv[1][0][nidx]);
12217 0 : context_ptr->inloop_me_mv[1][0][candidate_cnt][1] = _MVYT(context_ptr->p_sb_best_mv[1][0][nidx]);
12218 0 : candidate_cnt++;
12219 : }
12220 : }
12221 :
12222 0 : return return_error;
12223 : }
12224 :
12225 : #if PREDICT_NSQ_SHAPE
/*
 * spatial_full_distortion_helper
 *
 * Selects one of six SSE2 spatial-full-distortion kernels according to
 * `choice` and returns that kernel's result.
 *
 *   choice 0 -> spatial_full_distortion_kernel4x_n_sse2_intrin
 *   choice 1 -> spatial_full_distortion_kernel8x_n_sse2_intrin
 *   choice 2 -> spatial_full_distortion_kernel16x_n_sse2_intrin
 *   choice 3 -> spatial_full_distortion_kernel32x_n_sse2_intrin
 *   choice 4 -> spatial_full_distortion_kernel64x_n_sse2_intrin
 *   choice 5 -> spatial_full_distortion_kernel128x_n_sse2_intrin
 *
 * Parameters:
 *   input, input_offset, input_stride,
 *   recon, recon_offset, recon_stride,
 *   area_width, area_height
 *       Forwarded to the selected kernel unchanged and in this order; this
 *       function does not read or modify them itself.
 *   choice
 *       Kernel selector, see the table above.
 *
 * Returns:
 *   The value returned by the selected kernel, or 0 when `choice` is not in
 *   0..5 (the switch has no default label, so `sfd` keeps its initial value).
 *
 * NOTE(review): the kernel names suggest each one handles a fixed block
 * width (4, 8, ... 128 samples) -- confirm against the kernel definitions,
 * which are not visible in this part of the file.
 */
12226 0 : uint64_t spatial_full_distortion_helper(
12227 : uint8_t *input,
12228 : uint32_t input_offset,
12229 : uint32_t input_stride,
12230 : uint8_t *recon,
12231 : uint32_t recon_offset,
12232 : uint32_t recon_stride,
12233 : uint32_t area_width,
12234 : uint32_t area_height,
12235 : uint8_t choice) {
12236 :
// Stays 0 if none of the cases below matches `choice`.
12237 0 : uint64_t sfd = 0;
12238 :
// One case per kernel; every case passes the same eight arguments through.
12239 0 : switch (choice) {
12240 0 : case 0:
12241 0 : sfd = spatial_full_distortion_kernel4x_n_sse2_intrin(input, input_offset, input_stride, recon, recon_offset, recon_stride, area_width, area_height);break;
12242 0 : case 1:
12243 0 : sfd = spatial_full_distortion_kernel8x_n_sse2_intrin(input, input_offset, input_stride, recon, recon_offset, recon_stride, area_width, area_height);break;
12244 0 : case 2:
12245 0 : sfd = spatial_full_distortion_kernel16x_n_sse2_intrin(input, input_offset, input_stride, recon, recon_offset, recon_stride, area_width, area_height);break;
12246 0 : case 3:
12247 0 : sfd = spatial_full_distortion_kernel32x_n_sse2_intrin(input, input_offset, input_stride, recon, recon_offset, recon_stride, area_width, area_height);break;
12248 0 : case 4:
12249 0 : sfd = spatial_full_distortion_kernel64x_n_sse2_intrin(input, input_offset, input_stride, recon, recon_offset, recon_stride, area_width, area_height);break;
12250 0 : case 5:
12251 0 : sfd = spatial_full_distortion_kernel128x_n_sse2_intrin(input, input_offset, input_stride, recon, recon_offset, recon_stride, area_width, area_height);break;
// No default label: any other `choice` falls out of the switch with sfd == 0.
12252 : }
12253 :
12254 0 : return sfd;
12255 : }
12256 :
/*
 * spatial_full_distortion_avx2_helper
 *
 * Selects one of six AVX2 spatial-full-distortion kernels according to
 * `choice` and returns that kernel's result. Same selector values and the
 * same argument forwarding as spatial_full_distortion_helper, but every case
 * calls the *_avx2_intrin kernel instead of the *_sse2_intrin one.
 *
 *   choice 0 -> spatial_full_distortion_kernel4x_n_avx2_intrin
 *   choice 1 -> spatial_full_distortion_kernel8x_n_avx2_intrin
 *   choice 2 -> spatial_full_distortion_kernel16x_n_avx2_intrin
 *   choice 3 -> spatial_full_distortion_kernel32x_n_avx2_intrin
 *   choice 4 -> spatial_full_distortion_kernel64x_n_avx2_intrin
 *   choice 5 -> spatial_full_distortion_kernel128x_n_avx2_intrin
 *
 * Parameters:
 *   input, input_offset, input_stride,
 *   recon, recon_offset, recon_stride,
 *   area_width, area_height
 *       Forwarded to the selected kernel unchanged and in this order; this
 *       function does not read or modify them itself.
 *   choice
 *       Kernel selector, see the table above.
 *
 * Returns:
 *   The value returned by the selected kernel, or 0 when `choice` is not in
 *   0..5 (the switch has no default label, so `sfd` keeps its initial value).
 *
 * NOTE(review): the kernel names suggest each one handles a fixed block
 * width (4, 8, ... 128 samples) -- confirm against the kernel definitions,
 * which are not visible in this part of the file.
 */
12257 0 : uint64_t spatial_full_distortion_avx2_helper(
12258 : uint8_t *input,
12259 : uint32_t input_offset,
12260 : uint32_t input_stride,
12261 : uint8_t *recon,
12262 : uint32_t recon_offset,
12263 : uint32_t recon_stride,
12264 : uint32_t area_width,
12265 : uint32_t area_height,
12266 : uint8_t choice) {
12267 :
// Stays 0 if none of the cases below matches `choice`.
12268 0 : uint64_t sfd = 0;
12269 :
// One case per kernel; every case passes the same eight arguments through.
12270 0 : switch (choice) {
12271 0 : case 0:
12272 0 : sfd = spatial_full_distortion_kernel4x_n_avx2_intrin(input, input_offset, input_stride, recon, recon_offset, recon_stride, area_width, area_height);break;
12273 0 : case 1:
12274 0 : sfd = spatial_full_distortion_kernel8x_n_avx2_intrin(input, input_offset, input_stride, recon, recon_offset, recon_stride, area_width, area_height);break;
12275 0 : case 2:
12276 0 : sfd = spatial_full_distortion_kernel16x_n_avx2_intrin(input, input_offset, input_stride, recon, recon_offset, recon_stride, area_width, area_height);break;
12277 0 : case 3:
12278 0 : sfd = spatial_full_distortion_kernel32x_n_avx2_intrin(input, input_offset, input_stride, recon, recon_offset, recon_stride, area_width, area_height);break;
12279 0 : case 4:
12280 0 : sfd = spatial_full_distortion_kernel64x_n_avx2_intrin(input, input_offset, input_stride, recon, recon_offset, recon_stride, area_width, area_height);break;
12281 0 : case 5:
12282 0 : sfd = spatial_full_distortion_kernel128x_n_avx2_intrin(input, input_offset, input_stride, recon, recon_offset, recon_stride, area_width, area_height);break;
// No default label: any other `choice` falls out of the switch with sfd == 0.
12283 : }
12284 :
12285 0 : return sfd;
12286 : }
12287 : #endif
|