Line data Source code
1 : /*
2 : * Copyright(c) 2019 Intel Corporation
3 : * SPDX - License - Identifier: BSD - 2 - Clause - Patent
4 : */
5 :
6 : /*
7 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
8 : *
9 : * This source code is subject to the terms of the BSD 2 Clause License and
10 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
11 : * was not distributed with this source code in the LICENSE file, you can
12 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
13 : * Media Patent License 1.0 was not distributed with this source code in the
14 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
15 : */
16 :
17 : /***************************************
18 : * Includes
19 : ***************************************/
20 : #include "EbRateDistortionCost.h"
21 : #include "EbCommonUtils.h"
22 : #include "aom_dsp_rtcd.h"
23 :
24 : #include <assert.h>
25 : #if TWO_PASS
26 : #define FIRST_PASS_COST_PENALTY 20 // The penalty is added in cost calculation of the first pass.
27 : #endif
28 : #define AV1_COST_PRECISION 0
29 : #define MV_COST_WEIGHT 108
30 : int av1_get_reference_mode_context_new(const MacroBlockD *xd);
31 : int eb_av1_get_pred_context_uni_comp_ref_p(const MacroBlockD *xd);
32 : int eb_av1_get_pred_context_uni_comp_ref_p1(const MacroBlockD *xd);
33 : int eb_av1_get_pred_context_uni_comp_ref_p2(const MacroBlockD *xd);
34 : int av1_get_comp_reference_type_context_new(const MacroBlockD *xd);
35 :
36 : #if PAL_SUP
37 : int av1_get_palette_bsize_ctx(BlockSize bsize);
38 : int av1_get_palette_mode_ctx(const MacroBlockD *xd);
39 : int write_uniform_cost(int n, int v);
40 : int eb_get_palette_cache(const MacroBlockD *const xd, int plane,uint16_t *cache);
41 : int av1_palette_color_cost_y(const PaletteModeInfo *const pmi,
42 : uint16_t *color_cache, int n_cache,
43 : int bit_depth);
44 : int av1_cost_color_map(PaletteInfo *palette_info, MdRateEstimationContext *rate_table, CodingUnit*cu_ptr, int plane, BlockSize bsize,
45 : COLOR_MAP_TYPE type);
46 : void av1_get_block_dimensions(BlockSize bsize, int plane,
47 : const MacroBlockD *xd, int *width,
48 : int *height,
49 : int *rows_within_bounds,
50 : int *cols_within_bounds);
51 : int av1_allow_palette(int allow_screen_content_tools,
52 : BlockSize sb_type);
53 : #endif
54 0 : BlockSize GetBlockSize(uint8_t cu_size) {
55 0 : return (cu_size == 64 ? BLOCK_64X64 : cu_size == 32 ? BLOCK_32X32 : cu_size == 16 ? BLOCK_16X16 : cu_size == 8 ? BLOCK_8X8 : BLOCK_4X4);
56 : }
57 :
58 : int av1_allow_intrabc(const Av1Common *const cm);
59 :
60 75456500 : uint8_t av1_drl_ctx(const CandidateMv *ref_mv_stack,
61 : int32_t ref_idx) {
62 75456500 : if (ref_mv_stack[ref_idx].weight >= REF_CAT_LEVEL &&
63 16308700 : ref_mv_stack[ref_idx + 1].weight >= REF_CAT_LEVEL)
64 5240980 : return 0;
65 :
66 70215500 : if (ref_mv_stack[ref_idx].weight >= REF_CAT_LEVEL &&
67 11072900 : ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL)
68 11073100 : return 1;
69 :
70 59142400 : if (ref_mv_stack[ref_idx].weight < REF_CAT_LEVEL &&
71 59233300 : ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL)
72 59236000 : return 2;
73 :
74 0 : return 0;
75 : }
76 :
77 : /* Symbols for coding which components are zero jointly */
78 : //#define MV_JOINTS 4
79 : //typedef enum {
80 : // MV_JOINT_ZERO = 0, /* Zero vector */
81 : // MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */
82 : // MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */
83 : // MV_JOINT_HNZVNZ = 3, /* Both components nonzero */
84 : //} MvJointType;
85 :
86 248323000 : MvJointType av1_get_mv_joint(const MV *mv) {
87 248323000 : if (mv->row == 0)
88 29801600 : return mv->col == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ;
89 : else
90 218521000 : return mv->col == 0 ? MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ;
91 : }
92 201486000 : int32_t mv_cost(const MV *mv, const int32_t *joint_cost,
93 : int32_t *const comp_cost[2]) {
94 201486000 : int32_t jnC = av1_get_mv_joint(mv);
95 201541000 : int32_t res =
96 201541000 : joint_cost[jnC] + comp_cost[0][mv->row] +
97 201541000 : comp_cost[1][mv->col];
98 :
99 201541000 : return res;
100 : }
101 :
102 201503000 : int32_t eb_av1_mv_bit_cost(const MV *mv, const MV *ref, const int32_t *mvjcost,
103 : int32_t *mvcost[2], int32_t weight) {
104 201503000 : const MV diff = { mv->row - ref->row, mv->col - ref->col };
105 201503000 : return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
106 : }
107 :
108 : /////////////////////////////COEFFICIENT CALCULATION //////////////////////////////////////////////
109 1751740 : static INLINE int32_t get_golomb_cost(int32_t abs_qc) {
110 1751740 : if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
111 1751740 : const int32_t r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
112 1751740 : const int32_t length = get_msb(r) + 1;
113 1751740 : return av1_cost_literal(2 * length - 1);
114 : }
115 0 : return 0;
116 : }
117 :
118 0 : void eb_av1_txb_init_levels_c(
119 : const TranLow *const coeff,
120 : const int32_t width,
121 : const int32_t height,
122 : uint8_t *const levels) {
123 0 : const int32_t stride = width + TX_PAD_HOR;
124 0 : uint8_t *ls = levels;
125 :
126 0 : memset(levels - TX_PAD_TOP * stride, 0,
127 : sizeof(*levels) * TX_PAD_TOP * stride);
128 0 : memset(levels + stride * height, 0,
129 0 : sizeof(*levels) * (TX_PAD_BOTTOM * stride + TX_PAD_END));
130 :
131 0 : for (int32_t i = 0; i < height; i++) {
132 0 : for (int32_t j = 0; j < width; j++)
133 0 : *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, INT8_MAX);
134 0 : for (int32_t j = 0; j < TX_PAD_HOR; j++)
135 0 : *ls++ = 0;
136 : }
137 0 : }
138 :
139 : // TODO(angiebird): use this function whenever it's possible
/*
 * Av1TransformTypeRateEstimation
 *
 * Looks up the rate for signalling `transform_type` on a transform block of
 * size `transform_size`, using the tables reached through
 * candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr.
 *
 * - Returns 0 when get_ext_tx_types() reports a single type for this
 *   size/inter/reduced-set combination, or when get_ext_tx_set() is not > 0.
 * - Inter blocks read inter_tx_type_fac_bits[set][square size][type].
 * - Intra blocks read intra_tx_type_fac_bits[set][square size][dir][type],
 *   where `dir` is taken from fimode_to_intradir[] when a filter-intra mode
 *   is in use and from the candidate's pred_mode otherwise.
 * - When allow_update_cdf is non-zero the matching CDF in `fc`
 *   (inter_ext_tx_cdf / intra_ext_tx_cdf) is also updated via update_cdf().
 *
 * The useFilterIntraFlag parameter only exists when FILTER_INTRA_FLAG is 0.
 */
140 34557800 : int32_t Av1TransformTypeRateEstimation(
141 : uint8_t allow_update_cdf,
142 : FRAME_CONTEXT *fc,
143 : struct ModeDecisionCandidateBuffer *candidate_buffer_ptr,
144 : EbBool is_inter,
145 : #if !FILTER_INTRA_FLAG
146 : EbBool useFilterIntraFlag,
147 : #endif
148 : TxSize transform_size,
149 : TxType transform_type,
150 : EbBool reduced_tx_set_used)
151 : {
152 : #if !FILTER_INTRA_FLAG
153 : uint8_t filterIntraMode = 0; // AMIR to check// NM- hardcoded to zero for the moment until we support different intra filtering modes.
154 : #endif
155 : //const MbModeInfo *mbmi = &xd->mi[0]->mbmi;
156 : //const int32_t is_inter = is_inter_block(mbmi);
157 :
// A transform type only costs bits when more than one type is selectable.
158 34557800 : if (get_ext_tx_types(transform_size, is_inter, reduced_tx_set_used) > 1 /*&& !xd->lossless[xd->mi[0]->mbmi.segment_id] WE ARE NOT LOSSLESS*/) {
159 32732300 : const TxSize square_tx_size = txsize_sqr_map[transform_size];
160 32732300 : assert(square_tx_size < EXT_TX_SIZES);
161 :
162 32732300 : const int32_t ext_tx_set = get_ext_tx_set(transform_size, is_inter, reduced_tx_set_used);
163 32729000 : if (is_inter) {
164 16644500 : if (ext_tx_set > 0)
165 : {
166 16644600 : if (allow_update_cdf) {
167 : const TxSetType tx_set_type =
168 2777 : get_ext_tx_set_type(transform_size, is_inter, reduced_tx_set_used);
169 :
170 2777 : update_cdf(fc->inter_ext_tx_cdf[ext_tx_set][square_tx_size],
171 : av1_ext_tx_ind[tx_set_type][transform_type],
172 : av1_num_ext_tx_set[tx_set_type]);
173 : }
174 16645100 : return candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->inter_tx_type_fac_bits[ext_tx_set][square_tx_size][transform_type];
175 : }
176 : }
177 : else {
178 16084500 : if (ext_tx_set > 0) {
// Intra direction used as table index: filter-intra mapping if a filter
// mode is active, otherwise the candidate's prediction mode. Note that the
// `else` below pairs with whichever `if` the preprocessor keeps.
179 : PredictionMode intra_dir;
180 : #if FILTER_INTRA_FLAG
181 16097400 : if (candidate_buffer_ptr->candidate_ptr->filter_intra_mode != FILTER_INTRA_MODES)
182 4494820 : intra_dir = fimode_to_intradir[candidate_buffer_ptr->candidate_ptr->filter_intra_mode];
183 : #else
184 : if (useFilterIntraFlag)
185 : intra_dir = fimode_to_intradir[filterIntraMode];
186 : #endif
187 : else
188 11602600 : intra_dir = candidate_buffer_ptr->candidate_ptr->pred_mode;
189 16097400 : assert(intra_dir < INTRA_MODES);
190 : const TxSetType tx_set_type =
191 16097400 : get_ext_tx_set_type(transform_size, is_inter, reduced_tx_set_used);
192 :
193 16093500 : if (allow_update_cdf) {
194 4282 : update_cdf(
195 4282 : fc->intra_ext_tx_cdf[ext_tx_set][square_tx_size][intra_dir],
196 : av1_ext_tx_ind[tx_set_type][transform_type],
197 : av1_num_ext_tx_set[tx_set_type]);
198 : }
199 16095700 : return candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->intra_tx_type_fac_bits[ext_tx_set][square_tx_size][intra_dir][transform_type];
200 : }
201 : }
202 : }
// No transform-type symbol is priced for this block.
203 1815780 : return 0;
204 : }
205 :
206 : static const int8_t eob_to_pos_small[33] = {
207 : 0, 1, 2, // 0-2
208 : 3, 3, // 3-4
209 : 4, 4, 4, 4, // 5-8
210 : 5, 5, 5, 5, 5, 5, 5, 5, // 9-16
211 : 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 // 17-32
212 : };
213 :
214 : static const int8_t eob_to_pos_large[17] = {
215 : 6, // place holder
216 : 7, // 33-64
217 : 8, 8, // 65-128
218 : 9, 9, 9, 9, // 129-256
219 : 10, 10, 10, 10, 10, 10, 10, 10, // 257-512
220 : 11 // 513-
221 : };
222 :
223 43885800 : static INLINE int32_t get_eob_pos_token(const int32_t eob, int32_t *const extra) {
224 : int32_t t;
225 :
226 43885800 : if (eob < 33)
227 30095100 : t = eob_to_pos_small[eob];
228 : else {
229 13790600 : const int32_t e = AOMMIN((eob - 1) >> 5, 16);
230 13790600 : t = eob_to_pos_large[e];
231 : }
232 :
233 43885800 : *extra = eob - eb_k_eob_group_start[t];
234 :
235 43885800 : return t;
236 : }
237 : #define TX_SIZE TxSize
238 9342 : static INLINE TX_SIZE get_txsize_entropy_ctx(TX_SIZE txsize) {
239 9342 : return (TX_SIZE)((txsize_sqr_map[txsize] + txsize_sqr_up_map[txsize] + 1) >>
240 : 1);
241 : }
/*
 * eb_av1_update_eob_context
 *
 * Adapts the end-of-block CDFs in `ec_ctx` for one transform block.
 *
 * - `eob` is split by get_eob_pos_token() into a group token (eob_pt) and an
 *   offset inside the group (eob_extra).
 * - txsize_log2_minus4[tx_size] selects which eob_flag_cdfN table is
 *   updated; the symbol is eob_pt - 1 and the alphabet size grows from 5
 *   (case 0) to 11 (case 6 / default).
 * - The second table index is 0 for TX_CLASS_2D and 1 for any other class.
 * - If the group has offset bits (eb_k_eob_offset_bits[eob_pt] > 0), the most
 *   significant offset bit is used to update eob_extra_cdf.
 *
 * Every CDF write is guarded by `allow_update_cdf`; with it set to 0 and
 * CONFIG_ENTROPY_STATS disabled the function has no effect on `ec_ctx`.
 * NOTE(review): the CONFIG_ENTROPY_STATS sections use `counts` and `cdf_idx`,
 * which are not declared in this function - confirm before enabling.
 */
242 9342 : void eb_av1_update_eob_context(int eob, TX_SIZE tx_size, TxClass tx_class,
243 : PlaneType plane, FRAME_CONTEXT *ec_ctx,
244 : uint8_t allow_update_cdf) {
245 : int eob_extra;
246 9342 : const int eob_pt = get_eob_pos_token(eob, &eob_extra);
247 9342 : TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
248 9342 : assert(txs_ctx < TX_SIZES);
249 9342 : const int eob_multi_size = txsize_log2_minus4[tx_size];
250 9342 : const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
251 :
// One CDF family per transform-size class; the trailing literal is the
// number of symbols in that CDF.
252 9342 : switch (eob_multi_size) {
253 2974 : case 0:
254 : #if CONFIG_ENTROPY_STATS
255 : ++counts->eob_multi16[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
256 : #endif
257 2974 : if (allow_update_cdf)
258 2974 : update_cdf(ec_ctx->eob_flag_cdf16[plane][eob_multi_ctx], eob_pt - 1, 5);
259 2974 : break;
260 1668 : case 1:
261 : #if CONFIG_ENTROPY_STATS
262 : ++counts->eob_multi32[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
263 : #endif
264 1668 : if (allow_update_cdf)
265 1668 : update_cdf(ec_ctx->eob_flag_cdf32[plane][eob_multi_ctx], eob_pt - 1, 6);
266 1668 : break;
267 3068 : case 2:
268 : #if CONFIG_ENTROPY_STATS
269 : ++counts->eob_multi64[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
270 : #endif
271 3068 : if (allow_update_cdf)
272 3068 : update_cdf(ec_ctx->eob_flag_cdf64[plane][eob_multi_ctx], eob_pt - 1, 7);
273 3068 : break;
274 677 : case 3:
275 : #if CONFIG_ENTROPY_STATS
276 : ++counts->eob_multi128[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
277 : #endif
278 677 : if (allow_update_cdf) {
279 677 : update_cdf(ec_ctx->eob_flag_cdf128[plane][eob_multi_ctx], eob_pt - 1,
280 : 8);
281 : }
282 677 : break;
283 690 : case 4:
284 : #if CONFIG_ENTROPY_STATS
285 : ++counts->eob_multi256[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
286 : #endif
287 690 : if (allow_update_cdf) {
288 690 : update_cdf(ec_ctx->eob_flag_cdf256[plane][eob_multi_ctx], eob_pt - 1,
289 : 9);
290 : }
291 690 : break;
292 96 : case 5:
293 : #if CONFIG_ENTROPY_STATS
294 : ++counts->eob_multi512[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
295 : #endif
296 96 : if (allow_update_cdf) {
297 96 : update_cdf(ec_ctx->eob_flag_cdf512[plane][eob_multi_ctx], eob_pt - 1,
298 : 10);
299 : }
300 96 : break;
301 169 : case 6:
302 : default:
303 : #if CONFIG_ENTROPY_STATS
304 : ++counts->eob_multi1024[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
305 : #endif
306 169 : if (allow_update_cdf) {
307 169 : update_cdf(ec_ctx->eob_flag_cdf1024[plane][eob_multi_ctx], eob_pt - 1,
308 : 11);
309 : }
310 169 : break;
311 : }
312 :
// Groups with offset bits additionally adapt a binary CDF on the top
// offset bit; the CDF is indexed by eob_pt - 3.
313 9342 : if (eb_k_eob_offset_bits[eob_pt] > 0) {
314 6499 : int eob_ctx = eob_pt - 3;
315 6499 : int eob_shift = eb_k_eob_offset_bits[eob_pt] - 1;
316 6499 : int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
317 : #if CONFIG_ENTROPY_STATS
318 : counts->eob_extra[cdf_idx][txs_ctx][plane][eob_pt][bit]++;
319 : #endif // CONFIG_ENTROPY_STATS
320 6499 : if (allow_update_cdf)
321 6499 : update_cdf(ec_ctx->eob_extra_cdf[txs_ctx][plane][eob_ctx], bit, 2);
322 : }
323 9342 : }
324 43877700 : static int32_t get_eob_cost(int32_t eob, const LvMapEobCost *txb_eob_costs,
325 : const LvMapCoeffCost *txb_costs, TxType tx_type) {
326 : int32_t eob_extra;
327 43877700 : const int32_t eob_pt = get_eob_pos_token(eob, &eob_extra);
328 43878700 : int32_t eob_cost = 0;
329 43878700 : const int32_t eob_multi_ctx = (tx_type_to_class[tx_type] == TX_CLASS_2D) ? 0 : 1;
330 43878700 : eob_cost = txb_eob_costs->eob_cost[eob_multi_ctx][eob_pt - 1];
331 :
332 43878700 : if (eb_k_eob_offset_bits[eob_pt] > 0) {
333 36413800 : const int32_t eob_shift = eb_k_eob_offset_bits[eob_pt] - 1;
334 36413800 : const int32_t bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
335 36413800 : eob_cost += txb_costs->eob_extra_cost[eob_pt][bit];
336 36413800 : const int32_t offset_bits = eb_k_eob_offset_bits[eob_pt];
337 36413800 : if (offset_bits > 1) eob_cost += av1_cost_literal(offset_bits - 1);
338 : }
339 43878700 : return eob_cost;
340 : }
341 :
342 : #if ADD_MDC_FULL_COST
343 45131600 : int32_t av1_cost_skip_txb(
344 : #else
345 : static INLINE int32_t av1_cost_skip_txb(
346 : #endif
347 : uint8_t allow_update_cdf,
348 : FRAME_CONTEXT *ec_ctx,
349 : struct ModeDecisionCandidateBuffer *candidate_buffer_ptr,
350 : TxSize transform_size,
351 : PlaneType plane_type,
352 : int16_t txb_skip_ctx)
353 : {
354 45131600 : const TxSize txs_ctx = (TxSize)((txsize_sqr_map[transform_size] + txsize_sqr_up_map[transform_size] + 1) >> 1);
355 45131600 : assert(txs_ctx < TX_SIZES);
356 45131600 : const LvMapCoeffCost *const coeff_costs = &candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->coeff_fac_bits[txs_ctx][plane_type];
357 45131600 : if (allow_update_cdf)
358 31060 : update_cdf(ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], 1, 2);
359 45138800 : return coeff_costs->txb_skip_cost[txb_skip_ctx][1];
360 : }
361 :
/*
 * av1_cost_coeffs_txb_loop_cost_eob
 *
 * Accumulates the rate of the quantized coefficients of one transform
 * block, visiting scan positions eob-1 .. 0. The work is split in two loops:
 *
 *  1. A "boundary" loop that handles only scan index eob-1 (priced with
 *     base_eob_cost) and scan index 0 (whose sign is priced with
 *     dc_sign_cost). When eob == 1 both are the same index and it is
 *     visited once.
 *  2. An "interior" loop over scan indices eob-2 .. 1, which always uses
 *     base_cost and a one-bit literal for the sign.
 *
 * In both loops, levels above NUM_BASE_LEVELS add an lps_cost entry selected
 * by get_br_ctx(), and levels that exceed the base range add
 * get_golomb_cost(level).
 *
 * Must be called with eob >= 1: the first loop asserts that the coefficient
 * at scan index eob-1 is non-zero.
 */
362 43871700 : static INLINE int32_t av1_cost_coeffs_txb_loop_cost_eob(uint16_t eob,
363 : const int16_t *const scan, const TranLow *const qcoeff,
364 : int8_t *const coeff_contexts, const LvMapCoeffCost *coeff_costs,
365 : int16_t dc_sign_ctx, uint8_t *const levels,
366 : const int32_t bwl,
367 : TxType transform_type) {
368 43871700 : const uint32_t cost_literal = av1_cost_literal(1);
369 43871700 : int32_t cost = 0;
370 : int32_t c;
371 :
372 : /* Loop reduced to touch only first (eob - 1) and last (0) index */
// The step is eob-1 (at least 1), so c takes the values eob-1 and then 0
// before dropping below zero.
373 43871700 : int32_t decr = eob - 1;
374 43871700 : if (decr < 1)
375 6455920 : decr = 1;
376 125210000 : for (c = eob - 1; c >= 0; c -= decr) {
377 81337900 : const int32_t pos = scan[c];
378 81337900 : const TranLow v = qcoeff[pos];
379 81337900 : const int32_t is_nz = (v != 0);
380 81337900 : const int32_t level = abs(v);
381 81337900 : const int32_t coeff_ctx = coeff_contexts[pos];
382 :
383 81337900 : if (c == eob - 1) {
// The last coefficient is priced from base_eob_cost, indexed by
// min(level, 3) - 1.
384 43871000 : assert((AOMMIN(level, 3) - 1) >= 0);
385 43871000 : cost += coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1];
386 : }
387 : else {
388 37466900 : cost += coeff_costs->base_cost[coeff_ctx][AOMMIN(level, 3)];
389 : }
390 :
391 81337900 : if (is_nz) {
392 70331200 : if (c == 0) {
393 32922300 : const int32_t sign = (v < 0) ? 1 : 0;
394 : // sign bit cost
395 :
396 32922300 : cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign];
397 : }
398 : else {
399 37408900 : cost += cost_literal;
400 : }
401 :
402 70331200 : if (level > NUM_BASE_LEVELS) {
403 : int32_t ctx;
404 11454500 : ctx = get_br_ctx(levels, pos, bwl, transform_type);
405 :
406 11454800 : const int32_t base_range = level - 1 - NUM_BASE_LEVELS;
407 :
408 11454800 : if (base_range < COEFF_BASE_RANGE)
409 10355100 : cost += coeff_costs->lps_cost[ctx][base_range];
410 : else
411 1099690 : cost += coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE];
412 :
413 :
414 11454800 : if (level >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE)
415 1100080 : cost += get_golomb_cost(level);
416 : }
417 : }
418 : }
419 :
420 : /* Optimized Loop, omitted first (eob - 1) and last (0) index */
// Interior positions: the sign always costs one literal bit and the base
// symbol always comes from base_cost.
421 1819190000 : for (c = eob - 2; c >= 1; --c) {
422 1775290000 : const int32_t pos = scan[c];
423 1775290000 : const int32_t level = abs(qcoeff[pos]);
424 1775290000 : if (level > NUM_BASE_LEVELS) {
425 72960400 : const int32_t ctx = get_br_ctx(levels, pos, bwl, transform_type);
426 72963600 : const int32_t base_range = level - 1 - NUM_BASE_LEVELS;
427 :
428 72963600 : if (base_range < COEFF_BASE_RANGE) {
429 72334700 : cost += cost_literal + coeff_costs->lps_cost[ctx][base_range]
430 72334700 : + coeff_costs->base_cost[coeff_contexts[pos]][3];
431 : }
432 : else {
433 628890 : cost += get_golomb_cost(level) + cost_literal
434 651756 : + coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE]
435 651756 : + coeff_costs->base_cost[coeff_contexts[pos]][3];
436 : }
437 : }
438 1702330000 : else if (level) {
439 482349000 : cost += cost_literal
440 482349000 : + coeff_costs->base_cost[coeff_contexts[pos]][level];
441 : }
442 : else {
// Zero coefficient: only the base symbol is priced, no sign bit.
443 1219980000 : cost += coeff_costs->base_cost[coeff_contexts[pos]][0];
444 : }
445 : }
446 43898100 : return cost;
447 : }
448 :
449 : // Note: don't call this function when eob is 0.
450 43889300 : uint64_t eb_av1_cost_coeffs_txb(
451 : uint8_t allow_update_cdf,
452 : FRAME_CONTEXT *ec_ctx,
453 : struct ModeDecisionCandidateBuffer *candidate_buffer_ptr,
454 : const TranLow *const qcoeff,
455 : uint16_t eob,
456 : PlaneType plane_type,
457 : TxSize transform_size,
458 : TxType transform_type,
459 : int16_t txb_skip_ctx,
460 : int16_t dc_sign_ctx,
461 : EbBool reducedTransformSetFlag)
462 :
463 : {
464 : //Note: there is a different version of this function in AOM that seems to be efficient as its name is:
465 : //warehouse_efficients_txb
466 :
467 43889300 : const TxSize txs_ctx = (TxSize)((txsize_sqr_map[transform_size] + txsize_sqr_up_map[transform_size] + 1) >> 1);
468 43889300 : const TxClass tx_class = tx_type_to_class[transform_type];
469 : int32_t cost;
470 43889300 : const int32_t bwl = get_txb_bwl(transform_size);
471 43879400 : const int32_t width = get_txb_wide(transform_size);
472 43856400 : const int32_t height = get_txb_high(transform_size);
473 43835900 : const ScanOrder *const scan_order = &av1_scan_orders[transform_size][transform_type]; // get_scan(tx_size, tx_type);
474 43835900 : const int16_t *const scan = scan_order->scan;
475 : uint8_t levels_buf[TX_PAD_2D];
476 43835900 : uint8_t *const levels = set_levels(levels_buf, width);
477 : DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
478 43839400 : assert(txs_ctx < TX_SIZES);
479 43839400 : const LvMapCoeffCost *const coeff_costs = &candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->coeff_fac_bits[txs_ctx][plane_type];
480 :
481 43839400 : const int32_t eob_multi_size = txsize_log2_minus4[transform_size];
482 43839400 : const LvMapEobCost *const eobBits = &candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->eob_frac_bits[eob_multi_size][plane_type];
483 : // eob must be greater than 0 here.
484 43839400 : assert(eob > 0);
485 43839400 : cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0];
486 :
487 43839400 : if (allow_update_cdf)
488 9342 : update_cdf(ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], eob == 0, 2);
489 43839400 : eb_av1_txb_init_levels(qcoeff, width, height, levels); // NM - Needs to be optimized - to be combined with the quantisation.
490 :
491 : // Transform type bit estimation
492 43864100 : cost += plane_type > PLANE_TYPE_Y ? 0 :
493 34520200 : Av1TransformTypeRateEstimation(
494 : allow_update_cdf,
495 : ec_ctx,
496 : candidate_buffer_ptr,
497 34520200 : candidate_buffer_ptr->candidate_ptr->type == INTER_MODE ? EB_TRUE : EB_FALSE,
498 : #if !FILTER_INTRA_FLAG
499 : EB_FALSE, // NM - Hardcoded to false for the moment until we support the intra filtering
500 : #endif
501 : transform_size,
502 : transform_type,
503 : reducedTransformSetFlag);
504 :
505 : // Transform ebo bit estimation
506 43900500 : int32_t eob_cost = get_eob_cost(eob, eobBits, coeff_costs, transform_type);
507 43883900 : cost += eob_cost;
508 43883900 : if (allow_update_cdf)
509 9342 : eb_av1_update_eob_context(eob, transform_size, tx_class,
510 : plane_type, ec_ctx, allow_update_cdf);
511 : // Transform non-zero coeff bit estimation
512 43883900 : eb_av1_get_nz_map_contexts(
513 : levels,
514 : scan,
515 : eob,
516 : transform_size,
517 : tx_class,
518 : coeff_contexts); // NM - Assembly version is available in AOM
519 :
520 43885000 : if (allow_update_cdf)
521 : {
522 164958 : for (int c = eob - 1; c >= 0; --c) {
523 155616 : const int pos = scan[c];
524 155616 : const int coeff_ctx = coeff_contexts[pos];
525 155616 : const TranLow v = qcoeff[pos];
526 155616 : const TranLow level = abs(v);
527 :
528 155616 : if (allow_update_cdf) {
529 155616 : if (c == eob - 1) {
530 9342 : assert(coeff_ctx < 4);
531 9342 : update_cdf(
532 9342 : ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx],
533 9342 : AOMMIN(level, 3) - 1, 3);
534 : }
535 : else {
536 146274 : update_cdf(ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx],
537 146274 : AOMMIN(level, 3), 4);
538 : }
539 : }
540 :
541 : {
542 155616 : if (c == eob - 1) {
543 9342 : assert(coeff_ctx < 4);
544 : #if CONFIG_ENTROPY_STATS
545 : ++td->counts->coeff_base_eob_multi[cdf_idx][txsize_ctx][plane_type]
546 : [coeff_ctx][AOMMIN(level, 3) - 1];
547 : }
548 : else {
549 : ++td->counts->coeff_base_multi[cdf_idx][txsize_ctx][plane_type]
550 : [coeff_ctx][AOMMIN(level, 3)];
551 : #endif
552 : }
553 : }
554 :
555 155616 : if (level > NUM_BASE_LEVELS) {
556 6994 : const int base_range = level - 1 - NUM_BASE_LEVELS;
557 6994 : const int br_ctx = get_br_ctx(levels, pos, bwl, (const TxType)tx_class);
558 :
559 9457 : for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
560 9315 : const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1);
561 9315 : if (allow_update_cdf) {
562 9315 : update_cdf(ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)]
563 9315 : [plane_type][br_ctx],
564 : k, BR_CDF_SIZE);
565 : }
566 20851 : for (int lps = 0; lps < BR_CDF_SIZE - 1; lps++) {
567 : #if CONFIG_ENTROPY_STATS
568 : ++td->counts->coeff_lps[AOMMIN(txsize_ctx, TX_32X32)][plane_type][lps]
569 : [br_ctx][lps == k];
570 : #endif // CONFIG_ENTROPY_STATS
571 18388 : if (lps == k) break;
572 : }
573 : #if CONFIG_ENTROPY_STATS
574 : ++td->counts->coeff_lps_multi[cdf_idx][AOMMIN(txsize_ctx, TX_32X32)]
575 : [plane_type][br_ctx][k];
576 : #endif
577 9315 : if (k < BR_CDF_SIZE - 1) break;
578 : }
579 : }
580 : }
581 :
582 9342 : if (qcoeff[0] != 0) {
583 6978 : const int dc_sign = (qcoeff[0] < 0) ? 1 : 0;
584 6978 : if (allow_update_cdf)
585 6978 : update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], dc_sign, 2);
586 : }
587 :
588 : //TODO: CHKN for 128x128 where we need more than one TXb, we need to update the txb_context(dc_sign+skip_ctx) in a Txb basis.
589 :
590 9342 : return 0;
591 : }
592 :
593 43875700 : cost += av1_cost_coeffs_txb_loop_cost_eob(eob, scan, qcoeff,
594 : coeff_contexts, coeff_costs, dc_sign_ctx, levels, bwl, transform_type);
595 :
596 43835200 : return cost;
597 : }
598 : #if FILTER_INTRA_FLAG
599 : int av1_filter_intra_allowed_bsize(uint8_t enable_filter_intra, BlockSize bs);
600 : #if PAL_SUP
601 : int av1_filter_intra_allowed(
602 : uint8_t enable_filter_intra,
603 : BlockSize bsize,
604 : uint8_t palette_size,
605 : uint32_t mode);
606 : #else
607 : int av1_filter_intra_allowed(uint8_t enable_filter_intra, BlockSize bsize, uint32_t mode);
608 : #endif
609 : #endif
610 : /*static*/ void model_rd_from_sse(
611 : BlockSize bsize,
612 : int16_t quantizer,
613 : //const Av1Comp *const cpi,
614 : //const MacroBlockD *const xd,
615 : //BlockSize bsize,
616 : //int32_t plane,
617 : uint64_t sse,
618 : uint32_t *rate,
619 : uint64_t *dist);
620 :
621 241050000 : uint64_t av1_intra_fast_cost(
622 : CodingUnit *cu_ptr,
623 : ModeDecisionCandidate *candidate_ptr,
624 : uint32_t qp,
625 : uint64_t luma_distortion,
626 : uint64_t chroma_distortion,
627 : uint64_t lambda,
628 : EbBool use_ssd,
629 : PictureControlSet *picture_control_set_ptr,
630 : CandidateMv *ref_mv_stack,
631 : const BlockGeom *blk_geom,
632 : uint32_t miRow,
633 : uint32_t miCol,
634 : uint8_t md_pass,
635 : uint32_t left_neighbor_mode,
636 : uint32_t top_neighbor_mode)
637 :
638 : {
639 : UNUSED(qp);
640 : UNUSED(ref_mv_stack);
641 : UNUSED(miRow);
642 : UNUSED(miCol);
643 : UNUSED(left_neighbor_mode);
644 : UNUSED(top_neighbor_mode);
645 : UNUSED(md_pass);
646 :
647 241050000 : FrameHeader *frm_hdr = &picture_control_set_ptr->parent_pcs_ptr->frm_hdr;
648 241050000 : if (av1_allow_intrabc(picture_control_set_ptr->parent_pcs_ptr->av1_cm) && candidate_ptr->use_intrabc) {
649 0 : uint64_t lumaSad = (LUMA_WEIGHT * luma_distortion) << AV1_COST_PRECISION;
650 0 : uint64_t chromaSad = chroma_distortion << AV1_COST_PRECISION;
651 0 : uint64_t totalDistortion = lumaSad + chromaSad;
652 :
653 0 : uint64_t rate = 0;
654 :
655 0 : EbReflist refListIdx = 0;
656 0 : int16_t predRefX = candidate_ptr->motion_vector_pred_x[refListIdx];
657 0 : int16_t predRefY = candidate_ptr->motion_vector_pred_y[refListIdx];
658 0 : int16_t mvRefX = candidate_ptr->motion_vector_xl0;
659 0 : int16_t mvRefY = candidate_ptr->motion_vector_yl0;
660 : MV mv;
661 0 : mv.row = mvRefY;
662 0 : mv.col = mvRefX;
663 : MV ref_mv;
664 0 : ref_mv.row = predRefY;
665 0 : ref_mv.col = predRefX;
666 :
667 0 : int *dvcost[2] = { (int *)&candidate_ptr->md_rate_estimation_ptr->dv_cost[0][MV_MAX],
668 0 : (int *)&candidate_ptr->md_rate_estimation_ptr->dv_cost[1][MV_MAX] };
669 :
670 0 : int32_t mvRate = eb_av1_mv_bit_cost(
671 : &mv,
672 : &ref_mv,
673 0 : candidate_ptr->md_rate_estimation_ptr->dv_joint_cost,
674 : dvcost, MV_COST_WEIGHT_SUB);
675 :
676 0 : rate = mvRate + candidate_ptr->md_rate_estimation_ptr->intrabc_fac_bits[candidate_ptr->use_intrabc];
677 :
678 0 : candidate_ptr->fast_luma_rate = rate;
679 0 : candidate_ptr->fast_chroma_rate = 0;
680 :
681 0 : lumaSad = (LUMA_WEIGHT * luma_distortion) << AV1_COST_PRECISION;
682 0 : chromaSad = chroma_distortion << AV1_COST_PRECISION;
683 0 : totalDistortion = lumaSad + chromaSad;
684 :
685 0 : return(RDCOST(lambda, rate, totalDistortion));
686 : }
687 : else {
688 241021000 : EbBool isMonochromeFlag = EB_FALSE; // NM - isMonochromeFlag is harcoded to false.
689 241021000 : EbBool isCflAllowed = (blk_geom->bwidth <= 32 && blk_geom->bheight <= 32) ? 1 : 0;
690 :
691 241021000 : uint8_t subSamplingX = 1; // NM - subsampling_x is harcoded to 1 for 420 chroma sampling.
692 241021000 : uint8_t subSamplingY = 1; // NM - subsampling_y is harcoded to 1 for 420 chroma sampling.
693 : // In fast loop CFL alphas are not know yet. The chroma mode bits are calculated based on DC Mode, and if CFL is the winner compared to CFL, ChromaBits are updated
694 241021000 : uint32_t chroma_mode = candidate_ptr->intra_chroma_mode == UV_CFL_PRED ? UV_DC_PRED : candidate_ptr->intra_chroma_mode;
695 :
696 : // Number of bits for each synatax element
697 241021000 : uint64_t intraModeBitsNum = 0;
698 241021000 : uint64_t intraLumaModeBitsNum = 0;
699 241021000 : uint64_t intraLumaAngModeBitsNum = 0;
700 : #if FILTER_INTRA_FLAG
701 241021000 : uint64_t intra_filter_mode_bits_num = 0;
702 : #endif
703 241021000 : uint64_t intraChromaModeBitsNum = 0;
704 241021000 : uint64_t intraChromaAngModeBitsNum = 0;
705 241021000 : uint64_t skipModeRate = 0;
706 241021000 : uint8_t skipModeCtx = cu_ptr->skip_flag_context; // NM - Harcoded to 1 until the skip_mode context is added.
707 241021000 : PredictionMode intra_mode = (PredictionMode)candidate_ptr->pred_mode;
708 : // Luma and chroma rate
709 : uint32_t rate;
710 241021000 : uint32_t lumaRate = 0;
711 241021000 : uint32_t chromaRate = 0;
712 : uint64_t lumaSad, chromaSad;
713 :
714 : // Luma and chroma distortion
715 : uint64_t totalDistortion;
716 241021000 : const int32_t AboveCtx = intra_mode_context[top_neighbor_mode];
717 241021000 : const int32_t LeftCtx = intra_mode_context[left_neighbor_mode];
718 241021000 : intraModeBitsNum = picture_control_set_ptr->slice_type != I_SLICE ? (uint64_t)candidate_ptr->md_rate_estimation_ptr->mb_mode_fac_bits[size_group_lookup[blk_geom->bsize]][intra_mode] : ZERO_COST;
719 241021000 : skipModeRate = picture_control_set_ptr->slice_type != I_SLICE ? (uint64_t)candidate_ptr->md_rate_estimation_ptr->skip_mode_fac_bits[skipModeCtx][0] : ZERO_COST;
720 :
721 : // Estimate luma nominal intra mode bits
722 241021000 : intraLumaModeBitsNum = picture_control_set_ptr->slice_type == I_SLICE ? (uint64_t)candidate_ptr->md_rate_estimation_ptr->y_mode_fac_bits[AboveCtx][LeftCtx][intra_mode] : ZERO_COST;
723 : // Estimate luma angular mode bits
724 241021000 : if (blk_geom->bsize >= BLOCK_8X8 && candidate_ptr->is_directional_mode_flag) {
725 217096000 : assert((intra_mode - V_PRED) < 8);
726 217096000 : assert((intra_mode - V_PRED) >= 0);
727 217096000 : intraLumaAngModeBitsNum = candidate_ptr->md_rate_estimation_ptr->angle_delta_fac_bits[intra_mode - V_PRED][MAX_ANGLE_DELTA + candidate_ptr->angle_delta[PLANE_TYPE_Y]];
728 : }
729 : #if PAL_SUP
730 241021000 : if (av1_allow_palette(picture_control_set_ptr->parent_pcs_ptr->frm_hdr.allow_screen_content_tools, blk_geom->bsize) && intra_mode == DC_PRED) {
731 0 : const int use_palette = candidate_ptr->palette_info.pmi.palette_size[0] > 0;
732 0 : const int bsize_ctx = av1_get_palette_bsize_ctx(blk_geom->bsize);
733 0 : const int mode_ctx = av1_get_palette_mode_ctx(cu_ptr->av1xd);
734 0 : intraLumaModeBitsNum += candidate_ptr->md_rate_estimation_ptr->palette_ymode_fac_bits[bsize_ctx][mode_ctx][use_palette];
735 0 : if (use_palette) {
736 0 : const uint8_t *const color_map = candidate_ptr->palette_info.color_idx_map;
737 : int block_width, block_height, rows, cols;
738 0 : av1_get_block_dimensions(blk_geom->bsize, 0, cu_ptr->av1xd, &block_width, &block_height, &rows,
739 : &cols);
740 0 : const int plt_size = candidate_ptr->palette_info.pmi.palette_size[0];
741 0 : int palette_mode_cost =
742 0 : candidate_ptr->md_rate_estimation_ptr->palette_ysize_fac_bits[bsize_ctx][plt_size - PALETTE_MIN_SIZE] +
743 0 : write_uniform_cost(plt_size, color_map[0]);
744 : uint16_t color_cache[2 * PALETTE_MAX_SIZE];
745 0 : const int n_cache = eb_get_palette_cache(cu_ptr->av1xd, 0, color_cache);
746 0 : palette_mode_cost +=
747 0 : av1_palette_color_cost_y(&candidate_ptr->palette_info.pmi, color_cache,
748 : n_cache, EB_8BIT);
749 0 : palette_mode_cost +=
750 0 : av1_cost_color_map(&candidate_ptr->palette_info, candidate_ptr->md_rate_estimation_ptr, cu_ptr, 0, blk_geom->bsize, PALETTE_MAP);
751 0 : intraLumaModeBitsNum += palette_mode_cost;
752 : }
753 : }
754 : #endif
755 : #if FILTER_INTRA_FLAG
756 : #if PAL_SUP
757 241364000 : if (av1_filter_intra_allowed(picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.enable_filter_intra, blk_geom->bsize, candidate_ptr->palette_info.pmi.palette_size[0], intra_mode)) {
758 : #else
759 : if (av1_filter_intra_allowed(picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.enable_filter_intra, blk_geom->bsize, intra_mode)) {
760 : #endif
761 4632700 : intra_filter_mode_bits_num = candidate_ptr->md_rate_estimation_ptr->filter_intra_fac_bits[blk_geom->bsize][candidate_ptr->filter_intra_mode != FILTER_INTRA_MODES];
762 4632700 : if (candidate_ptr->filter_intra_mode != FILTER_INTRA_MODES) {
763 1048440 : intra_filter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->filter_intra_mode_fac_bits[candidate_ptr->filter_intra_mode];
764 : }
765 : }
766 : #endif
767 :
768 241338000 : if (blk_geom->has_uv) {
769 239762000 : if (!isMonochromeFlag && is_chroma_reference(miRow, miCol, blk_geom->bsize, subSamplingX, subSamplingY)) {
770 : // Estimate luma nominal intra mode bits
771 239416000 : intraChromaModeBitsNum = (uint64_t)candidate_ptr->md_rate_estimation_ptr->intra_uv_mode_fac_bits[isCflAllowed][intra_mode][chroma_mode];
772 : // Estimate luma angular mode bits
773 239416000 : if (blk_geom->bsize >= BLOCK_8X8 && candidate_ptr->is_directional_chroma_mode_flag) {
774 210981000 : intraChromaAngModeBitsNum = candidate_ptr->md_rate_estimation_ptr->angle_delta_fac_bits[chroma_mode - V_PRED][MAX_ANGLE_DELTA + candidate_ptr->angle_delta[PLANE_TYPE_UV]];
775 : }
776 : #if PAL_SUP
777 239416000 : if (av1_allow_palette(picture_control_set_ptr->parent_pcs_ptr->frm_hdr.allow_screen_content_tools, blk_geom->bsize) && chroma_mode == UV_DC_PRED) {
778 0 : const PaletteModeInfo *pmi = &candidate_ptr->palette_info.pmi;
779 0 : const int use_palette = pmi->palette_size[1] > 0;
780 0 : intraChromaAngModeBitsNum +=
781 0 : candidate_ptr->md_rate_estimation_ptr->palette_uv_mode_fac_bits[pmi->palette_size[0] > 0][use_palette];
782 : }
783 : #endif
784 : }
785 : }
786 :
787 241202000 : uint32_t isInterRate = picture_control_set_ptr->slice_type != I_SLICE ? candidate_ptr->md_rate_estimation_ptr->intra_inter_fac_bits[cu_ptr->is_inter_ctx][0] : 0;
788 : #if FILTER_INTRA_FLAG
789 241202000 : lumaRate = (uint32_t)(intraModeBitsNum + skipModeRate + intraLumaModeBitsNum + intraLumaAngModeBitsNum + isInterRate + intra_filter_mode_bits_num);
790 : #else
791 : lumaRate = (uint32_t)(intraModeBitsNum + skipModeRate + intraLumaModeBitsNum + intraLumaAngModeBitsNum + isInterRate);
792 : #endif
793 241202000 : if (av1_allow_intrabc(picture_control_set_ptr->parent_pcs_ptr->av1_cm))
794 0 : lumaRate += candidate_ptr->md_rate_estimation_ptr->intrabc_fac_bits[candidate_ptr->use_intrabc];
795 :
796 240986000 : chromaRate = (uint32_t)(intraChromaModeBitsNum + intraChromaAngModeBitsNum);
797 :
798 : // Keep the Fast Luma and Chroma rate for future use
799 240986000 : candidate_ptr->fast_luma_rate = lumaRate;
800 240986000 : candidate_ptr->fast_chroma_rate = chromaRate;
801 240986000 : if (use_ssd) {
802 0 : int32_t current_q_index = frm_hdr->quantization_params.base_q_idx;
803 0 : Dequants *const dequants = &picture_control_set_ptr->parent_pcs_ptr->deq;
804 :
805 0 : int16_t quantizer = dequants->y_dequant_Q3[current_q_index][1];
806 0 : rate = 0;
807 0 : model_rd_from_sse(
808 0 : blk_geom->bsize,
809 : quantizer,
810 : luma_distortion,
811 : &rate,
812 : &lumaSad);
813 0 : lumaRate += rate;
814 0 : totalDistortion = lumaSad;
815 :
816 0 : rate = 0;
817 0 : model_rd_from_sse(
818 0 : blk_geom->bsize_uv,
819 : quantizer,
820 : chroma_distortion,
821 : &chromaRate,
822 : &chromaSad);
823 0 : chromaRate += rate;
824 0 : totalDistortion += chromaSad;
825 :
826 0 : rate = lumaRate + chromaRate;
827 :
828 0 : return(RDCOST(lambda, rate, totalDistortion));
829 : }
830 : else {
831 240986000 : lumaSad = (LUMA_WEIGHT * luma_distortion) << AV1_COST_PRECISION;
832 240986000 : chromaSad = chroma_distortion << AV1_COST_PRECISION;
833 240986000 : totalDistortion = lumaSad + chromaSad;
834 :
835 240986000 : rate = lumaRate + chromaRate;
836 :
837 : // Assign fast cost
838 240986000 : return(RDCOST(lambda, rate, totalDistortion));
839 : }
840 : }
841 : }
842 :
843 : //extern INLINE int32_t have_newmv_in_inter_mode(PredictionMode mode);
// Returns non-zero when `mode` is one of the six modes tested below
// (NEWMV, NEW_NEWMV, NEAREST_NEWMV, NEW_NEARESTMV, NEAR_NEWMV, NEW_NEARMV)
// and zero for every other mode.
844 106130000 : static INLINE int32_t have_newmv_in_inter_mode(PredictionMode mode) {
845 98157700 : return (mode == NEWMV || mode == NEW_NEWMV || mode == NEAREST_NEWMV ||
846 204288000 : mode == NEW_NEARESTMV || mode == NEAR_NEWMV || mode == NEW_NEARMV);
847 : }
848 :
849 : extern void av1_set_ref_frame(MvReferenceFrame *rf,
850 : int8_t ref_frame_type);
851 :
// Returns non-zero when the second reference slot of `mbmi` holds a value
// greater than INTRA_FRAME, i.e. the block carries a second (compound) reference.
852 173456000 : static INLINE int has_second_ref(const MbModeInfo *mbmi) {
853 173456000 : return mbmi->block_mi.ref_frame[1] > INTRA_FRAME;
854 : }
855 :
// Returns non-zero when `mbmi` has a second reference (see has_second_ref) and
// both references fall on the same side of BWDREF_FRAME: either both are
// >= BWDREF_FRAME or both are < BWDREF_FRAME (the XOR of the two comparisons is 0).
856 71921200 : static INLINE int has_uni_comp_refs(const MbModeInfo *mbmi) {
857 143841000 : return has_second_ref(mbmi) && (!((mbmi->block_mi.ref_frame[0] >= BWDREF_FRAME) ^
858 71919700 : (mbmi->block_mi.ref_frame[1] >= BWDREF_FRAME)));
859 : }
860 :
861 : // Estimates the rate (sum of rate-table entries) for signalling the reference frame(s); nothing is written to a bitstream here.
//
// Every term comes from a table in candidate_ptr->md_rate_estimation_ptr and
// is accumulated in one of the local refRateX variables, which all start at 0;
// the return value is their sum.
//
// Parameters:
//   picture_control_set_ptr - only parent_pcs_ptr->frm_hdr.reference_mode is read.
//   candidate_ptr           - supplies the rate tables (md_rate_estimation_ptr).
//   cu_ptr                  - supplies av1xd (passed to the context helpers) and,
//                             when md_pass != 1, the stored reference_mode_context
//                             and compoud_reference_type_context values.
//   bwidth, bheight         - the compound/single selection term is only added
//                             when MIN(bwidth, bheight) >= 8.
//   ref_frame_type          - decoded into a reference pair by av1_set_ref_frame().
//   md_pass                 - 1 selects the first branch below; any other value
//                             selects the second branch.
//   is_compound             - selects the compound or the single-reference terms.
//
// Side effect: when md_pass == 1 the decoded reference pair is written into
// cu_ptr->av1xd->mi[0]->mbmi.block_mi.ref_frame.
862 106443000 : uint64_t EstimateRefFramesNumBits(
863 : PictureControlSet *picture_control_set_ptr,
864 : ModeDecisionCandidate *candidate_ptr,
865 : CodingUnit *cu_ptr,
866 : uint32_t bwidth,
867 : uint32_t bheight,
868 : uint8_t ref_frame_type,
869 : uint8_t md_pass,
870 : EbBool is_compound)
871 : {
872 :
873 106443000 : FrameHeader *frm_hdr = &picture_control_set_ptr->parent_pcs_ptr->frm_hdr;
874 106443000 : uint64_t refRateBits = 0;
875 :
// md_pass == 1: every context is obtained by calling a helper on cu_ptr->av1xd.
876 106443000 : if (md_pass == 1) {
877 102656000 : uint64_t refRateA = 0;
878 102656000 : uint64_t refRateB = 0;
879 102656000 : uint64_t refRateC = 0;
880 102656000 : uint64_t refRateD = 0;
881 102656000 : uint64_t refRateE = 0;
882 102656000 : uint64_t refRateF = 0;
883 102656000 : uint64_t refRateG = 0;
884 102656000 : uint64_t refRateH = 0;
885 102656000 : uint64_t refRateI = 0;
886 102656000 : uint64_t refRateJ = 0;
887 102656000 : uint64_t refRateK = 0;
888 102656000 : uint64_t refRateL = 0;
889 102656000 : uint64_t refRateM = 0;
890 102656000 : uint64_t refRateN = 0;
891 102656000 : uint64_t refRateO = 0;
892 102656000 : uint64_t refRateP = 0;
893 : // const MbModeInfo *const mbmi = &cu_ptr->av1xd->mi[0]->mbmi;
894 102656000 : MbModeInfo *const mbmi = &cu_ptr->av1xd->mi[0]->mbmi;
895 : MvReferenceFrame refType[2];
896 102656000 : av1_set_ref_frame(refType, ref_frame_type);
// Side effect: the decoded pair is stored in the block's mode info; the
// comparisons below read it back through mbmi.
897 102683000 : mbmi->block_mi.ref_frame[0] = refType[0];
898 102683000 : mbmi->block_mi.ref_frame[1] = refType[1];
899 : //const int is_compound = has_second_ref(mbmi);
900 : {
901 : // does the feature use compound prediction or not
902 : // (if not specified at the frame/segment level)
903 102683000 : if (frm_hdr->reference_mode == REFERENCE_MODE_SELECT) {
904 101555000 : if (MIN(bwidth, bheight) >= 8) {
905 : //aom_write_symbol(w, is_compound, av1_get_reference_mode_cdf(cu_ptr->av1xd), 2);
906 96824400 : int32_t context = av1_get_reference_mode_context_new(cu_ptr->av1xd);
907 96684300 : refRateA = candidate_ptr->md_rate_estimation_ptr->comp_inter_fac_bits[context][is_compound];
908 : }
909 : }
910 : else {
911 1128420 : assert((!is_compound) ==
912 : (frm_hdr->reference_mode == SINGLE_REFERENCE));
913 : }
914 :
915 102543000 : if (is_compound) {
916 71940200 : const CompReferenceType comp_ref_type = has_uni_comp_refs(mbmi)
917 : ? UNIDIR_COMP_REFERENCE
918 71929600 : : BIDIR_COMP_REFERENCE;
919 :
920 71929600 : const int pred_context = av1_get_comp_reference_type_context_new(cu_ptr->av1xd);
921 71931400 : refRateB = candidate_ptr->md_rate_estimation_ptr->comp_ref_type_fac_bits[pred_context][comp_ref_type];
922 : /*aom_write_symbol(w, comp_ref_type, av1_get_comp_reference_type_cdf(cu_ptr->av1xd),
923 : 2);*/
924 :
925 71931400 : if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
926 : //printf("ERROR[AN]: UNIDIR_COMP_REFERENCE not supported\n");
927 15853800 : const int bit = mbmi->block_mi.ref_frame[0] == BWDREF_FRAME;
928 :
929 15853800 : const int pred_context = eb_av1_get_pred_context_uni_comp_ref_p(cu_ptr->av1xd);
930 15854100 : refRateC = candidate_ptr->md_rate_estimation_ptr->uni_comp_ref_fac_bits[pred_context][0][bit];
931 : //cu_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][0];
932 : //WRITE_REF_BIT(bit, uni_comp_ref_p);
933 :
934 15854100 : if (!bit) {
935 13946500 : assert(mbmi->block_mi.ref_frame[0] == LAST_FRAME);
936 22326100 : const int bit1 = mbmi->block_mi.ref_frame[1] == LAST3_FRAME ||
937 8379630 : mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME;
938 13946500 : const int pred_context = eb_av1_get_pred_context_uni_comp_ref_p1(cu_ptr->av1xd);
939 13946800 : refRateD = candidate_ptr->md_rate_estimation_ptr->uni_comp_ref_fac_bits[pred_context][1][bit1];
940 : //refRateD = cu_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][1];
941 : //WRITE_REF_BIT(bit1, uni_comp_ref_p1);
942 13946800 : if (bit1) {
943 8167340 : const int bit2 = mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME;
944 8167340 : const int pred_context = eb_av1_get_pred_context_uni_comp_ref_p2(cu_ptr->av1xd);
945 8167660 : refRateE = candidate_ptr->md_rate_estimation_ptr->uni_comp_ref_fac_bits[pred_context][2][bit2];
946 :
947 : // refRateE = cu_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][2];
948 : //WRITE_REF_BIT(bit2, uni_comp_ref_p2);
949 : }
950 : }
951 : //else {
952 : // assert(mbmi->block_mi.ref_frame[1] == ALTREF_FRAME);
953 : //}
// Unidirectional compound returns here: only refRateA..refRateE can be
// non-zero at this point, the remaining terms in the sum are still 0.
954 15854700 : refRateBits = refRateA + refRateB + refRateC + refRateD + refRateE + refRateF + refRateG + refRateH + refRateI + refRateJ + refRateK + refRateL + refRateM;
955 15854700 : return refRateBits;
956 : //return;
957 : }
958 :
959 56077600 : assert(comp_ref_type == BIDIR_COMP_REFERENCE);
960 :
961 106458000 : const int bit = (mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME ||
962 50380600 : mbmi->block_mi.ref_frame[0] == LAST3_FRAME);
963 56077600 : const int pred_ctx = eb_av1_get_pred_context_comp_ref_p(cu_ptr->av1xd);
964 56060000 : refRateF = candidate_ptr->md_rate_estimation_ptr->comp_ref_fac_bits[pred_ctx][0][bit];
965 : //refRateF = cu_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_ctx][0];
966 : //WRITE_REF_BIT(bit, comp_ref_p);
967 :
968 56060000 : if (!bit) {
969 38210500 : const int bit1 = mbmi->block_mi.ref_frame[0] == LAST2_FRAME;
970 38210500 : const int pred_context = eb_av1_get_pred_context_comp_ref_p1(cu_ptr->av1xd);
971 38206300 : refRateG = candidate_ptr->md_rate_estimation_ptr->comp_ref_fac_bits[pred_context][1][bit1];
972 : //refRateG = cu_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_context][1];
973 : //WRITE_REF_BIT(bit1, comp_ref_p1);
974 : }
975 : else {
976 17849500 : const int bit2 = mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME;
977 17849500 : const int pred_context = eb_av1_get_pred_context_comp_ref_p2(cu_ptr->av1xd);
978 17935400 : refRateH = candidate_ptr->md_rate_estimation_ptr->comp_ref_fac_bits[pred_context][2][bit2];
979 : //refRateH = cu_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_context][2];
980 : //WRITE_REF_BIT(bit2, comp_ref_p2);
981 : }
982 :
983 56141700 : const int bit_bwd = mbmi->block_mi.ref_frame[1] == ALTREF_FRAME;
984 56141700 : const int pred_ctx_2 = eb_av1_get_pred_context_comp_bwdref_p(cu_ptr->av1xd);
985 56045300 : refRateI = candidate_ptr->md_rate_estimation_ptr->comp_bwd_ref_fac_bits[pred_ctx_2][0][bit_bwd];
986 : //refRateI = cu_ptr->av1xd->tile_ctx->comp_bwdref_cdf[pred_ctx_2][0];
987 : //WRITE_REF_BIT(bit_bwd, comp_bwdref_p);
988 :
989 56045300 : if (!bit_bwd) {
990 50368400 : const int pred_context = eb_av1_get_pred_context_comp_bwdref_p1(cu_ptr->av1xd);
991 50380600 : refRateJ = candidate_ptr->md_rate_estimation_ptr->comp_bwd_ref_fac_bits[pred_context][1][refType[1] == ALTREF2_FRAME];
992 : //refRateJ = cu_ptr->av1xd->tile_ctx->comp_bwdref_cdf[pred_context][1];
993 : //WRITE_REF_BIT(mbmi->block_mi.ref_frame[1] == ALTREF2_FRAME, comp_bwdref_p1);
994 : }
995 : }
996 : else {
997 61384800 : const int bit0 = (mbmi->block_mi.ref_frame[0] <= ALTREF_FRAME &&
998 30781700 : mbmi->block_mi.ref_frame[0] >= BWDREF_FRAME);
999 30603200 : refRateK = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[eb_av1_get_pred_context_single_ref_p1(cu_ptr->av1xd)][0][bit0];
1000 : //refRateK = cu_ptr->av1xd->tile_ctx->single_ref_cdf[eb_av1_get_pred_context_single_ref_p1(cu_ptr->av1xd)][0];
1001 : //WRITE_REF_BIT(bit0, single_ref_p1);
1002 :
1003 30780400 : if (bit0) {
1004 11650800 : const int bit1 = mbmi->block_mi.ref_frame[0] == ALTREF_FRAME;
1005 11650800 : refRateL = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[eb_av1_get_pred_context_single_ref_p2(cu_ptr->av1xd)][1][bit1];
1006 : //refRateL = cu_ptr->av1xd->tile_ctx->single_ref_cdf[eb_av1_get_pred_context_single_ref_p2(cu_ptr->av1xd)][1];
1007 : //WRITE_REF_BIT(bit1, single_ref_p2);
1008 11675700 : if (!bit1) {
1009 10611000 : refRateM = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[eb_av1_get_pred_context_single_ref_p6(cu_ptr->av1xd)][5][ref_frame_type == ALTREF2_FRAME];
1010 : //refRateM = cu_ptr->av1xd->tile_ctx->single_ref_cdf[eb_av1_get_pred_context_single_ref_p6(cu_ptr->av1xd)][5];
1011 : //WRITE_REF_BIT(mbmi->block_mi.ref_frame[0] == ALTREF2_FRAME, single_ref_p6);
1012 : }
1013 : }
1014 : else {
1015 34371200 : const int bit2 = (mbmi->block_mi.ref_frame[0] == LAST3_FRAME ||
1016 15241500 : mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME);
1017 19129600 : refRateN = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[eb_av1_get_pred_context_single_ref_p3(cu_ptr->av1xd)][2][bit2];
1018 : //refRateN = cu_ptr->av1xd->tile_ctx->single_ref_cdf[eb_av1_get_pred_context_single_ref_p3(cu_ptr->av1xd)][2];
1019 : //WRITE_REF_BIT(bit2, single_ref_p3);
1020 19121500 : if (!bit2) {
1021 12968000 : const int bit3 = mbmi->block_mi.ref_frame[0] != LAST_FRAME;
1022 12968000 : refRateO = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[eb_av1_get_pred_context_single_ref_p4(cu_ptr->av1xd)][3][bit3];
1023 : //refRateO = cu_ptr->av1xd->tile_ctx->single_ref_cdf[eb_av1_get_pred_context_single_ref_p4(cu_ptr->av1xd)][3];
1024 : //WRITE_REF_BIT(bit3, single_ref_p4);
1025 : }
1026 : else {
1027 6153560 : const int bit4 = mbmi->block_mi.ref_frame[0] != LAST3_FRAME;
1028 6153560 : refRateP = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[eb_av1_get_pred_context_single_ref_p5(cu_ptr->av1xd)][4][bit4];
1029 : //refRateP = cu_ptr->av1xd->tile_ctx->single_ref_cdf[eb_av1_get_pred_context_single_ref_p5(cu_ptr->av1xd)][4];
1030 : //WRITE_REF_BIT(bit4, single_ref_p5);
1031 : }
1032 : }
1033 : }
1034 : }
1035 86605600 : refRateBits = refRateA + refRateB + refRateC + refRateD + refRateE + refRateF + refRateG + refRateH + refRateI +
1036 86605600 : refRateJ + refRateK + refRateL + refRateM + refRateN + refRateO + refRateP;
1037 : }
1038 : else {
// md_pass != 1: the reference-mode and compound-reference-type contexts are
// read from fields stored on cu_ptr instead of being recomputed, the block's
// mode info is not modified, and compound is always costed as bidirectional.
1039 3787850 : uint64_t refRateA = 0;
1040 3787850 : uint64_t refRateB = 0;
1041 3787850 : uint64_t refRateC = 0;
1042 3787850 : uint64_t refRateD = 0;
1043 3787850 : uint64_t refRateE = 0;
1044 3787850 : uint64_t refRateF = 0;
1045 3787850 : uint64_t refRateG = 0;
1046 3787850 : uint64_t refRateH = 0;
1047 3787850 : uint64_t refRateI = 0;
1048 3787850 : uint64_t refRateJ = 0;
1049 3787850 : uint64_t refRateK = 0;
1050 3787850 : uint64_t refRateL = 0;
1051 3787850 : uint64_t refRateM = 0;
1052 :
1053 : // If segment level coding of this signal is disabled...
1054 : // or the segment allows multiple reference frame options
1055 : /*if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
1056 : assert(!is_compound);
1057 : assert(mbmi->block_mi.ref_frame[0] ==
1058 : get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME));
1059 : }
1060 : else if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP) ||
1061 : segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) {
1062 : assert(!is_compound);
1063 : assert(mbmi->block_mi.ref_frame[0] == LAST_FRAME);
1064 : }
1065 : else*/ {
1066 : // does the feature use compound prediction or not
1067 : // (if not specified at the frame/segment level)
1068 3787850 : if (frm_hdr->reference_mode == REFERENCE_MODE_SELECT) {
1069 3622570 : if (MIN(bwidth, bheight) >= 8) {
1070 1639040 : int32_t context = 0;
1071 1639040 : context = cu_ptr->reference_mode_context;
1072 1639040 : assert(context >= 0 && context < 5);
1073 1639070 : refRateA = candidate_ptr->md_rate_estimation_ptr->comp_inter_fac_bits[context][is_compound];
1074 : }
1075 : }
1076 : else
1077 165285 : assert((!is_compound) == (frm_hdr->reference_mode == SINGLE_REFERENCE));
1078 3787870 : int32_t context = 0;
1079 3787870 : if (is_compound) {
1080 191394 : const CompReferenceType comp_ref_type = /*has_uni_comp_refs(mbmi)
1081 : ? UNIDIR_COMP_REFERENCE
1082 : : */BIDIR_COMP_REFERENCE;
1083 : MvReferenceFrame refType[2];
1084 191394 : av1_set_ref_frame(refType, ref_frame_type);
1085 :
1086 191277 : context = cu_ptr->compoud_reference_type_context;
1087 191277 : assert(context >= 0 && context < 5);
1088 191275 : refRateB = candidate_ptr->md_rate_estimation_ptr->comp_ref_type_fac_bits[context][comp_ref_type];
1089 :
1090 191275 : if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
// Never taken: comp_ref_type is fixed to BIDIR_COMP_REFERENCE above.
1091 0 : printf("ERROR[AN]: UNIDIR_COMP_REFERENCE not supported\n");
1092 : //const int32_t bit = mbmi->block_mi.ref_frame[0] == BWDREF_FRAME;
1093 : //WRITE_REF_BIT(bit, uni_comp_ref_p);
1094 :
1095 : //if (!bit) {
1096 : // assert(mbmi->block_mi.ref_frame[0] == LAST_FRAME);
1097 : // const int32_t bit1 = mbmi->block_mi.ref_frame[1] == LAST3_FRAME ||
1098 : // mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME;
1099 : // WRITE_REF_BIT(bit1, uni_comp_ref_p1);
1100 : // if (bit1) {
1101 : // const int32_t bit2 = mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME;
1102 : // WRITE_REF_BIT(bit2, uni_comp_ref_p2);
1103 : // }
1104 : //}
1105 : //else {
1106 : // assert(mbmi->block_mi.ref_frame[1] == ALTREF_FRAME);
1107 : //}
1108 :
1109 : //return;
1110 : }
1111 :
1112 191273 : assert(comp_ref_type == BIDIR_COMP_REFERENCE);
1113 :
1114 382538 : const int32_t bit = (refType[0] == GOLDEN_FRAME ||
1115 191265 : refType[0] == LAST3_FRAME);
1116 :
1117 191273 : context = eb_av1_get_pred_context_comp_ref_p(cu_ptr->av1xd);
1118 191177 : assert(context >= 0 && context < 3);
1119 191179 : refRateC = candidate_ptr->md_rate_estimation_ptr->comp_ref_fac_bits[context][0][bit];
1120 : // WRITE_REF_BIT(bit, comp_ref_p);
1121 :
1122 191179 : if (!bit) {
1123 191188 : const int32_t bit1 = (refType[0] == LAST2_FRAME);
1124 191188 : context = eb_av1_get_pred_context_comp_ref_p1(cu_ptr->av1xd);
1125 : /*aom_write_symbol(ec_writer, bit1, frameContext->comp_ref_cdf[context][1],
1126 : 2);*/
1127 191082 : assert(context >= 0 && context < 3);
1128 191083 : refRateD = candidate_ptr->md_rate_estimation_ptr->comp_ref_fac_bits[context][1][bit1];
1129 :
1130 : //WRITE_REF_BIT(bit1, comp_ref_p1);
1131 : }
1132 : else {
1133 0 : const int32_t bit2 = (refType[0] == GOLDEN_FRAME);
1134 0 : context = eb_av1_get_pred_context_comp_ref_p2(cu_ptr->av1xd);
1135 : /*aom_write_symbol(ec_writer, bit2, frameContext->comp_ref_cdf[context][2],
1136 : 2);*/
1137 0 : assert(context >= 0 && context < 3);
1138 0 : refRateE = candidate_ptr->md_rate_estimation_ptr->comp_ref_fac_bits[context][2][bit2];
1139 :
1140 : //WRITE_REF_BIT(bit2, comp_ref_p2);
1141 : }
1142 :
1143 191083 : const int32_t bit_bwd = (refType[1] == ALTREF_FRAME);
1144 191083 : context = eb_av1_get_pred_context_comp_bwdref_p(cu_ptr->av1xd);
1145 : /*aom_write_symbol(ec_writer, bit_bwd, frameContext->comp_bwdref_cdf[context][0],
1146 : 2);*/
1147 191051 : assert(context >= 0 && context < 3);
1148 191054 : refRateF = candidate_ptr->md_rate_estimation_ptr->comp_bwd_ref_fac_bits[context][0][bit_bwd];
1149 : //WRITE_REF_BIT(bit_bwd, comp_bwdref_p);
1150 :
1151 191054 : if (!bit_bwd) {
1152 191065 : context = eb_av1_get_pred_context_comp_bwdref_p1(cu_ptr->av1xd);
1153 : /*aom_write_symbol(ec_writer, refType[1] == ALTREF2_FRAME, frameContext->comp_bwdref_cdf[context][1],
1154 : 2);*/
1155 191077 : assert(context >= 0 && context < 3);
1156 191080 : refRateG = candidate_ptr->md_rate_estimation_ptr->comp_bwd_ref_fac_bits[context][1][refType[1] == ALTREF2_FRAME];
1157 : //WRITE_REF_BIT(mbmi->block_mi.ref_frame[1] == ALTREF2_FRAME, comp_bwdref_p1);
1158 : }
1159 : }
1160 : else {
// Single reference: the decisions below compare ref_frame_type directly
// against the frame constants.
1161 3596480 : const int32_t bit0 = (ref_frame_type <= ALTREF_FRAME &&
1162 : ref_frame_type >= BWDREF_FRAME);//0
1163 :
1164 3596480 : context = eb_av1_get_pred_context_single_ref_p1(cu_ptr->av1xd);
1165 : /*aom_write_symbol(ec_writer, bit0, frameContext->single_ref_cdf[context][0],
1166 : 2);*/
1167 3628660 : assert(context >= 0 && context < 3);
1168 3628670 : refRateH = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[context][0][bit0];
1169 : //WRITE_REF_BIT(bit0, single_ref_p1);
1170 :
1171 3628670 : if (bit0) {
1172 0 : const int32_t bit1 = (ref_frame_type == ALTREF_FRAME);
1173 0 : context = eb_av1_get_pred_context_single_ref_p2(cu_ptr->av1xd);
1174 0 : assert(context >= 0 && context < 3);
1175 : /*aom_write_symbol(ec_writer, bit1, frameContext->single_ref_cdf[context][1],
1176 : 2);*/
1177 0 : refRateI = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[context][1][bit1];
1178 : //WRITE_REF_BIT(bit1, single_ref_p2);
1179 :
1180 0 : if (!bit1) {
1181 0 : context = eb_av1_get_pred_context_single_ref_p6(cu_ptr->av1xd);
1182 : /*aom_write_symbol(ec_writer, cu_ptr->prediction_unit_array[0].ref_frame_type == ALTREF2_FRAME, frameContext->single_ref_cdf[context][5],
1183 : 2);*/
1184 0 : assert(context >= 0 && context < 3);
1185 0 : refRateJ = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[context][5][ref_frame_type == ALTREF2_FRAME];
1186 : //WRITE_REF_BIT(mbmi->block_mi.ref_frame[0] == ALTREF2_FRAME, single_ref_p6);
1187 : }
1188 : }
1189 : else {
1190 3628670 : const int32_t bit2 = (ref_frame_type == LAST3_FRAME ||
1191 : ref_frame_type == GOLDEN_FRAME); //0
1192 3628670 : context = eb_av1_get_pred_context_single_ref_p3(cu_ptr->av1xd);
1193 : /*aom_write_symbol(ec_writer, bit2, frameContext->single_ref_cdf[context][2],
1194 : 2);*/
1195 3628350 : assert(context >= 0 && context < 3);
1196 3628360 : refRateK = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[context][2][bit2];
1197 : //WRITE_REF_BIT(bit2, single_ref_p3);
1198 :
1199 3628360 : if (!bit2) {
1200 3101030 : const int32_t bit3 = (ref_frame_type != LAST_FRAME); //0;
1201 3101030 : context = eb_av1_get_pred_context_single_ref_p4(cu_ptr->av1xd);
1202 3100990 : assert(context >= 0 && context < 3);
1203 : /*aom_write_symbol(ec_writer, bit3, frameContext->single_ref_cdf[context][3],
1204 : 2);*/
1205 3100990 : refRateL = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[context][3][bit3];
1206 : //WRITE_REF_BIT(bit3, single_ref_p4);
1207 : }
1208 : else {
1209 527333 : const int32_t bit4 = (ref_frame_type != LAST3_FRAME);
1210 527333 : context = eb_av1_get_pred_context_single_ref_p5(cu_ptr->av1xd);
1211 : /*aom_write_symbol(ec_writer, bit4, frameContext->single_ref_cdf[context][4],
1212 : 2);*/
1213 527892 : assert(context >= 0 && context < 3);
1214 527892 : refRateM = candidate_ptr->md_rate_estimation_ptr->single_ref_fac_bits[context][4][bit4];
1215 : //WRITE_REF_BIT(bit4, single_ref_p5);
1216 : }
1217 : }
1218 : }
1219 : }
1220 :
1221 3819950 : refRateBits = refRateA + refRateB + refRateC + refRateD + refRateE + refRateF + refRateG + refRateH + refRateI + refRateJ + refRateK + refRateL + refRateM;
1222 :
1223 : }
1224 90425500 : return refRateBits;
1225 : }
1226 : //extern INLINE int16_t Av1ModeContextAnalyzer(const int16_t *const mode_context, const MvReferenceFrame *const rf);
1227 :
1228 : extern int8_t av1_ref_frame_type(const MvReferenceFrame *const rf);
// Lookup table read by Av1ModeContextAnalyzer for compound references: the row
// is (refmv_ctx >> 1), asserted there to be < 3, and the column is newmv_ctx
// clamped to COMP_NEWMV_CTXS - 1. The stored value is returned as the mode context.
// NOTE(review): not declared static/const, so other translation units may
// reference or modify it - confirm before tightening the linkage.
1229 : uint16_t compound_mode_ctx_map_2[3][COMP_NEWMV_CTXS] = {
1230 : { 0, 1, 1, 1, 1 },
1231 : { 1, 2, 3, 4, 4 },
1232 : { 4, 4, 5, 6, 7 },
1233 : };
// Derives the inter-mode context for the reference pair `rf`.
//
//   mode_context - array indexed by the value av1_ref_frame_type(rf) returns.
//   rf           - reference pair; rf[1] <= INTRA_FRAME means single reference.
//
// Single reference: returns mode_context[ref_frame] unchanged.
// Compound reference: extracts the NEWMV and REFMV sub-contexts from that
// entry and returns the combined value looked up in compound_mode_ctx_map_2.
1234 106499000 : static INLINE int16_t Av1ModeContextAnalyzer(
1235 : const int16_t *const mode_context, const MvReferenceFrame *const rf) {
1236 106499000 : const int8_t ref_frame = av1_ref_frame_type(rf);
1237 :
1238 106411000 : if (rf[1] <= INTRA_FRAME) return mode_context[ref_frame];
1239 :
1240 71995700 : const int16_t newmv_ctx = mode_context[ref_frame] & NEWMV_CTX_MASK;
1241 71995700 : const int16_t refmv_ctx =
1242 71995700 : (mode_context[ref_frame] >> REFMV_OFFSET) & REFMV_CTX_MASK;
// The table has 3 rows; the assert guards the row index used just below.
1243 71995700 : assert((refmv_ctx >> 1) < 3);
1244 71995700 : const int16_t comp_ctx = compound_mode_ctx_map_2[refmv_ctx >> 1][AOMMIN(
1245 : newmv_ctx, COMP_NEWMV_CTXS - 1)];
1246 71995700 : return comp_ctx;
1247 : }
1248 :
1249 : int get_comp_index_context_enc(
1250 : PictureParentControlSet *pcs_ptr,
1251 : int cur_frame_index,
1252 : int bck_frame_index,
1253 : int fwd_frame_index,
1254 : const MacroBlockD *xd);
1255 : int get_comp_group_idx_context_enc(const MacroBlockD *xd);
1256 : int is_any_masked_compound_used(BlockSize sb_type);
// Estimates the rate of the compound-type signalling for a candidate.
//
//   md_pass                  - when 0 the function returns 0 immediately.
//   candidate_ptr            - supplies comp_group_idx, compound_idx,
//                              interinter_comp, pred_mode and the rate tables.
//   cu_ptr                   - supplies av1xd for the context helpers.
//   ref_frame_type           - decoded into a reference pair by av1_set_ref_frame().
//   bsize                    - block size used for the masked-compound / wedge
//                              checks and as a rate-table index.
//   sequence_control_set_ptr - seq_header.enable_masked_compound and
//                              order_hint_info.enable_jnt_comp are read.
//   picture_control_set_ptr  - parent_pcs_ptr order hints feed the compound
//                              index context.
//
// Returns the accumulated rate; it stays 0 when the decoded pair has no
// second reference.
//
// Side effect: the decoded reference pair is written into
// cu_ptr->av1xd->mi[0]->mbmi.block_mi.ref_frame.
1257 102642000 : uint32_t get_compound_mode_rate(
1258 : uint8_t md_pass,
1259 : ModeDecisionCandidate *candidate_ptr,
1260 : CodingUnit *cu_ptr,
1261 : uint8_t ref_frame_type,
1262 : BlockSize bsize,
1263 : SequenceControlSet *sequence_control_set_ptr,
1264 : PictureControlSet *picture_control_set_ptr
1265 : )
1266 : {
1267 102642000 : uint32_t comp_rate = 0;
1268 102642000 : if (md_pass == 0)
1269 202938 : return 0;
1270 :
1271 102439000 : MbModeInfo *const mbmi = &cu_ptr->av1xd->mi[0]->mbmi;
1272 : MvReferenceFrame rf[2];
1273 102439000 : av1_set_ref_frame(rf, ref_frame_type);
// Stored in the block's mode info because has_second_ref() below reads it from there.
1274 102412000 : mbmi->block_mi.ref_frame[0] = rf[0];
1275 102412000 : mbmi->block_mi.ref_frame[1] = rf[1];
1276 :
1277 : //NOTE : Make sure, any cuPtr data is already set before usage
1278 :
1279 102412000 : if (has_second_ref(mbmi)) {
1280 :
1281 143646000 : const int masked_compound_used = is_any_masked_compound_used(bsize) &&
1282 71769300 : sequence_control_set_ptr->seq_header.enable_masked_compound;
1283 :
1284 71767400 : if (masked_compound_used) {
1285 71592300 : const int ctx_comp_group_idx = get_comp_group_idx_context_enc(cu_ptr->av1xd);
1286 71598400 : comp_rate = candidate_ptr->md_rate_estimation_ptr->comp_group_idx_fac_bits[ctx_comp_group_idx][candidate_ptr->comp_group_idx];
1287 : }
1288 : else {
1289 175043 : assert(candidate_ptr->comp_group_idx == 0);
1290 : }
1291 :
1292 71773500 : if (candidate_ptr->comp_group_idx == 0) {
1293 40600900 : if (candidate_ptr->compound_idx)
1294 20418500 : assert(candidate_ptr->interinter_comp.type == COMPOUND_AVERAGE);
1295 :
1296 40600900 : if (sequence_control_set_ptr->seq_header.order_hint_info.enable_jnt_comp) {
1297 40421100 : const int comp_index_ctx = get_comp_index_context_enc(
1298 40421100 : picture_control_set_ptr->parent_pcs_ptr,
1299 40421100 : picture_control_set_ptr->parent_pcs_ptr->cur_order_hint,
1300 40421100 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[0] - 1],
1301 40421100 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[1] - 1],
1302 40421100 : cu_ptr->av1xd);
1303 40418600 : comp_rate += candidate_ptr->md_rate_estimation_ptr->comp_idx_fac_bits[comp_index_ctx][candidate_ptr->compound_idx];
1304 : }
1305 : else {
1306 179787 : assert(candidate_ptr->compound_idx == 1);
1307 : }
1308 : }
1309 : else {
1310 :
1311 31172500 : assert(picture_control_set_ptr->parent_pcs_ptr->frm_hdr.reference_mode != SINGLE_REFERENCE &&
1312 : is_inter_compound_mode(candidate_ptr->pred_mode ));
1313 31353600 : assert(masked_compound_used);
1314 : // compound_diffwtd, wedge
1315 31353600 : assert(candidate_ptr->interinter_comp.type == COMPOUND_WEDGE ||
1316 : candidate_ptr->interinter_comp.type == COMPOUND_DIFFWTD);
1317 :
1318 31353600 : if (is_interinter_compound_used(COMPOUND_WEDGE, bsize))
1319 29081800 : comp_rate += candidate_ptr->md_rate_estimation_ptr->compound_type_fac_bits[bsize][candidate_ptr->interinter_comp.type - COMPOUND_WEDGE];
1320 :
1321 31352400 : if (candidate_ptr->interinter_comp.type == COMPOUND_WEDGE) {
1322 11524400 : assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
1323 11523300 : comp_rate += candidate_ptr->md_rate_estimation_ptr->wedge_idx_fac_bits[bsize][candidate_ptr->interinter_comp.wedge_index];
// NOTE(review): presumably the 1-bit wedge sign - confirm.
1324 11523300 : comp_rate += av1_cost_literal(1);
1325 : }
1326 : else {
1327 19827900 : assert(candidate_ptr->interinter_comp.type == COMPOUND_DIFFWTD);
// NOTE(review): presumably the 1-bit difference-weighted mask type - confirm.
1328 19827900 : comp_rate += av1_cost_literal(1);
1329 : }
1330 : }
1331 : }
1332 :
1333 102407000 : return comp_rate;
1334 : }
1335 : #if II_COMP_FLAG
1336 : int is_interintra_wedge_used(BlockSize sb_type);
1337 : int svt_is_interintra_allowed(
1338 : uint8_t enable_inter_intra,
1339 : BlockSize sb_type,
1340 : PredictionMode mode,
1341 : MvReferenceFrame ref_frame[2]);
1342 : #endif
1343 :
1344 : #if ADD_MDC_FULL_COST
1345 3617500 : uint64_t mdc_av1_inter_fast_cost(
1346 : CodingUnit *cu_ptr,
1347 : ModeDecisionCandidate *candidate_ptr,
1348 : uint64_t luma_distortion,
1349 : uint64_t lambda,
1350 : EbBool use_ssd,
1351 : PictureControlSet *picture_control_set_ptr,
1352 : CandidateMv *ref_mv_stack,
1353 : const BlockGeom *blk_geom)
1354 :
1355 : {
1356 : // Luma rate
1357 3617500 : uint32_t luma_rate = 0;
1358 3617500 : uint32_t chroma_rate = 0;
1359 3617500 : uint64_t mv_rate = 0;
1360 : uint64_t skip_mode_rate;
1361 : // Luma and chroma distortion
1362 : uint64_t luma_sad;
1363 : uint64_t total_distortion;
1364 :
1365 : uint32_t rate;
1366 :
1367 : int16_t pred_ref_x;
1368 : int16_t pred_ref_y;
1369 : int16_t mv_ref_x;
1370 : int16_t mv_ref_y;
1371 :
1372 : EbReflist ref_list_idx;
1373 :
1374 3617500 : candidate_ptr->fast_luma_rate = 0;
1375 :
1376 3617500 : PredictionMode inter_mode = (PredictionMode)candidate_ptr->pred_mode;
1377 :
1378 3617500 : uint64_t inter_mode_bits_num = 0;
1379 :
1380 3617500 : uint8_t skip_mode_ctx = 0;// cu_ptr->skip_flag_context;
1381 : MvReferenceFrame rf[2];
1382 3617500 : av1_set_ref_frame(rf, candidate_ptr->ref_frame_type);
1383 3617420 : const int8_t ref_frame = av1_ref_frame_type(rf);
1384 3617350 : cu_ptr->inter_mode_ctx[ref_frame] = 0;
1385 3617350 : uint32_t mode_ctx = Av1ModeContextAnalyzer(cu_ptr->inter_mode_ctx, rf);
1386 3617090 : skip_mode_rate = candidate_ptr->md_rate_estimation_ptr->skip_mode_fac_bits[skip_mode_ctx][0];
1387 3617090 : uint64_t reference_picture_bits_num = 0;
1388 :
1389 : //Reference Type and Mode Bit estimation
1390 :
1391 3617090 : reference_picture_bits_num = EstimateRefFramesNumBits(
1392 : picture_control_set_ptr,
1393 : candidate_ptr,
1394 : cu_ptr,
1395 3617090 : blk_geom->bwidth,
1396 3617090 : blk_geom->bheight,
1397 3617090 : candidate_ptr->ref_frame_type,
1398 : 0,
1399 3617090 : candidate_ptr->is_compound);
1400 :
1401 3616340 : if (candidate_ptr->is_compound)
1402 0 : inter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->inter_compound_mode_fac_bits[mode_ctx][INTER_COMPOUND_OFFSET(inter_mode)];
1403 : else {
1404 : //uint32_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
1405 : //inter_mode_bits_num = candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->new_mv_mode_fac_bits[mode_ctx][0];
1406 :
1407 3616340 : int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
1408 : //aom_write_symbol(ec_writer, mode != NEWMV, frameContext->newmv_cdf[newmv_ctx], 2);
1409 3616340 : inter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->new_mv_mode_fac_bits[newmv_ctx][inter_mode != NEWMV];
1410 3616340 : if (inter_mode != NEWMV) {
1411 3616360 : const int16_t zeromvCtx = (mode_ctx >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
1412 : //aom_write_symbol(ec_writer, mode != GLOBALMV, frameContext->zeromv_cdf[zeromvCtx], 2);
1413 3616360 : inter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->zero_mv_mode_fac_bits[zeromvCtx][inter_mode != GLOBALMV];
1414 3616360 : if (inter_mode != GLOBALMV) {
1415 3616340 : int16_t refmvCtx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK;
1416 : /*aom_write_symbol(ec_writer, mode != NEARESTMV, frameContext->refmv_cdf[refmv_ctx], 2);*/
1417 3616340 : inter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->ref_mv_mode_fac_bits[refmvCtx][inter_mode != NEARESTMV];
1418 : }
1419 : }
1420 : }
1421 3616340 : if (inter_mode == NEWMV || inter_mode == NEW_NEWMV || have_nearmv_in_inter_mode(inter_mode)) {
1422 : //drLIdex cost estimation
1423 0 : const int32_t new_mv = inter_mode == NEWMV || inter_mode == NEW_NEWMV;
1424 0 : if (new_mv) {
1425 : int32_t idx;
1426 0 : for (idx = 0; idx < 2; ++idx) {
1427 0 : if (cu_ptr->av1xd->ref_mv_count[candidate_ptr->ref_frame_type] > idx + 1) {
1428 : uint8_t drl1Ctx =
1429 0 : av1_drl_ctx(ref_mv_stack, idx);
1430 0 : inter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->drl_mode_fac_bits[drl1Ctx][candidate_ptr->drl_index != idx];
1431 0 : if (candidate_ptr->drl_index == idx) break;
1432 : }
1433 : }
1434 : }
1435 :
1436 0 : if (have_nearmv_in_inter_mode(inter_mode)) {
1437 : int32_t idx;
1438 : // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
1439 0 : for (idx = 1; idx < 3; ++idx) {
1440 0 : if (cu_ptr->av1xd->ref_mv_count[candidate_ptr->ref_frame_type] > idx + 1) {
1441 : uint8_t drl_ctx =
1442 0 : av1_drl_ctx(ref_mv_stack, idx);
1443 0 : inter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->drl_mode_fac_bits[drl_ctx][candidate_ptr->drl_index != (idx - 1)];
1444 :
1445 0 : if (candidate_ptr->drl_index == (idx - 1)) break;
1446 : }
1447 : }
1448 : }
1449 : }
1450 :
1451 3616460 : if (have_newmv_in_inter_mode(inter_mode)) {
1452 0 : if (candidate_ptr->is_compound) {
1453 0 : mv_rate = 0;
1454 :
1455 0 : if (inter_mode == NEW_NEWMV) {
1456 0 : for (ref_list_idx = 0; ref_list_idx < 2; ++ref_list_idx) {
1457 0 : pred_ref_x = candidate_ptr->motion_vector_pred_x[ref_list_idx];
1458 0 : pred_ref_y = candidate_ptr->motion_vector_pred_y[ref_list_idx];
1459 0 : mv_ref_x = ref_list_idx == REF_LIST_1 ? candidate_ptr->motion_vector_xl1 : candidate_ptr->motion_vector_xl0;
1460 0 : mv_ref_y = ref_list_idx == REF_LIST_1 ? candidate_ptr->motion_vector_yl1 : candidate_ptr->motion_vector_yl0;
1461 :
1462 : MV mv;
1463 0 : mv.row = mv_ref_y;
1464 0 : mv.col = mv_ref_x;
1465 :
1466 : MV ref_mv;
1467 0 : ref_mv.row = pred_ref_y;
1468 0 : ref_mv.col = pred_ref_x;
1469 :
1470 0 : mv_rate += eb_av1_mv_bit_cost(
1471 : &mv,
1472 : &ref_mv,
1473 0 : candidate_ptr->md_rate_estimation_ptr->nmv_vec_cost,
1474 0 : candidate_ptr->md_rate_estimation_ptr->nmvcoststack,
1475 : MV_COST_WEIGHT);
1476 : }
1477 : }
1478 0 : else if (inter_mode == NEAREST_NEWMV || inter_mode == NEAR_NEWMV) {
1479 0 : pred_ref_x = candidate_ptr->motion_vector_pred_x[REF_LIST_1];
1480 0 : pred_ref_y = candidate_ptr->motion_vector_pred_y[REF_LIST_1];
1481 0 : mv_ref_x = candidate_ptr->motion_vector_xl1;
1482 0 : mv_ref_y = candidate_ptr->motion_vector_yl1;
1483 :
1484 : MV mv;
1485 0 : mv.row = mv_ref_y;
1486 0 : mv.col = mv_ref_x;
1487 :
1488 : MV ref_mv;
1489 0 : ref_mv.row = pred_ref_y;
1490 0 : ref_mv.col = pred_ref_x;
1491 :
1492 0 : mv_rate += eb_av1_mv_bit_cost(
1493 : &mv,
1494 : &ref_mv,
1495 0 : candidate_ptr->md_rate_estimation_ptr->nmv_vec_cost,
1496 0 : candidate_ptr->md_rate_estimation_ptr->nmvcoststack,
1497 : MV_COST_WEIGHT);
1498 : }
1499 : else {
1500 0 : assert(inter_mode == NEW_NEARESTMV || inter_mode == NEW_NEARMV);
1501 :
1502 0 : pred_ref_x = candidate_ptr->motion_vector_pred_x[REF_LIST_0];
1503 0 : pred_ref_y = candidate_ptr->motion_vector_pred_y[REF_LIST_0];
1504 0 : mv_ref_x = candidate_ptr->motion_vector_xl0;
1505 0 : mv_ref_y = candidate_ptr->motion_vector_yl0;
1506 :
1507 : MV mv;
1508 0 : mv.row = mv_ref_y;
1509 0 : mv.col = mv_ref_x;
1510 :
1511 : MV ref_mv;
1512 0 : ref_mv.row = pred_ref_y;
1513 0 : ref_mv.col = pred_ref_x;
1514 :
1515 0 : mv_rate += eb_av1_mv_bit_cost(
1516 : &mv,
1517 : &ref_mv,
1518 0 : candidate_ptr->md_rate_estimation_ptr->nmv_vec_cost,
1519 0 : candidate_ptr->md_rate_estimation_ptr->nmvcoststack,
1520 : MV_COST_WEIGHT);
1521 : }
1522 : }
1523 : else {
1524 0 : ref_list_idx = candidate_ptr->prediction_direction[0] == 0 ? 0 : 1;
1525 :
1526 0 : pred_ref_x = candidate_ptr->motion_vector_pred_x[ref_list_idx];
1527 0 : pred_ref_y = candidate_ptr->motion_vector_pred_y[ref_list_idx];
1528 :
1529 0 : mv_ref_x = ref_list_idx == 0 ? candidate_ptr->motion_vector_xl0 : candidate_ptr->motion_vector_xl1;
1530 0 : mv_ref_y = ref_list_idx == 0 ? candidate_ptr->motion_vector_yl0 : candidate_ptr->motion_vector_yl1;
1531 :
1532 : MV mv;
1533 0 : mv.row = mv_ref_y;
1534 0 : mv.col = mv_ref_x;
1535 :
1536 : MV ref_mv;
1537 0 : ref_mv.row = pred_ref_y;
1538 0 : ref_mv.col = pred_ref_x;
1539 :
1540 0 : mv_rate = eb_av1_mv_bit_cost(
1541 : &mv,
1542 : &ref_mv,
1543 0 : candidate_ptr->md_rate_estimation_ptr->nmv_vec_cost,
1544 0 : candidate_ptr->md_rate_estimation_ptr->nmvcoststack,
1545 : MV_COST_WEIGHT);
1546 : }
1547 : }
1548 3616480 : EbBool is_inter = inter_mode >= SINGLE_INTER_MODE_START && inter_mode < SINGLE_INTER_MODE_END;
1549 3616480 : if (is_inter
1550 : //&& picture_control_set_ptr->parent_pcs_ptr->switchable_motion_mode
1551 3616580 : && rf[1] != INTRA_FRAME)
1552 : {
1553 3616580 : MotionMode motion_mode_rd = candidate_ptr->motion_mode;
1554 3616580 : BlockSize bsize = blk_geom->bsize;
1555 3616580 : cu_ptr->prediction_unit_array[0].num_proj_ref = candidate_ptr->num_proj_ref;
1556 3616580 : MotionMode last_motion_mode_allowed = motion_mode_allowed(
1557 : picture_control_set_ptr,
1558 : cu_ptr,
1559 : bsize,
1560 3616580 : rf[0],
1561 3616580 : rf[1],
1562 : inter_mode);
1563 :
1564 3616590 : switch (last_motion_mode_allowed) {
1565 3616600 : case SIMPLE_TRANSLATION: break;
1566 0 : case OBMC_CAUSAL:
1567 0 : assert(motion_mode_rd == SIMPLE_TRANSLATION); // TODO: remove when OBMC added
1568 0 : inter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->motion_mode_fac_bits1[bsize][motion_mode_rd];
1569 0 : break;
1570 0 : default:
1571 0 : inter_mode_bits_num += candidate_ptr->md_rate_estimation_ptr->motion_mode_fac_bits[bsize][motion_mode_rd];
1572 : }
1573 0 : }
1574 :
1575 3616500 : uint32_t is_inter_rate = candidate_ptr->md_rate_estimation_ptr->intra_inter_fac_bits[cu_ptr->is_inter_ctx][1];
1576 3616500 : luma_rate = (uint32_t)(reference_picture_bits_num + skip_mode_rate + inter_mode_bits_num + mv_rate + is_inter_rate);
1577 : // Keep the Fast Luma and Chroma rate for future use
1578 3616500 : candidate_ptr->fast_luma_rate = luma_rate;
1579 3616500 : candidate_ptr->fast_chroma_rate = chroma_rate;
1580 :
1581 3616500 : if (use_ssd) {
1582 0 : int32_t current_q_index = MAX(0, MIN(QINDEX_RANGE - 1, picture_control_set_ptr->parent_pcs_ptr->base_qindex));
1583 0 : Dequants *const dequants = &picture_control_set_ptr->parent_pcs_ptr->deq;
1584 :
1585 0 : int16_t quantizer = dequants->y_dequant_Q3[current_q_index][1];
1586 0 : rate = 0;
1587 0 : model_rd_from_sse(
1588 0 : blk_geom->bsize,
1589 : quantizer,
1590 : luma_distortion,
1591 : &rate,
1592 : &luma_sad);
1593 0 : luma_rate += rate;
1594 0 : total_distortion = luma_sad;
1595 0 : rate = luma_rate;
1596 :
1597 0 : if (candidate_ptr->merge_flag) {
1598 0 : uint64_t skip_mode_rate = candidate_ptr->md_rate_estimation_ptr->skip_mode_fac_bits[skip_mode_ctx][1];
1599 0 : if (skip_mode_rate < rate) {
1600 0 : candidate_ptr->fast_luma_rate = skip_mode_rate;
1601 0 : return(RDCOST(lambda, skip_mode_rate, total_distortion));
1602 : }
1603 : }
1604 0 : candidate_ptr->fast_luma_rate = rate;
1605 0 : return(RDCOST(lambda, rate, total_distortion));
1606 : }
1607 : else {
1608 3616500 : luma_sad = (LUMA_WEIGHT * luma_distortion) << AV1_COST_PRECISION;
1609 3616500 : total_distortion = luma_sad;
1610 3616500 : rate = luma_rate;
1611 :
1612 : // Assign fast cost
1613 3616500 : if (candidate_ptr->merge_flag) {
1614 0 : uint64_t skip_mode_rate = candidate_ptr->md_rate_estimation_ptr->skip_mode_fac_bits[skip_mode_ctx][1];
1615 0 : if (skip_mode_rate < rate) {
1616 0 : candidate_ptr->fast_luma_rate = skip_mode_rate;
1617 0 : return(RDCOST(lambda, skip_mode_rate, total_distortion));
1618 : }
1619 : }
1620 3616500 : candidate_ptr->fast_luma_rate = rate;
1621 3616500 : return(RDCOST(lambda, rate, total_distortion));
1622 : }
1623 : }
1624 : #endif
1625 : #if TWO_PASS_IMPROVEMENT
1626 : /* two_pass_cost_update
1627 : * This function adds some biases for distortion and rate.
1628 : * The function is used in the first pass only and for the purpose of data collection */
1629 0 : void two_pass_cost_update(
1630 : PictureControlSet *picture_control_set_ptr,
1631 : ModeDecisionCandidate *candidate_ptr,
1632 : uint32_t *rate,
1633 : uint64_t *distortion) {
1634 :
1635 : MvReferenceFrame ref_type[2];
1636 0 : av1_set_ref_frame(ref_type, candidate_ptr->ref_frame_type);
1637 0 : if ((candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME || ref_type[1] != BWDREF_FRAME)) ||
1638 0 : (!candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME && ref_type[0] != BWDREF_FRAME))) {
1639 0 : *rate += (*rate) * FIRST_PASS_COST_PENALTY / 100;
1640 0 : *distortion += (*distortion) * FIRST_PASS_COST_PENALTY / 100;
1641 : }
1642 0 : EbReferenceObject *refObjL1 = (EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[REF_LIST_1][0]->object_ptr;
1643 0 : if (picture_control_set_ptr->slice_type == B_SLICE &&
1644 0 : (candidate_ptr->is_compound || ref_type[0] == BWDREF_FRAME) &&
1645 0 : (refObjL1->slice_type == I_SLICE && refObjL1->ref_poc > picture_control_set_ptr->picture_number)) {
1646 0 : *rate += (*rate * 2);
1647 0 : *distortion += (*distortion) * 2;
1648 : }
1649 0 : }
1650 0 : void two_pass_cost_update_64bit(
1651 : PictureControlSet *picture_control_set_ptr,
1652 : ModeDecisionCandidate *candidate_ptr,
1653 : uint64_t *rate,
1654 : uint64_t *distortion) {
1655 :
1656 : MvReferenceFrame ref_type[2];
1657 0 : av1_set_ref_frame(ref_type, candidate_ptr->ref_frame_type);
1658 0 : if ((candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME || ref_type[1] != BWDREF_FRAME)) ||
1659 0 : (!candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME && ref_type[0] != BWDREF_FRAME))) {
1660 0 : *rate += (*rate) * FIRST_PASS_COST_PENALTY / 100;
1661 0 : *distortion += (*distortion) * FIRST_PASS_COST_PENALTY / 100;
1662 : }
1663 0 : EbReferenceObject *refObjL1 = (EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[REF_LIST_1][0]->object_ptr;
1664 0 : if (picture_control_set_ptr->slice_type == B_SLICE &&
1665 0 : (candidate_ptr->is_compound || ref_type[0] == BWDREF_FRAME) &&
1666 0 : (refObjL1->slice_type == I_SLICE && refObjL1->ref_poc > picture_control_set_ptr->picture_number)) {
1667 0 : *rate += (*rate * 2);
1668 0 : *distortion += (*distortion) * 2;
1669 : }
1670 0 : }
1671 : #endif
1672 :
1673 102988000 : uint64_t av1_inter_fast_cost(
1674 : CodingUnit *cu_ptr,
1675 : ModeDecisionCandidate *candidate_ptr,
1676 : uint32_t qp,
1677 : uint64_t luma_distortion,
1678 : uint64_t chroma_distortion,
1679 : uint64_t lambda,
1680 : EbBool use_ssd,
1681 : PictureControlSet *picture_control_set_ptr,
1682 : CandidateMv *ref_mv_stack,
1683 : const BlockGeom *blk_geom,
1684 : uint32_t miRow,
1685 : uint32_t miCol,
1686 : uint8_t md_pass,
1687 : uint32_t left_neighbor_mode,
1688 : uint32_t top_neighbor_mode)
1689 :
1690 : {
1691 : UNUSED(top_neighbor_mode);
1692 : UNUSED(left_neighbor_mode);
1693 : UNUSED(miCol);
1694 : UNUSED(miRow);
1695 :
1696 102988000 : FrameHeader *frm_hdr = &picture_control_set_ptr->parent_pcs_ptr->frm_hdr;
1697 :
1698 : // Luma rate
1699 102988000 : uint32_t lumaRate = 0;
1700 102988000 : uint32_t chromaRate = 0;
1701 102988000 : uint64_t mvRate = 0;
1702 : uint64_t skipModeRate;
1703 : // Luma and chroma distortion
1704 : uint64_t lumaSad;
1705 : uint64_t chromaSad;
1706 : uint64_t totalDistortion;
1707 :
1708 : uint32_t rate;
1709 :
1710 : int16_t predRefX;
1711 : int16_t predRefY;
1712 : int16_t mvRefX;
1713 : int16_t mvRefY;
1714 :
1715 : EbReflist refListIdx;
1716 :
1717 : (void)qp;
1718 :
1719 102988000 : PredictionMode inter_mode = (PredictionMode)candidate_ptr->pred_mode;
1720 :
1721 102988000 : uint64_t interModeBitsNum = 0;
1722 :
1723 102988000 : uint8_t skipModeCtx = cu_ptr->skip_flag_context;
1724 : MvReferenceFrame rf[2];
1725 102988000 : av1_set_ref_frame(rf, candidate_ptr->ref_frame_type);
1726 102931000 : uint32_t modeCtx = Av1ModeContextAnalyzer(cu_ptr->inter_mode_ctx, rf);
1727 102843000 : skipModeRate = candidate_ptr->md_rate_estimation_ptr->skip_mode_fac_bits[skipModeCtx][0];
1728 102843000 : uint64_t referencePictureBitsNum = 0;
1729 :
1730 : //Reference Type and Mode Bit estimation
1731 :
1732 102843000 : referencePictureBitsNum = EstimateRefFramesNumBits(
1733 : picture_control_set_ptr,
1734 : candidate_ptr,
1735 : cu_ptr,
1736 102843000 : blk_geom->bwidth,
1737 102843000 : blk_geom->bheight,
1738 102843000 : candidate_ptr->ref_frame_type,
1739 : md_pass,
1740 102843000 : candidate_ptr->is_compound);
1741 :
1742 102822000 : if (candidate_ptr->is_compound)
1743 72041800 : interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->inter_compound_mode_fac_bits[modeCtx][INTER_COMPOUND_OFFSET(inter_mode)];
1744 : else {
1745 : //uint32_t newmv_ctx = modeCtx & NEWMV_CTX_MASK;
1746 : //interModeBitsNum = candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->new_mv_mode_fac_bits[mode_ctx][0];
1747 :
1748 30780000 : int16_t newmv_ctx = modeCtx & NEWMV_CTX_MASK;
1749 : //aom_write_symbol(ec_writer, mode != NEWMV, frameContext->newmv_cdf[newmv_ctx], 2);
1750 30780000 : interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->new_mv_mode_fac_bits[newmv_ctx][inter_mode != NEWMV];
1751 30780000 : if (inter_mode != NEWMV) {
1752 22733500 : const int16_t zeromvCtx = (modeCtx >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
1753 : //aom_write_symbol(ec_writer, mode != GLOBALMV, frameContext->zeromv_cdf[zeromvCtx], 2);
1754 22733500 : interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->zero_mv_mode_fac_bits[zeromvCtx][inter_mode != GLOBALMV];
1755 22733500 : if (inter_mode != GLOBALMV) {
1756 21742300 : int16_t refmvCtx = (modeCtx >> REFMV_OFFSET) & REFMV_CTX_MASK;
1757 : /*aom_write_symbol(ec_writer, mode != NEARESTMV, frameContext->refmv_cdf[refmv_ctx], 2);*/
1758 21742300 : interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->ref_mv_mode_fac_bits[refmvCtx][inter_mode != NEARESTMV];
1759 : }
1760 : }
1761 : }
1762 102822000 : if (inter_mode == NEWMV || inter_mode == NEW_NEWMV || have_nearmv_in_inter_mode(inter_mode)) {
1763 : //drLIdex cost estimation
1764 72612300 : const int32_t new_mv = inter_mode == NEWMV || inter_mode == NEW_NEWMV;
1765 72612300 : if (new_mv) {
1766 : int32_t idx;
1767 47772600 : for (idx = 0; idx < 2; ++idx) {
1768 39149300 : if (cu_ptr->av1xd->ref_mv_count[candidate_ptr->ref_frame_type] > idx + 1) {
1769 : uint8_t drl1Ctx =
1770 33373700 : av1_drl_ctx(ref_mv_stack, idx);
1771 33375500 : interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->drl_mode_fac_bits[drl1Ctx][candidate_ptr->drl_index != idx];
1772 33375500 : if (candidate_ptr->drl_index == idx) break;
1773 : }
1774 : }
1775 : }
1776 :
1777 72614100 : if (have_nearmv_in_inter_mode(inter_mode)) {
1778 : int32_t idx;
1779 : // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
1780 107615000 : for (idx = 1; idx < 3; ++idx) {
1781 79862800 : if (cu_ptr->av1xd->ref_mv_count[candidate_ptr->ref_frame_type] > idx + 1) {
1782 : uint8_t drl_ctx =
1783 42192300 : av1_drl_ctx(ref_mv_stack, idx);
1784 42198900 : interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->drl_mode_fac_bits[drl_ctx][candidate_ptr->drl_index != (idx - 1)];
1785 :
1786 42198900 : if (candidate_ptr->drl_index == (idx - 1)) break;
1787 : }
1788 : }
1789 : }
1790 : }
1791 :
1792 102659000 : if (have_newmv_in_inter_mode(inter_mode)) {
1793 52873900 : if (candidate_ptr->is_compound) {
1794 44807500 : mvRate = 0;
1795 :
1796 44807500 : if (inter_mode == NEW_NEWMV) {
1797 56469100 : for (refListIdx = 0; refListIdx < 2; ++refListIdx) {
1798 37656000 : predRefX = candidate_ptr->motion_vector_pred_x[refListIdx];
1799 37656000 : predRefY = candidate_ptr->motion_vector_pred_y[refListIdx];
1800 37656000 : mvRefX = refListIdx == REF_LIST_1 ? candidate_ptr->motion_vector_xl1 : candidate_ptr->motion_vector_xl0;
1801 37656000 : mvRefY = refListIdx == REF_LIST_1 ? candidate_ptr->motion_vector_yl1 : candidate_ptr->motion_vector_yl0;
1802 :
1803 : MV mv;
1804 37656000 : mv.row = mvRefY;
1805 37656000 : mv.col = mvRefX;
1806 :
1807 : MV ref_mv;
1808 37656000 : ref_mv.row = predRefY;
1809 37656000 : ref_mv.col = predRefX;
1810 :
1811 37643700 : mvRate += eb_av1_mv_bit_cost(
1812 : &mv,
1813 : &ref_mv,
1814 37656000 : candidate_ptr->md_rate_estimation_ptr->nmv_vec_cost,
1815 37656000 : candidate_ptr->md_rate_estimation_ptr->nmvcoststack,
1816 : MV_COST_WEIGHT);
1817 : }
1818 : }
1819 25982200 : else if (inter_mode == NEAREST_NEWMV || inter_mode == NEAR_NEWMV) {
1820 12958400 : predRefX = candidate_ptr->motion_vector_pred_x[REF_LIST_1];
1821 12958400 : predRefY = candidate_ptr->motion_vector_pred_y[REF_LIST_1];
1822 12958400 : mvRefX = candidate_ptr->motion_vector_xl1;
1823 12958400 : mvRefY = candidate_ptr->motion_vector_yl1;
1824 :
1825 : MV mv;
1826 12958400 : mv.row = mvRefY;
1827 12958400 : mv.col = mvRefX;
1828 :
1829 : MV ref_mv;
1830 12958400 : ref_mv.row = predRefY;
1831 12958400 : ref_mv.col = predRefX;
1832 :
1833 13036200 : mvRate += eb_av1_mv_bit_cost(
1834 : &mv,
1835 : &ref_mv,
1836 12958400 : candidate_ptr->md_rate_estimation_ptr->nmv_vec_cost,
1837 12958400 : candidate_ptr->md_rate_estimation_ptr->nmvcoststack,
1838 : MV_COST_WEIGHT);
1839 : }
1840 : else {
1841 13023700 : assert(inter_mode == NEW_NEARESTMV || inter_mode == NEW_NEARMV);
1842 :
1843 13023700 : predRefX = candidate_ptr->motion_vector_pred_x[REF_LIST_0];
1844 13023700 : predRefY = candidate_ptr->motion_vector_pred_y[REF_LIST_0];
1845 13023700 : mvRefX = candidate_ptr->motion_vector_xl0;
1846 13023700 : mvRefY = candidate_ptr->motion_vector_yl0;
1847 :
1848 : MV mv;
1849 13023700 : mv.row = mvRefY;
1850 13023700 : mv.col = mvRefX;
1851 :
1852 : MV ref_mv;
1853 13023700 : ref_mv.row = predRefY;
1854 13023700 : ref_mv.col = predRefX;
1855 :
1856 13026900 : mvRate += eb_av1_mv_bit_cost(
1857 : &mv,
1858 : &ref_mv,
1859 13023700 : candidate_ptr->md_rate_estimation_ptr->nmv_vec_cost,
1860 13023700 : candidate_ptr->md_rate_estimation_ptr->nmvcoststack,
1861 : MV_COST_WEIGHT);
1862 : }
1863 : }
1864 : else {
1865 8066440 : refListIdx = candidate_ptr->prediction_direction[0] == 0 ? 0 : 1;
1866 :
1867 8066440 : predRefX = candidate_ptr->motion_vector_pred_x[refListIdx];
1868 8066440 : predRefY = candidate_ptr->motion_vector_pred_y[refListIdx];
1869 :
1870 8066440 : mvRefX = refListIdx == 0 ? candidate_ptr->motion_vector_xl0 : candidate_ptr->motion_vector_xl1;
1871 8066440 : mvRefY = refListIdx == 0 ? candidate_ptr->motion_vector_yl0 : candidate_ptr->motion_vector_yl1;
1872 :
1873 : MV mv;
1874 8066440 : mv.row = mvRefY;
1875 8066440 : mv.col = mvRefX;
1876 :
1877 : MV ref_mv;
1878 8066440 : ref_mv.row = predRefY;
1879 8066440 : ref_mv.col = predRefX;
1880 :
1881 8067480 : mvRate = eb_av1_mv_bit_cost(
1882 : &mv,
1883 : &ref_mv,
1884 8066440 : candidate_ptr->md_rate_estimation_ptr->nmv_vec_cost,
1885 8066440 : candidate_ptr->md_rate_estimation_ptr->nmvcoststack,
1886 : MV_COST_WEIGHT);
1887 : }
1888 : }
1889 :
1890 : #if II_COMP_FLAG
1891 102637000 : if (md_pass > 0) {
1892 :
1893 : // inter intra mode rate
1894 102493000 : if (picture_control_set_ptr->parent_pcs_ptr->frm_hdr.reference_mode != COMPOUND_REFERENCE &&
1895 204397000 : picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.enable_interintra_compound &&
1896 101907000 : svt_is_interintra_allowed(picture_control_set_ptr->parent_pcs_ptr->enable_inter_intra,blk_geom->bsize, candidate_ptr->inter_mode, rf)) {
1897 21044800 : const int interintra = candidate_ptr->is_interintra_used;
1898 21044800 : const int bsize_group = size_group_lookup[blk_geom->bsize];
1899 :
1900 21044800 : interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->inter_intra_fac_bits[bsize_group][candidate_ptr->is_interintra_used];
1901 :
1902 21044800 : if (interintra) {
1903 8905100 : interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->inter_intra_mode_fac_bits[bsize_group][candidate_ptr->interintra_mode];
1904 :
1905 8905100 : if (is_interintra_wedge_used(blk_geom->bsize)) {
1906 8905080 : interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->wedge_inter_intra_fac_bits[blk_geom->bsize][candidate_ptr->use_wedge_interintra];
1907 :
1908 8905080 : if (candidate_ptr->use_wedge_interintra) {
1909 4453350 : interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->wedge_idx_fac_bits[blk_geom->bsize][candidate_ptr->interintra_wedge_index];
1910 : }
1911 : }
1912 : }
1913 : }
1914 : }
1915 : #endif
1916 102623000 : EbBool is_inter = inter_mode >= SINGLE_INTER_MODE_START && inter_mode < SINGLE_INTER_MODE_END;
1917 102623000 : if (is_inter
1918 30976600 : && frm_hdr->is_motion_mode_switchable
1919 30339200 : && rf[1] != INTRA_FRAME)
1920 : {
1921 30339200 : MotionMode motion_mode_rd = candidate_ptr->motion_mode;
1922 30339200 : BlockSize bsize = blk_geom->bsize;
1923 30339200 : cu_ptr->prediction_unit_array[0].num_proj_ref = candidate_ptr->num_proj_ref;
1924 30339200 : MotionMode last_motion_mode_allowed = motion_mode_allowed(
1925 : picture_control_set_ptr,
1926 : cu_ptr,
1927 : bsize,
1928 30339200 : rf[0],
1929 30339200 : rf[1],
1930 : inter_mode);
1931 :
1932 30340300 : switch (last_motion_mode_allowed) {
1933 6039190 : case SIMPLE_TRANSLATION: break;
1934 21923900 : case OBMC_CAUSAL:
1935 : #if OBMC_FLAG
1936 21923900 : interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->motion_mode_fac_bits1[bsize][motion_mode_rd==OBMC_CAUSAL];
1937 : #else
1938 : assert(motion_mode_rd == SIMPLE_TRANSLATION); // TODO: remove when OBMC added
1939 : interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->motion_mode_fac_bits1[bsize][motion_mode_rd];
1940 : #endif
1941 21923900 : break;
1942 2377220 : default:
1943 2377220 : interModeBitsNum += candidate_ptr->md_rate_estimation_ptr->motion_mode_fac_bits[bsize][motion_mode_rd];
1944 : }
1945 72284100 : }
1946 : //this func return 0 if masked=0 and distance=0
1947 205227000 : interModeBitsNum += get_compound_mode_rate(
1948 : md_pass,
1949 : candidate_ptr,
1950 : cu_ptr,
1951 102624000 : candidate_ptr->ref_frame_type,
1952 102624000 : blk_geom->bsize,
1953 102624000 : picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr,
1954 : picture_control_set_ptr
1955 : );
1956 : // NM - To be added when the overlappable mode is adopted
1957 : // read_compound_type(is_compound)
1958 : // NM - To be added when switchable filter is adopted
1959 : // if (interpolation_filter == SWITCHABLE) {
1960 : // for (dir = 0; dir < (enable_dual_filter ? 2 : 1); dir++) {
1961 : // if (needs_interp_filter()) {
1962 : // interp_filter[dir] S()
1963 : // }
1964 : // else {
1965 : // interp_filter[dir] = EIGHTTAP
1966 : // }
1967 : // }
1968 : // if (!enable_dual_filter)
1969 : // interp_filter[1] = interp_filter[0]
1970 : // }
1971 : // else {
1972 : // for (dir = 0; dir < 2; dir++)
1973 : // interp_filter[dir] = interpolation_filter
1974 : // }
1975 102603000 : uint32_t isInterRate = candidate_ptr->md_rate_estimation_ptr->intra_inter_fac_bits[cu_ptr->is_inter_ctx][1];
1976 102603000 : lumaRate = (uint32_t)(referencePictureBitsNum + skipModeRate + interModeBitsNum + mvRate + isInterRate);
1977 :
1978 : //chromaRate = intraChromaModeBitsNum + intraChromaAngModeBitsNum;
1979 :
1980 : // Keep the Fast Luma and Chroma rate for future use
1981 102603000 : candidate_ptr->fast_luma_rate = lumaRate;
1982 102603000 : candidate_ptr->fast_chroma_rate = chromaRate;
1983 :
1984 102603000 : if (use_ssd) {
1985 0 : int32_t current_q_index = frm_hdr->quantization_params.base_q_idx;
1986 0 : Dequants *const dequants = &picture_control_set_ptr->parent_pcs_ptr->deq;
1987 :
1988 0 : int16_t quantizer = dequants->y_dequant_Q3[current_q_index][1];
1989 0 : rate = 0;
1990 0 : model_rd_from_sse(
1991 0 : blk_geom->bsize,
1992 : quantizer,
1993 : luma_distortion,
1994 : &rate,
1995 : &lumaSad);
1996 0 : lumaRate += rate;
1997 0 : totalDistortion = lumaSad;
1998 :
1999 0 : rate = 0;
2000 0 : model_rd_from_sse(
2001 0 : blk_geom->bsize_uv,
2002 : quantizer,
2003 : chroma_distortion,
2004 : &chromaRate,
2005 : &chromaSad);
2006 0 : chromaRate += rate;
2007 0 : totalDistortion += chromaSad;
2008 :
2009 0 : rate = lumaRate + chromaRate;
2010 :
2011 : #if TWO_PASS
2012 0 : if (picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->use_output_stat_file) {
2013 : #if TWO_PASS_IMPROVEMENT
2014 0 : two_pass_cost_update(
2015 : picture_control_set_ptr,
2016 : candidate_ptr,
2017 : &rate,
2018 : &totalDistortion);
2019 : #else
2020 : MvReferenceFrame ref_type[2];
2021 : av1_set_ref_frame(ref_type, candidate_ptr->ref_frame_type);
2022 : if ((candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME || ref_type[1] != BWDREF_FRAME)) ||
2023 : (!candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME && ref_type[0] != BWDREF_FRAME))) {
2024 : rate += rate * FIRST_PASS_COST_PENALTY / 100;
2025 : totalDistortion += totalDistortion * FIRST_PASS_COST_PENALTY / 100;
2026 : }
2027 : #endif
2028 : }
2029 : #endif
2030 0 : if (candidate_ptr->merge_flag) {
2031 0 : uint64_t skipModeRate = candidate_ptr->md_rate_estimation_ptr->skip_mode_fac_bits[skipModeCtx][1];
2032 0 : if (skipModeRate < rate)
2033 0 : return(RDCOST(lambda, skipModeRate, totalDistortion));
2034 : }
2035 0 : return(RDCOST(lambda, rate, totalDistortion));
2036 : }
2037 : else {
2038 102603000 : lumaSad = (LUMA_WEIGHT * luma_distortion) << AV1_COST_PRECISION;
2039 102603000 : chromaSad = chroma_distortion << AV1_COST_PRECISION;
2040 102603000 : totalDistortion = lumaSad + chromaSad;
2041 102603000 : if (blk_geom->has_uv == 0 && chromaSad != 0)
2042 0 : printf("av1_inter_fast_cost: Chroma error");
2043 102603000 : rate = lumaRate + chromaRate;
2044 : #if TWO_PASS
2045 102603000 : if (picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->use_output_stat_file) {
2046 : #if TWO_PASS_IMPROVEMENT
2047 0 : two_pass_cost_update(
2048 : picture_control_set_ptr,
2049 : candidate_ptr,
2050 : &rate,
2051 : &totalDistortion);
2052 : #else
2053 : MvReferenceFrame ref_type[2];
2054 : av1_set_ref_frame(ref_type, candidate_ptr->ref_frame_type);
2055 : if ((candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME || ref_type[1] != BWDREF_FRAME)) ||
2056 : (!candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME && ref_type[0] != BWDREF_FRAME))) {
2057 : rate += rate * FIRST_PASS_COST_PENALTY / 100;
2058 : totalDistortion += totalDistortion * FIRST_PASS_COST_PENALTY / 100;
2059 : }
2060 : #endif
2061 : }
2062 : #endif
2063 : // Assign fast cost
2064 102622000 : if (candidate_ptr->merge_flag) {
2065 410064 : uint64_t skipModeRate = candidate_ptr->md_rate_estimation_ptr->skip_mode_fac_bits[skipModeCtx][1];
2066 410064 : if (skipModeRate < rate)
2067 349767 : return(RDCOST(lambda, skipModeRate, totalDistortion));
2068 : }
2069 102272000 : return(RDCOST(lambda, rate, totalDistortion));
2070 : }
2071 : }
2072 :
2073 :
2074 77874300 : EbErrorType av1_tu_estimate_coeff_bits(
2075 : struct ModeDecisionContext *md_context,
2076 : uint8_t allow_update_cdf,
2077 : FRAME_CONTEXT *ec_ctx,
2078 : PictureControlSet *picture_control_set_ptr,
2079 : struct ModeDecisionCandidateBuffer *candidate_buffer_ptr,
2080 : uint32_t tu_origin_index,
2081 : uint32_t tu_chroma_origin_index,
2082 : EntropyCoder *entropy_coder_ptr,
2083 : EbPictureBufferDesc *coeff_buffer_sb,
2084 : uint32_t y_eob,
2085 : uint32_t cb_eob,
2086 : uint32_t cr_eob,
2087 : uint64_t *y_tu_coeff_bits,
2088 : uint64_t *cb_tu_coeff_bits,
2089 : uint64_t *cr_tu_coeff_bits,
2090 : TxSize txsize,
2091 : TxSize txsize_uv,
2092 : TxType tx_type,
2093 : TxType tx_type_uv,
2094 : COMPONENT_TYPE component_type)
2095 : {
2096 : (void)entropy_coder_ptr;
2097 77874300 : EbErrorType return_error = EB_ErrorNone;
2098 :
2099 77874300 : FrameHeader *frm_hdr = &picture_control_set_ptr->parent_pcs_ptr->frm_hdr;
2100 :
2101 : int32_t *coeff_buffer;
2102 77874300 : int16_t luma_txb_skip_context = md_context->luma_txb_skip_context;
2103 77874300 : int16_t luma_dc_sign_context = md_context->luma_dc_sign_context;
2104 77874300 : int16_t cb_txb_skip_context = md_context->cb_txb_skip_context;
2105 77874300 : int16_t cb_dc_sign_context = md_context->cb_dc_sign_context;
2106 77874300 : int16_t cr_txb_skip_context = md_context->cr_txb_skip_context;
2107 77874300 : int16_t cr_dc_sign_context = md_context->cr_dc_sign_context;
2108 :
2109 77874300 : EbBool reducedTransformSetFlag = frm_hdr->reduced_tx_set ? EB_TRUE : EB_FALSE;
2110 :
2111 : //Estimate the rate of the transform type and coefficient for Luma
2112 :
2113 77874300 : if (component_type == COMPONENT_LUMA || component_type == COMPONENT_ALL) {
2114 60760500 : if (y_eob) {
2115 34052100 : coeff_buffer = (int32_t*)&coeff_buffer_sb->buffer_y[tu_origin_index * sizeof(int32_t)];
2116 :
2117 34019600 : *y_tu_coeff_bits = eb_av1_cost_coeffs_txb(
2118 : allow_update_cdf,
2119 : ec_ctx,
2120 : candidate_buffer_ptr,
2121 : coeff_buffer,
2122 34052100 : (uint16_t)y_eob,
2123 : PLANE_TYPE_Y,
2124 : txsize,
2125 : tx_type,
2126 : luma_txb_skip_context,
2127 : luma_dc_sign_context,
2128 : reducedTransformSetFlag);
2129 : }
2130 : else {
2131 26708400 : *y_tu_coeff_bits = av1_cost_skip_txb(
2132 : allow_update_cdf,
2133 : ec_ctx,
2134 : candidate_buffer_ptr,
2135 : txsize,
2136 : PLANE_TYPE_Y,
2137 : luma_txb_skip_context);
2138 : }
2139 : }
2140 : //Estimate the rate of the transform type and coefficient for chroma Cb
2141 :
2142 77843500 : if (component_type == COMPONENT_CHROMA_CB || component_type == COMPONENT_CHROMA || component_type == COMPONENT_ALL) {
2143 12531800 : if (cb_eob) {
2144 5622360 : coeff_buffer = (int32_t*)&coeff_buffer_sb->buffer_cb[tu_chroma_origin_index * sizeof(int32_t)];
2145 :
2146 5621170 : *cb_tu_coeff_bits = eb_av1_cost_coeffs_txb(
2147 : allow_update_cdf,
2148 : ec_ctx,
2149 : candidate_buffer_ptr,
2150 : coeff_buffer,
2151 5622360 : (uint16_t)cb_eob,
2152 : PLANE_TYPE_UV,
2153 : txsize_uv,
2154 : tx_type_uv,
2155 : cb_txb_skip_context,
2156 : cb_dc_sign_context,
2157 : reducedTransformSetFlag);
2158 : }
2159 : else {
2160 6909470 : *cb_tu_coeff_bits = av1_cost_skip_txb(
2161 : allow_update_cdf,
2162 : ec_ctx,
2163 : candidate_buffer_ptr,
2164 : txsize_uv,
2165 : PLANE_TYPE_UV,
2166 : cb_txb_skip_context);
2167 : }
2168 : }
2169 :
2170 77842300 : if (component_type == COMPONENT_CHROMA_CR || component_type == COMPONENT_CHROMA || component_type == COMPONENT_ALL) {
2171 : //Estimate the rate of the transform type and coefficient for chroma Cr
2172 12165600 : if (cr_eob) {
2173 3725450 : coeff_buffer = (int32_t*)&coeff_buffer_sb->buffer_cr[tu_chroma_origin_index * sizeof(int32_t)];
2174 :
2175 3725330 : *cr_tu_coeff_bits = eb_av1_cost_coeffs_txb(
2176 : allow_update_cdf,
2177 : ec_ctx,
2178 : candidate_buffer_ptr,
2179 : coeff_buffer,
2180 3725450 : (uint16_t)cr_eob,
2181 : PLANE_TYPE_UV,
2182 : txsize_uv,
2183 : tx_type_uv,
2184 : cr_txb_skip_context,
2185 : cr_dc_sign_context,
2186 : reducedTransformSetFlag);
2187 : }
2188 : else {
2189 8440150 : *cr_tu_coeff_bits = av1_cost_skip_txb(
2190 : allow_update_cdf,
2191 : ec_ctx,
2192 : candidate_buffer_ptr,
2193 : txsize_uv,
2194 : PLANE_TYPE_UV,
2195 : cr_txb_skip_context);
2196 : }
2197 : }
2198 :
2199 77842000 : return return_error;
2200 : }
2201 :
2202 : /*********************************************************************************
2203 : * av1_intra_full_cost function is used to estimate the cost of an intra candidate mode
2204 : * for full mode decision module.
2205 : *
2206 : * @param *cu_ptr(input)
2207 : * cu_ptr is the pointer of the target CU.
2208 : * @param *candidate_buffer_ptr(input)
2209 : * chromaBufferPtr is the buffer pointer of the candidate luma mode.
2210 : * @param qp(input)
2211 : * qp is the quantizer parameter.
2212 : * @param luma_distortion (input)
2213 : * luma_distortion is the intra candidate luma distortion.
2214 : * @param lambda(input)
2215 : * lambda is the Lagrange multiplier
2216 : **********************************************************************************/
2217 36879900 : EbErrorType Av1FullCost(
2218 : PictureControlSet *picture_control_set_ptr,
2219 : ModeDecisionContext *context_ptr,
2220 : struct ModeDecisionCandidateBuffer *candidate_buffer_ptr,
2221 : CodingUnit *cu_ptr,
2222 : uint64_t *y_distortion,
2223 : uint64_t *cb_distortion,
2224 : uint64_t *cr_distortion,
2225 : uint64_t lambda,
2226 : uint64_t *y_coeff_bits,
2227 : uint64_t *cb_coeff_bits,
2228 : uint64_t *cr_coeff_bits,
2229 : BlockSize bsize)
2230 : {
2231 : UNUSED(picture_control_set_ptr);
2232 : UNUSED(bsize);
2233 : UNUSED(cu_ptr);
2234 36879900 : EbErrorType return_error = EB_ErrorNone;
2235 :
2236 : // Luma and chroma rate
2237 36879900 : uint64_t lumaRate = 0;
2238 36879900 : uint64_t chromaRate = 0;
2239 36879900 : uint64_t coeffRate = 0;
2240 :
2241 : // Luma and chroma SSE
2242 : uint64_t luma_sse;
2243 : uint64_t chromaSse;
2244 : uint64_t totalDistortion;
2245 : uint64_t rate;
2246 :
2247 : //Estimate the rate of the transform type and coefficient for Luma
2248 : // Add fast rate to get the total rate of the subject mode
2249 36879900 : lumaRate += candidate_buffer_ptr->candidate_ptr->fast_luma_rate;
2250 36879900 : chromaRate += candidate_buffer_ptr->candidate_ptr->fast_chroma_rate;
2251 :
2252 : // For CFL, costs of alphas are not computed in fast loop, since they are computed in the full loop. The rate costs are added to the full loop.
2253 : // In fast loop CFL alphas are not know yet. The chroma mode bits are calculated based on DC Mode, and if CFL is the winner compared to CFL, ChromaBits are updated in Full loop
2254 36879900 : if (context_ptr->blk_geom->has_uv) {
2255 32548800 : if (candidate_buffer_ptr->candidate_ptr->type == INTRA_MODE && candidate_buffer_ptr->candidate_ptr->intra_chroma_mode == UV_CFL_PRED) {
2256 7634110 : EbBool isCflAllowed = (context_ptr->blk_geom->bwidth <= 32 &&
2257 3817060 : context_ptr->blk_geom->bheight <= 32) ? 1 : 0;
2258 :
2259 3817060 : chromaRate += candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->cfl_alpha_fac_bits[candidate_buffer_ptr->candidate_ptr->cfl_alpha_signs][CFL_PRED_U][CFL_IDX_U(candidate_buffer_ptr->candidate_ptr->cfl_alpha_idx)] +
2260 3817060 : candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->cfl_alpha_fac_bits[candidate_buffer_ptr->candidate_ptr->cfl_alpha_signs][CFL_PRED_V][CFL_IDX_V(candidate_buffer_ptr->candidate_ptr->cfl_alpha_idx)];
2261 :
2262 3817060 : chromaRate += (uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->intra_uv_mode_fac_bits[isCflAllowed][candidate_buffer_ptr->candidate_ptr->intra_luma_mode][UV_CFL_PRED];
2263 3817060 : chromaRate -= (uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->intra_uv_mode_fac_bits[isCflAllowed][candidate_buffer_ptr->candidate_ptr->intra_luma_mode][UV_DC_PRED];
2264 : }
2265 : }
2266 :
2267 : #if ENHANCE_ATB
2268 36879900 : uint64_t tx_size_bits = 0;
2269 36879900 : if (picture_control_set_ptr->parent_pcs_ptr->frm_hdr.tx_mode == TX_MODE_SELECT)
2270 7567970 : tx_size_bits = get_tx_size_bits(
2271 : candidate_buffer_ptr,
2272 : context_ptr,
2273 : picture_control_set_ptr,
2274 7567970 : candidate_buffer_ptr->candidate_ptr->tx_depth,
2275 7567970 : candidate_buffer_ptr->candidate_ptr->block_has_coeff);
2276 : #endif
2277 :
2278 : // Coeff rate
2279 :
2280 67876900 : if (context_ptr->blk_skip_decision && candidate_buffer_ptr->candidate_ptr->type != INTRA_MODE) {
2281 : #if ENHANCE_ATB
2282 30980500 : uint64_t non_skip_cost = RDCOST(lambda, (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + tx_size_bits + (uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->skip_fac_bits[cu_ptr->skip_coeff_context][0]), (y_distortion[0] + cb_distortion[0] + cr_distortion[0]));
2283 : #else
2284 : uint64_t non_skip_cost = RDCOST(lambda, (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + (uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->skip_fac_bits[cu_ptr->skip_coeff_context][0]), (y_distortion[0] + cb_distortion[0] + cr_distortion[0]));
2285 : #endif
2286 30980500 : uint64_t skip_cost = RDCOST(lambda, ((uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->skip_fac_bits[cu_ptr->skip_coeff_context][1]), (y_distortion[1] + cb_distortion[1] + cr_distortion[1]));
2287 30980500 : if ((candidate_buffer_ptr->candidate_ptr->block_has_coeff == 0) || (skip_cost < non_skip_cost)) {
2288 24338500 : y_distortion[0] = y_distortion[1];
2289 24338500 : cb_distortion[0] = cb_distortion[1];
2290 24338500 : cr_distortion[0] = cr_distortion[1];
2291 24338500 : candidate_buffer_ptr->candidate_ptr->block_has_coeff = 0;
2292 : }
2293 30980500 : if (candidate_buffer_ptr->candidate_ptr->block_has_coeff)
2294 6652490 : coeffRate = (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + (uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->skip_fac_bits[cu_ptr->skip_coeff_context][0]);
2295 : else
2296 24328000 : coeffRate = MIN((uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->skip_fac_bits[cu_ptr->skip_coeff_context][1],
2297 : (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + (uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->skip_fac_bits[cu_ptr->skip_coeff_context][0]));
2298 : }
2299 : else
2300 5915980 : coeffRate = (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + (uint64_t)candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->skip_fac_bits[cu_ptr->skip_coeff_context][0]);
2301 36896400 : luma_sse = y_distortion[0];
2302 36896400 : chromaSse = cb_distortion[0] + cr_distortion[0];
2303 36896400 : totalDistortion = luma_sse + chromaSse;
2304 :
2305 36896400 : rate = lumaRate + chromaRate + coeffRate;
2306 : #if ENHANCE_ATB
2307 36896400 : if (candidate_buffer_ptr->candidate_ptr->block_has_coeff)
2308 11598200 : rate += tx_size_bits;
2309 : #endif
2310 :
2311 : #if TWO_PASS
2312 36896400 : if (picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->use_output_stat_file && candidate_buffer_ptr->candidate_ptr->type != INTRA_MODE) {
2313 : #if TWO_PASS_IMPROVEMENT
2314 0 : two_pass_cost_update_64bit(
2315 : picture_control_set_ptr,
2316 : candidate_buffer_ptr->candidate_ptr,
2317 : &rate,
2318 : &totalDistortion);
2319 : #else
2320 : MvReferenceFrame ref_type[2];
2321 : av1_set_ref_frame(ref_type, candidate_buffer_ptr->candidate_ptr->ref_frame_type);
2322 : if ((candidate_buffer_ptr->candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME || ref_type[1] != BWDREF_FRAME)) ||
2323 : (!candidate_buffer_ptr->candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME && ref_type[0] != BWDREF_FRAME))) {
2324 : rate += rate * FIRST_PASS_COST_PENALTY / 100;
2325 : totalDistortion += totalDistortion * FIRST_PASS_COST_PENALTY / 100;
2326 : }
2327 : #endif
2328 : }
2329 : #endif
2330 : // Assign full cost
2331 36895400 : *(candidate_buffer_ptr->full_cost_ptr) = RDCOST(lambda, rate, totalDistortion);
2332 :
2333 36895400 : candidate_buffer_ptr->full_lambda_rate = *candidate_buffer_ptr->full_cost_ptr - totalDistortion;
2334 36895400 : coeffRate = *y_coeff_bits;
2335 36895400 : candidate_buffer_ptr->full_cost_luma = RDCOST(lambda, lumaRate + *y_coeff_bits, luma_sse);
2336 :
2337 36895400 : return return_error;
2338 : }
2339 :
2340 : /*********************************************************************************
2341 : * Av1MergeSkipFullCost estimates the cost of a merge candidate both as merge (with
2342 : * coefficients) and as skip for the full mode decision module, and keeps the cheaper one.
2343 : *
2344 : * @param *cu_ptr(input)
2345 : * cu_ptr is the pointer of the target CU; its skip_flag_context is read.
2346 : * @param *candidate_buffer_ptr(input/output)
2347 : * candidate_buffer_ptr is the buffer pointer of the candidate mode; it receives the costs and flags.
2348 : * @param y_distortion, cb_distortion, cr_distortion(input)
2349 : * per-plane distortions; [0] feeds the merge cost, [1] feeds the skip cost.
2350 : * @param y_coeff_bits, cb_coeff_bits, cr_coeff_bits(input)
2351 : * per-plane coefficient rates, counted in the merge rate only.
2352 : * @param lambda(input)
2353 : * lambda is the Lagrange multiplier
2354 : **********************************************************************************/
2355 652229 : EbErrorType Av1MergeSkipFullCost(
2356 : PictureControlSet *picture_control_set_ptr,
2357 : ModeDecisionContext *context_ptr,
2358 : struct ModeDecisionCandidateBuffer *candidate_buffer_ptr,
2359 : CodingUnit *cu_ptr,
2360 : uint64_t *y_distortion,
2361 : uint64_t *cb_distortion,
2362 : uint64_t *cr_distortion,
2363 : uint64_t lambda,
2364 : uint64_t *y_coeff_bits,
2365 : uint64_t *cb_coeff_bits,
2366 : uint64_t *cr_coeff_bits,
2367 : BlockSize bsize)
2368 : {
2369 : UNUSED(bsize);
2370 : UNUSED(context_ptr); // NOTE(review): passed to get_tx_size_bits() below when ENHANCE_ATB is enabled
2371 : UNUSED(picture_control_set_ptr); // NOTE(review): read below when ENHANCE_ATB or TWO_PASS is enabled
2372 :
2373 652229 : EbErrorType return_error = EB_ErrorNone; // never modified: the function always returns EB_ErrorNone
2374 652229 : uint64_t skipModeCtx = cu_ptr->skip_flag_context;
2375 652229 : uint64_t mergeRate = 0;
2376 652229 : uint64_t skipRate = 0;
2377 : // Merge
2378 : //uint64_t mergeChromaRate;
2379 : uint64_t mergeDistortion;
2380 : uint64_t merge_cost;
2381 : //uint64_t mergeLumaCost;
2382 : uint64_t mergeLumaSse;
2383 : uint64_t mergeChromaSse;
2384 : uint64_t coeffRate;
2385 : //uint64_t lumaCoeffRate;
2386 :
2387 : // SKIP
2388 : uint64_t skipDistortion;
2389 : uint64_t skip_cost;
2390 : //uint64_t skipLumaCost;
2391 :
2392 : // Luma and chroma transform size shift for the distortion
2393 : uint64_t skipLumaSse;
2394 : uint64_t skipChromaSse;
2395 :
2396 652229 : uint64_t skipModeRate = candidate_buffer_ptr->candidate_ptr->md_rate_estimation_ptr->skip_mode_fac_bits[skipModeCtx][1]; // the only rate charged to the skip alternative
2397 :
2398 : // Coeff rate
2399 652229 : coeffRate = (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits);
2400 :
2401 : // Compute Merge Cost
2402 : // AV1_COST_PRECISION is defined as 0 at the top of this file, so the shifts below leave the values unchanged
2402 652229 : mergeLumaSse = y_distortion[0] << AV1_COST_PRECISION;
2403 652229 : mergeChromaSse = (cb_distortion[0] + cr_distortion[0]) << AV1_COST_PRECISION;
2404 :
2405 652229 : skipLumaSse = y_distortion[1] << AV1_COST_PRECISION;
2406 652229 : skipChromaSse = (cb_distortion[1] + cr_distortion[1]) << AV1_COST_PRECISION;
2407 :
2408 : // *Note - As in JCTVC-G1102, the JCT-VC uses the Mode Decision formula where the chromaSse has been weighted
2409 : // CostMode = (luma_sse + wchroma * chromaSse) + lambdaSse * rateMode
2410 :
2411 : //if (picture_control_set_ptr->parent_pcs_ptr->pred_structure == EB_PRED_RANDOM_ACCESS) {
2412 : // // Random Access
2413 : // if (picture_control_set_ptr->temporal_layer_index == 0) {
2414 : // mergeChromaSse = (((mergeChromaSse * chroma_weight_factor_ra[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
2415 : // }
2416 : // else if (picture_control_set_ptr->temporal_layer_index < 3) {
2417 : // mergeChromaSse = (((mergeChromaSse * chroma_weight_factor_ra_qp_scaling_l1[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
2418 : // }
2419 : // else {
2420 : // mergeChromaSse = (((mergeChromaSse * chroma_weight_factor_ra_qp_scaling_l3[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
2421 : // }
2422 : //}
2423 : //else {
2424 : // // Low delay
2425 : // if (picture_control_set_ptr->temporal_layer_index == 0) {
2426 : // mergeChromaSse = (((mergeChromaSse * chroma_weight_factor_ld[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
2427 : // }
2428 : // else {
2429 : // mergeChromaSse = (((mergeChromaSse * chroma_weight_factor_ld_qp_scaling[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
2430 : // }
2431 : //}
2432 :
2433 : // Add fast rate to get the total rate of the subject mode
2434 652229 : mergeRate += candidate_buffer_ptr->candidate_ptr->fast_luma_rate;
2435 652229 : mergeRate += candidate_buffer_ptr->candidate_ptr->fast_chroma_rate;
2436 :
2437 652229 : mergeRate += coeffRate;
2438 : #if ENHANCE_ATB
2439 652229 : uint64_t tx_size_bits = 0; // stays 0 unless tx_mode is TX_MODE_SELECT
2440 652229 : if (picture_control_set_ptr->parent_pcs_ptr->frm_hdr.tx_mode == TX_MODE_SELECT)
2441 0 : tx_size_bits = get_tx_size_bits(
2442 : candidate_buffer_ptr,
2443 : context_ptr,
2444 : picture_control_set_ptr,
2445 0 : candidate_buffer_ptr->candidate_ptr->tx_depth,
2446 0 : candidate_buffer_ptr->candidate_ptr->block_has_coeff);
2447 652285 : mergeRate += tx_size_bits;
2448 : #endif
2449 :
2450 652285 : mergeDistortion = (mergeLumaSse + mergeChromaSse);
2451 :
2452 : //merge_cost = mergeDistortion + (((lambda * coeffRate + lambda * mergeLumaRate + lambda_chroma * mergeChromaRate) + MD_OFFSET) >> MD_SHIFT);
2453 :
2454 652285 : merge_cost = RDCOST(lambda, mergeRate, mergeDistortion);
2455 : // mergeLumaCost = mergeLumaSse + (((lambda * lumaCoeffRate + lambda * mergeLumaRate) + MD_OFFSET) >> MD_SHIFT);
2456 :
2457 : // *Note - As in JCTVC-G1102, the JCT-VC uses the Mode Decision formula where the chromaSse has been weighted
2458 : // CostMode = (luma_sse + wchroma * chromaSse) + lambdaSse * rateMode
2459 :
2460 : //if (picture_control_set_ptr->parent_pcs_ptr->pred_structure == EB_PRED_RANDOM_ACCESS) {
2461 : // if (picture_control_set_ptr->temporal_layer_index == 0) {
2462 : // skipChromaSse = (((skipChromaSse * chroma_weight_factor_ra[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
2463 : // }
2464 : // else if (picture_control_set_ptr->temporal_layer_index < 3) {
2465 : // skipChromaSse = (((skipChromaSse * chroma_weight_factor_ra_qp_scaling_l1[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
2466 : // }
2467 : // else {
2468 : // skipChromaSse = (((skipChromaSse * chroma_weight_factor_ra_qp_scaling_l3[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
2469 : // }
2470 : //}
2471 : //else {
2472 : // // Low Delay
2473 : // if (picture_control_set_ptr->temporal_layer_index == 0) {
2474 : // skipChromaSse = (((skipChromaSse * chroma_weight_factor_ld[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
2475 : // }
2476 : // else {
2477 : // skipChromaSse = (((skipChromaSse * chroma_weight_factor_ld_qp_scaling[qp]) + CHROMA_WEIGHT_OFFSET) >> CHROMA_WEIGHT_SHIFT);
2478 : // }
2479 : //}
2480 :
2481 652285 : skipDistortion = skipLumaSse + skipChromaSse;
2482 652285 : skipRate = skipModeRate;
2483 652285 : skip_cost = RDCOST(lambda, skipRate, skipDistortion);
2484 : #if TWO_PASS
2485 652285 : if (picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->use_output_stat_file) {
2486 : MvReferenceFrame ref_type[2];
2487 0 : av1_set_ref_frame(ref_type, candidate_buffer_ptr->candidate_ptr->ref_frame_type);
2488 0 : if ((candidate_buffer_ptr->candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME || ref_type[1] != BWDREF_FRAME)) || // penalize every reference choice other than LAST / BWDREF / (LAST, BWDREF)
2489 0 : (!candidate_buffer_ptr->candidate_ptr->is_compound && (ref_type[0] != LAST_FRAME && ref_type[0] != BWDREF_FRAME))) {
2490 0 : skip_cost += skip_cost * FIRST_PASS_COST_PENALTY / 100;
2491 0 : merge_cost += merge_cost * FIRST_PASS_COST_PENALTY / 100;
2492 : }
2493 : #if TWO_PASS_IMPROVEMENT
2494 0 : EbReferenceObject *refObjL1 = (EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[REF_LIST_1][0]->object_ptr; // NOTE(review): dereferenced before the B_SLICE check below; confirm ref_pic_ptr_array[REF_LIST_1][0] is never NULL here
2495 0 : if (picture_control_set_ptr->slice_type == B_SLICE &&
2496 0 : (candidate_buffer_ptr->candidate_ptr->is_compound || ref_type[0] == BWDREF_FRAME)
2497 0 : && refObjL1->slice_type == I_SLICE && refObjL1->ref_poc > picture_control_set_ptr->picture_number) {
2498 0 : skip_cost += skip_cost * 2; // both costs are tripled
2499 0 : merge_cost += merge_cost * 2;
2500 : }
2501 : #endif
2502 : }
2503 : #endif
2504 : // Assign full cost (ties go to skip)
2505 652285 : *candidate_buffer_ptr->full_cost_ptr = (skip_cost <= merge_cost) ? skip_cost : merge_cost;
2506 :
2507 : uint64_t tempDistortion;
2508 652285 : tempDistortion = (skip_cost <= merge_cost) ? skipDistortion : mergeDistortion;
2509 652285 : candidate_buffer_ptr->full_lambda_rate = *candidate_buffer_ptr->full_cost_ptr - tempDistortion; // full cost minus the winner's distortion
2510 652285 : *candidate_buffer_ptr->full_cost_merge_ptr = merge_cost;
2511 652285 : *candidate_buffer_ptr->full_cost_skip_ptr = skip_cost;
2512 : // Assign merge flag
2513 652285 : candidate_buffer_ptr->candidate_ptr->merge_flag = EB_TRUE;
2514 : // Assign skip flag
2515 :
2516 652285 : candidate_buffer_ptr->candidate_ptr->skip_flag = (skip_cost <= merge_cost) ? EB_TRUE : EB_FALSE;
2517 :
2518 : //CHKN: skip_flag context is not accurate as MD does not keep skip info in sync with EncDec.
2519 :
2520 652285 : return return_error;
2521 : }
2522 : /*********************************************************************************
2523 : * av1_intra_full_cost function is used to estimate the cost of an intra candidate mode
2524 : * for full mode decision module. It forwards all of its arguments, unchanged, to Av1FullCost.
2525 : *
2526 : * @param *cu_ptr(input)
2527 : * cu_ptr is the pointer of the target CU.
2528 : * @param *candidate_buffer_ptr(input/output)
2529 : * candidate_buffer_ptr is the buffer pointer of the candidate mode.
2530 : * @param y_distortion, cb_distortion, cr_distortion
2531 : * per-plane distortion arrays, passed through to Av1FullCost.
2532 : * @param y_coeff_bits, cb_coeff_bits, cr_coeff_bits
2533 : * per-plane coefficient rates, passed through to Av1FullCost.
2534 : * @param lambda(input)
2535 : * lambda is the Lagrange multiplier
2536 : **********************************************************************************/
2537 5853270 : EbErrorType av1_intra_full_cost(
2538 : PictureControlSet *picture_control_set_ptr,
2539 : ModeDecisionContext *context_ptr,
2540 : struct ModeDecisionCandidateBuffer *candidate_buffer_ptr,
2541 : CodingUnit *cu_ptr,
2542 : uint64_t *y_distortion,
2543 : uint64_t *cb_distortion,
2544 : uint64_t *cr_distortion,
2545 : uint64_t lambda,
2546 : uint64_t *y_coeff_bits,
2547 : uint64_t *cb_coeff_bits,
2548 : uint64_t *cr_coeff_bits,
2549 : BlockSize bsize)
2550 :
2551 : {
2552 5853270 : EbErrorType return_error = EB_ErrorNone;
2553 :
2554 : // NOTE(review): the EbErrorType returned by Av1FullCost is discarded; this wrapper always returns EB_ErrorNone
2554 5853270 : Av1FullCost(
2555 : picture_control_set_ptr,
2556 : context_ptr,
2557 : candidate_buffer_ptr,
2558 : cu_ptr,
2559 : y_distortion,
2560 : cb_distortion,
2561 : cr_distortion,
2562 : lambda,
2563 : y_coeff_bits,
2564 : cb_coeff_bits,
2565 : cr_coeff_bits,
2566 : bsize);
2567 :
2568 5853160 : return return_error;
2569 : }
2570 :
2571 : /*********************************************************************************
2572 : * av1_inter_full_cost function is used to estimate the cost of an inter candidate mode
2573 : * for full mode decision module in inter frames. Merge candidates go to Av1MergeSkipFullCost, all others to Av1FullCost.
2574 : *
2575 : * @param *cu_ptr(input)
2576 : * cu_ptr is the pointer of the target CU.
2577 : * @param *candidate_buffer_ptr(input/output)
2578 : * candidate_buffer_ptr is the buffer pointer of the candidate mode; its candidate_ptr->merge_flag selects the path.
2579 : * @param y_distortion, cb_distortion, cr_distortion
2580 : * per-plane distortion arrays, passed through to the selected cost function.
2581 : * @param y_coeff_bits, cb_coeff_bits, cr_coeff_bits
2582 : * per-plane coefficient rates, passed through to the selected cost function.
2583 : * @param lambda(input)
2584 : * lambda is the Lagrange multiplier
2585 : **********************************************************************************/
2586 31690400 : EbErrorType av1_inter_full_cost(
2587 : PictureControlSet *picture_control_set_ptr,
2588 : ModeDecisionContext *context_ptr,
2589 : struct ModeDecisionCandidateBuffer *candidate_buffer_ptr,
2590 : CodingUnit *cu_ptr,
2591 : uint64_t *y_distortion,
2592 : uint64_t *cb_distortion,
2593 : uint64_t *cr_distortion,
2594 : uint64_t lambda,
2595 : uint64_t *y_coeff_bits,
2596 : uint64_t *cb_coeff_bits,
2597 : uint64_t *cr_coeff_bits,
2598 : BlockSize bsize
2599 : )
2600 : {
2601 31690400 : EbErrorType return_error = EB_ErrorNone; // NOTE(review): never updated; the callees' return values are discarded below
2602 :
2603 31690400 : if (candidate_buffer_ptr->candidate_ptr->merge_flag == EB_TRUE) {
2604 652242 : Av1MergeSkipFullCost(
2605 : picture_control_set_ptr,
2606 : context_ptr,
2607 : candidate_buffer_ptr,
2608 : cu_ptr,
2609 : y_distortion,
2610 : cb_distortion,
2611 : cr_distortion,
2612 : lambda,
2613 : y_coeff_bits,
2614 : cb_coeff_bits,
2615 : cr_coeff_bits,
2616 : bsize);
2617 : }
2618 : else {
2619 31038200 : Av1FullCost(
2620 : picture_control_set_ptr,
2621 : context_ptr,
2622 : candidate_buffer_ptr,
2623 : cu_ptr,
2624 : y_distortion,
2625 : cb_distortion,
2626 : cr_distortion,
2627 : lambda,
2628 : y_coeff_bits,
2629 : cb_coeff_bits,
2630 : cr_coeff_bits,
2631 : bsize);
2632 : }
2633 31696300 : return return_error;
2634 : }
2635 :
2636 : /************************************************************
2637 : * Coding Loop Context Generation: fills the context fields of a CU (and of its md_local_cu_unit entry) from the left/top neighbor arrays
2638 : ************************************************************/
2639 811343 : void coding_loop_context_generation(
2640 : ModeDecisionContext *context_ptr, // output: md_local_cu_unit[cu_ptr->mds_idx] neighbor mode/depth/partition fields
2641 : CodingUnit *cu_ptr, // output: context fields and intra neighbor modes; mds_idx and av1xd are read
2642 : uint32_t cu_origin_x, // input: used to index the top arrays
2643 : uint32_t cu_origin_y, // input: used to index the left arrays
2644 : uint32_t sb_sz, // unused
2645 : NeighborArrayUnit *skip_coeff_neighbor_array,
2646 : NeighborArrayUnit *inter_pred_dir_neighbor_array,
2647 : NeighborArrayUnit *ref_frame_type_neighbor_array, // unused
2648 : NeighborArrayUnit *intra_luma_mode_neighbor_array,
2649 : NeighborArrayUnit *skip_flag_neighbor_array,
2650 : NeighborArrayUnit *mode_type_neighbor_array,
2651 : NeighborArrayUnit *leaf_depth_neighbor_array,
2652 : NeighborArrayUnit *leaf_partition_neighbor_array)
2653 : {
2654 : (void)sb_sz;
2655 : UNUSED(ref_frame_type_neighbor_array);
2656 811343 : uint32_t modeTypeLeftNeighborIndex = get_neighbor_array_unit_left_index(
2657 : mode_type_neighbor_array,
2658 : cu_origin_y);
2659 811342 : uint32_t modeTypeTopNeighborIndex = get_neighbor_array_unit_top_index(
2660 : mode_type_neighbor_array,
2661 : cu_origin_x);
2662 811361 : uint32_t leafDepthLeftNeighborIndex = get_neighbor_array_unit_left_index(
2663 : leaf_depth_neighbor_array,
2664 : cu_origin_y);
2665 811385 : uint32_t leafDepthTopNeighborIndex = get_neighbor_array_unit_top_index(
2666 : leaf_depth_neighbor_array,
2667 : cu_origin_x);
2668 811397 : uint32_t skipFlagLeftNeighborIndex = get_neighbor_array_unit_left_index(
2669 : skip_flag_neighbor_array,
2670 : cu_origin_y);
2671 811400 : uint32_t skipFlagTopNeighborIndex = get_neighbor_array_unit_top_index(
2672 : skip_flag_neighbor_array,
2673 : cu_origin_x);
2674 811409 : uint32_t intraLumaModeLeftNeighborIndex = get_neighbor_array_unit_left_index(
2675 : intra_luma_mode_neighbor_array,
2676 : cu_origin_y);
2677 811399 : uint32_t intraLumaModeTopNeighborIndex = get_neighbor_array_unit_top_index(
2678 : intra_luma_mode_neighbor_array,
2679 : cu_origin_x);
2680 :
2681 811385 : uint32_t partition_left_neighbor_index = get_neighbor_array_unit_left_index(
2682 : leaf_partition_neighbor_array,
2683 : cu_origin_y);
2684 811381 : uint32_t partition_above_neighbor_index = get_neighbor_array_unit_top_index(
2685 : leaf_partition_neighbor_array,
2686 : cu_origin_x);
2687 :
2688 : // Intra Luma Neighbor Modes
2689 : // A neighbor whose mode type is not INTRA_MODE contributes DC_PRED
2690 736610 : cu_ptr->prediction_unit_array->intra_luma_left_mode = (uint32_t)(
2691 811379 : (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] != INTRA_MODE) ? (uint32_t)DC_PRED :
2692 74769 : intra_luma_mode_neighbor_array->left_array[intraLumaModeLeftNeighborIndex]);
2693 :
2694 737531 : cu_ptr->prediction_unit_array->intra_luma_top_mode = (uint32_t)(
2695 811379 : (mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] != INTRA_MODE) ? (uint32_t)DC_PRED :
2696 73848 : intra_luma_mode_neighbor_array->top_array[intraLumaModeTopNeighborIndex]);
2697 :
2698 : int32_t contextIndex; // is_inter context: both neighbors valid -> 3 (both intra), 1 (one intra), 0; one valid -> 2 if intra else 0; none valid -> 0
2699 811379 : if (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] != (uint8_t)INVALID_MODE && mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] != (uint8_t)INVALID_MODE) {
2700 1420890 : contextIndex = (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] == (uint8_t)INTRA_MODE && mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] == (uint8_t)INTRA_MODE) ? 3 :
2701 676253 : (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] == (uint8_t)INTRA_MODE || mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] == (uint8_t)INTRA_MODE) ? 1 : 0;
2702 : }
2703 66740 : else if (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] != (uint8_t)INVALID_MODE)
2704 35458 : contextIndex = (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] == (uint8_t)INTRA_MODE) ? 2 : 0;
2705 31282 : else if (mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] != (uint8_t)INVALID_MODE)
2706 30046 : contextIndex = (mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] == (uint8_t)INTRA_MODE) ? 2 : 0;
2707 : else
2708 1236 : contextIndex = 0;
2709 811379 : cu_ptr->is_inter_ctx = contextIndex;
2710 : // if(cu_ptr->is_inter_ctx!=0) //
2711 : // printf("ctx:%i \n",cu_ptr->is_inter_ctx);
2712 :
2713 : // Top Intra Mode Neighbor Array instead of a Full
2714 : // Skip Flag Context: number of neighbors (left, top) whose skip flag is EB_TRUE; a neighbor with INVALID_MODE counts as 0
2715 811379 : cu_ptr->skip_flag_context =
2716 1591470 : (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] == (uint8_t)INVALID_MODE) ? 0 :
2717 780094 : (skip_flag_neighbor_array->left_array[skipFlagLeftNeighborIndex] == EB_TRUE) ? 1 : 0;
2718 1622760 : cu_ptr->skip_flag_context +=
2719 1586070 : (mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] == (uint8_t)INVALID_MODE) ? 0 :
2720 774688 : (skip_flag_neighbor_array->top_array[skipFlagTopNeighborIndex] == EB_TRUE) ? 1 : 0;
2721 :
2722 : // Split Flag Context (neighbor info): same DC_PRED fallback as the intra luma neighbor modes above, plus the neighbor depths
2723 736609 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].left_neighbor_mode = (uint32_t)(
2724 811379 : (mode_type_neighbor_array->left_array[modeTypeLeftNeighborIndex] != INTRA_MODE) ? (uint32_t)DC_PRED :
2725 74770 : intra_luma_mode_neighbor_array->left_array[intraLumaModeLeftNeighborIndex]);
2726 811379 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].left_neighbor_depth = leaf_depth_neighbor_array->left_array[leafDepthLeftNeighborIndex];
2727 737530 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].top_neighbor_mode = (uint32_t)(
2728 811379 : (mode_type_neighbor_array->top_array[modeTypeTopNeighborIndex] != INTRA_MODE) ? (uint32_t)DC_PRED :
2729 73849 : intra_luma_mode_neighbor_array->top_array[intraLumaModeTopNeighborIndex]);
2730 811379 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].top_neighbor_depth = leaf_depth_neighbor_array->top_array[leafDepthTopNeighborIndex];
2731 :
2732 : // Generate Partition context: the neighbor's stored value, or 0 when it equals INVALID_NEIGHBOR_DATA
2733 811379 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].above_neighbor_partition = (((PartitionContext*)leaf_partition_neighbor_array->top_array)[partition_above_neighbor_index].above == (int8_t)INVALID_NEIGHBOR_DATA) ?
2734 774706 : 0 : ((PartitionContext*)leaf_partition_neighbor_array->top_array)[partition_above_neighbor_index].above;
2735 :
2736 811379 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].left_neighbor_partition = (((PartitionContext*)leaf_partition_neighbor_array->left_array)[partition_left_neighbor_index].left == (int8_t)INVALID_NEIGHBOR_DATA) ?
2737 780116 : 0 : ((PartitionContext*)leaf_partition_neighbor_array->left_array)[partition_left_neighbor_index].left;
2738 : // Skip Coeff AV1 Context: number of neighbors (left, top) with a non-zero skip_coeff entry; INVALID_NEIGHBOR_DATA counts as 0
2739 811379 : uint32_t skipCoeffLeftNeighborIndex = get_neighbor_array_unit_left_index(
2740 : skip_coeff_neighbor_array,
2741 : cu_origin_y);
2742 811400 : uint32_t skipCoeffTopNeighborIndex = get_neighbor_array_unit_top_index(
2743 : skip_coeff_neighbor_array,
2744 : cu_origin_x);
2745 :
2746 811395 : cu_ptr->skip_coeff_context =
2747 811395 : (skip_coeff_neighbor_array->left_array[skipCoeffLeftNeighborIndex] == (uint8_t)INVALID_NEIGHBOR_DATA) ? 0 :
2748 0 : (skip_coeff_neighbor_array->left_array[skipCoeffLeftNeighborIndex]) ? 1 : 0;
2749 :
2750 1622790 : cu_ptr->skip_coeff_context +=
2751 811395 : (skip_coeff_neighbor_array->top_array[skipCoeffTopNeighborIndex] == (uint8_t)INVALID_NEIGHBOR_DATA) ? 0 :
2752 0 : (skip_coeff_neighbor_array->top_array[skipCoeffTopNeighborIndex]) ? 1 : 0;
2753 : // Generate reference mode context
2754 :
2755 811395 : cu_ptr->reference_mode_context = (uint8_t)eb_av1_get_reference_mode_context(
2756 : cu_origin_x,
2757 : cu_origin_y,
2758 : mode_type_neighbor_array,
2759 : inter_pred_dir_neighbor_array);
2760 :
2761 811366 : cu_ptr->compoud_reference_type_context = (uint8_t)eb_av1_get_comp_reference_type_context(
2762 : cu_origin_x,
2763 : cu_origin_y,
2764 : mode_type_neighbor_array,
2765 : inter_pred_dir_neighbor_array);
2766 :
2767 : //Collect Neighbor ref count
2768 811366 : av1_collect_neighbors_ref_counts_new(cu_ptr->av1xd);
2769 :
2770 811355 : return;
2771 : }
2772 :
2773 : /********************************************
2774 : * av1_tu_calc_cost
2775 : * computes TU Cost and generates TU Cbf (the y/u/v_has_coeff bits of candidate_ptr for tu_index)
2776 : ********************************************/
2777 17194900 : EbErrorType av1_tu_calc_cost(
2778 : ModeDecisionCandidate *candidate_ptr, // input/output parameter, prediction result Ptr; its y/u/v_has_coeff bits are OR-ed in
2779 : int16_t txb_skip_ctx, // input parameter, index into txb_skip_cost
2780 : uint32_t tu_index, // input parameter, TU index inside the CU
2781 : uint32_t y_count_non_zero_coeffs, // input parameter, number of non zero Y quantized coefficients
2782 : uint32_t cb_count_non_zero_coeffs, // input parameter, number of non zero cb quantized coefficients
2783 : uint32_t cr_count_non_zero_coeffs, // input parameter, number of non zero cr quantized coefficients
2784 : uint64_t y_tu_distortion[DIST_CALC_TOTAL], // input/output parameter, Y distortion for both Normal and Cbf zero modes
2785 : uint64_t cb_tu_distortion[DIST_CALC_TOTAL], // unused
2786 : uint64_t cr_tu_distortion[DIST_CALC_TOTAL], // unused
2787 : COMPONENT_TYPE component_type, // input parameter, selects which planes are processed
2788 : uint64_t *y_tu_coeff_bits, // input/output parameter, Y quantized coefficients rate
2789 : uint64_t *cb_tu_coeff_bits, // unused
2790 : uint64_t *cr_tu_coeff_bits, // unused
2791 : TxSize txsize,
2792 : uint64_t lambda) // input parameter, lambda for Luma
2793 :
2794 : {
2795 : (void)cr_tu_coeff_bits;
2796 : (void)cb_tu_coeff_bits;
2797 : (void)cr_tu_distortion;
2798 : (void)cb_tu_distortion;
2799 17194900 : EbErrorType return_error = EB_ErrorNone; // never modified: the function always returns EB_ErrorNone
2800 : // Non Zero coeff mode variables
2801 17194900 : uint64_t y_nonzero_coeff_distortion = y_tu_distortion[DIST_CALC_RESIDUAL];
2802 : uint64_t y_nonzero_coeff_rate;
2803 :
2804 17194900 : uint64_t y_nonzero_coeff_cost = 0;
2805 :
2806 : // Zero Cbf mode variables
2807 17194900 : uint64_t y_zero_coeff_distortion = y_tu_distortion[DIST_CALC_PREDICTION];
2808 :
2809 17194900 : uint64_t y_zero_coeff_luma_flag_bits_num = 0;
2810 :
2811 : uint64_t y_zero_coeff_rate;
2812 :
2813 17194900 : uint64_t y_zero_coeff_cost = 0;
2814 17194900 : if (component_type == COMPONENT_LUMA || component_type == COMPONENT_ALL) {
2815 : // Non Zero Distortion
2816 : // *Note - As of Oct 2011, the JCT-VC uses the PSNR formula
2817 : // PSNR = (LUMA_WEIGHT * PSNRy + PSNRu + PSNRv) / (2+LUMA_WEIGHT)
2818 0 : y_nonzero_coeff_distortion = LUMA_WEIGHT * (y_nonzero_coeff_distortion << AV1_COST_PRECISION);
2819 :
2820 : // Zero distortion
2821 : // *Note - As of Oct 2011, the JCT-VC uses the PSNR formula
2822 : // PSNR = (LUMA_WEIGHT * PSNRy + PSNRu + PSNRv) / (2+LUMA_WEIGHT)
2823 0 : y_zero_coeff_distortion = LUMA_WEIGHT * (y_zero_coeff_distortion << AV1_COST_PRECISION);
2824 :
2825 : // **Compute Rate
2826 :
2827 : // Estimate Cbf's Bits
2828 :
2829 0 : const TxSize txs_ctx = (TxSize)((txsize_sqr_map[txsize] + txsize_sqr_up_map[txsize] + 1) >> 1); // rounded-up average of the two map entries
2830 0 : assert(txs_ctx < TX_SIZES);
2831 0 : const LvMapCoeffCost *const coeff_costs = &candidate_ptr->md_rate_estimation_ptr->coeff_fac_bits[txs_ctx][0];
2832 :
2833 0 : y_zero_coeff_luma_flag_bits_num = coeff_costs->txb_skip_cost[txb_skip_ctx][1];
2834 :
2835 0 : y_nonzero_coeff_rate = *y_tu_coeff_bits; // yNonZeroCbfLumaFlagBitsNum is already calculated inside y_tu_coeff_bits
2836 :
2837 0 : y_zero_coeff_rate = y_zero_coeff_luma_flag_bits_num;
2838 :
2839 : if (1) // NOTE(review): constant condition pins the zero-coeff cost to the maximum value, so the else arm is dead code
2840 0 : y_zero_coeff_cost = 0xFFFFFFFFFFFFFFFFull;
2841 : else
2842 : y_zero_coeff_cost = RDCOST(lambda, y_zero_coeff_rate, y_zero_coeff_distortion);
2843 : // **Compute Cost
2844 0 : y_nonzero_coeff_cost = RDCOST(lambda, y_nonzero_coeff_rate, y_nonzero_coeff_distortion);
2845 :
2846 : // The three updates below keep the coded values whenever the non-zero cost is lower than the (maximal) zero cost
2846 0 : candidate_ptr->y_has_coeff |= (((y_count_non_zero_coeffs != 0) && (y_nonzero_coeff_cost < y_zero_coeff_cost)) << tu_index);
2847 0 : *y_tu_coeff_bits = (y_nonzero_coeff_cost < y_zero_coeff_cost) ? *y_tu_coeff_bits : 0;
2848 0 : y_tu_distortion[DIST_CALC_RESIDUAL] = (y_nonzero_coeff_cost < y_zero_coeff_cost) ? y_tu_distortion[DIST_CALC_RESIDUAL] : y_tu_distortion[DIST_CALC_PREDICTION];
2849 : }
2850 : // Chroma: no cost comparison, the has_coeff bit for tu_index is set whenever the plane has a non-zero coefficient (bits are OR-ed in, never cleared here)
2850 17194900 : if (component_type == COMPONENT_CHROMA_CB || component_type == COMPONENT_CHROMA || component_type == COMPONENT_ALL)
2851 12494100 : candidate_ptr->u_has_coeff |= ((cb_count_non_zero_coeffs != 0) << tu_index);
2852 17194900 : if (component_type == COMPONENT_CHROMA_CR || component_type == COMPONENT_CHROMA || component_type == COMPONENT_ALL)
2853 12131400 : candidate_ptr->v_has_coeff |= ((cr_count_non_zero_coeffs != 0) << tu_index);
2854 17194900 : return return_error;
2855 : }
2856 :
2857 : /********************************************
2858 : * av1_tu_calc_cost_luma
2859 : * computes the luma TU Cost and generates the luma TU Cbf
2860 : ********************************************/
2861 :
2862 48202900 : EbErrorType av1_tu_calc_cost_luma(
2863 : int16_t txb_skip_ctx,
2864 : ModeDecisionCandidate *candidate_ptr, // input parameter, prediction result Ptr
2865 : uint32_t tu_index, // input parameter, TU index inside the CU
2866 : TxSize tx_size,
2867 : uint32_t y_count_non_zero_coeffs, // input parameter, number of non zero Y quantized coefficients
2868 : uint64_t y_tu_distortion[DIST_CALC_TOTAL], // input parameter, Y distortion for both Normal and Cbf zero modes
2869 : uint64_t *y_tu_coeff_bits, // input parameter, Y quantized coefficients rate
2870 : uint64_t *y_full_cost,
2871 : uint64_t lambda) // input parameter, lambda for Luma
2872 :
2873 : {
2874 48202900 : EbErrorType return_error = EB_ErrorNone;
2875 :
2876 : // Non Zero Cbf mode variables
2877 48202900 : uint64_t yNonZeroCbfDistortion = y_tu_distortion[DIST_CALC_RESIDUAL];
2878 :
2879 : uint64_t yNonZeroCbfRate;
2880 :
2881 48202900 : uint64_t yNonZeroCbfCost = 0;
2882 :
2883 : // Zero Cbf mode variables
2884 48202900 : uint64_t yZeroCbfDistortion = y_tu_distortion[DIST_CALC_PREDICTION];
2885 :
2886 48202900 : uint64_t yZeroCbfLumaFlagBitsNum = 0;
2887 :
2888 : uint64_t yZeroCbfRate;
2889 :
2890 48202900 : uint64_t yZeroCbfCost = 0;
2891 :
2892 : // **Compute distortion
2893 : // Non Zero Distortion
2894 : // *Note - As of Oct 2011, the JCT-VC uses the PSNR forumula
2895 : // PSNR = (LUMA_WEIGHT * PSNRy + PSNRu + PSNRv) / (2+LUMA_WEIGHT)
2896 48202900 : yNonZeroCbfDistortion = LUMA_WEIGHT * (yNonZeroCbfDistortion << AV1_COST_PRECISION);
2897 :
2898 : // Zero distortion
2899 : // *Note - As of Oct 2011, the JCT-VC uses the PSNR forumula
2900 : // PSNR = (LUMA_WEIGHT * PSNRy + PSNRu + PSNRv) / (2+LUMA_WEIGHT)
2901 48202900 : yZeroCbfDistortion = LUMA_WEIGHT * (yZeroCbfDistortion << AV1_COST_PRECISION);
2902 :
2903 : // **Compute Rate
2904 :
2905 : // Esimate Cbf's Bits
2906 :
2907 48202900 : const TxSize txs_ctx = (TxSize)((txsize_sqr_map[tx_size] + txsize_sqr_up_map[tx_size] + 1) >> 1);
2908 48202900 : assert(txs_ctx < TX_SIZES);
2909 48202900 : const LvMapCoeffCost *const coeff_costs = &candidate_ptr->md_rate_estimation_ptr->coeff_fac_bits[txs_ctx][0];
2910 :
2911 48202900 : yZeroCbfLumaFlagBitsNum = coeff_costs->txb_skip_cost[txb_skip_ctx][1];
2912 :
2913 48202900 : yNonZeroCbfRate = *y_tu_coeff_bits; // yNonZeroCbfLumaFlagBitsNum is already calculated inside y_tu_coeff_bits
2914 :
2915 48202900 : yZeroCbfRate = yZeroCbfLumaFlagBitsNum;
2916 :
2917 : if (1)
2918 48202900 : yZeroCbfCost = 0xFFFFFFFFFFFFFFFFull;
2919 : else
2920 : yZeroCbfCost = RDCOST(lambda, yZeroCbfRate, yZeroCbfDistortion);
2921 : // **Compute Cost
2922 48202900 : yNonZeroCbfCost = RDCOST(lambda, yNonZeroCbfRate, yNonZeroCbfDistortion);
2923 48202900 : candidate_ptr->y_has_coeff |= ((y_count_non_zero_coeffs != 0) << tu_index);
2924 48202900 : *y_tu_coeff_bits = (yNonZeroCbfCost < yZeroCbfCost) ? *y_tu_coeff_bits : 0;
2925 48202900 : y_tu_distortion[DIST_CALC_RESIDUAL] = (yNonZeroCbfCost < yZeroCbfCost) ? y_tu_distortion[DIST_CALC_RESIDUAL] : y_tu_distortion[DIST_CALC_PREDICTION];
2926 :
2927 48202900 : *y_full_cost = MIN(yNonZeroCbfCost, yZeroCbfCost);
2928 :
2929 48202900 : return return_error;
2930 : }
2931 :
2932 : //static INLINE int32_t partition_plane_context(const MacroBlockD *xd, int32_t mi_row,
2933 : // int32_t mi_col, BlockSize bsize) {
2934 : // const PartitionContextType *above_ctx = xd->above_seg_context + mi_col;
2935 : // const PartitionContextType *left_ctx =
2936 : // xd->left_seg_context + (mi_row & MAX_MIB_MASK);
2937 : // // Minimum partition point is 8x8. Offset the bsl accordingly.
2938 : // const int32_t bsl = mi_size_wide_log2[bsize] - mi_size_wide_log2[BLOCK_8X8];
2939 : // int32_t above = (*above_ctx >> bsl) & 1, left = (*left_ctx >> bsl) & 1;
2940 : //
2941 : // assert(mi_size_wide_log2[bsize] == mi_size_high_log2[bsize]);
2942 : // assert(bsl >= 0);
2943 : //
2944 : // return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
2945 : //}
2946 :
2947 : /*********************************************************************************
2948 : * split_flag_rate function is used to generate the Split rate
2949 : *
2950 : * @param *cu_ptr(input)
2951 : * cu_ptr is the pointer of the target CU.
2952 : * @param split_flag(input)
2953 : * split_flag is the split flag value.
2954 : * @param split_rate(output)
2955 : * split_rate contains rate.
2956 : * @param lambda(input)
2957 : * lambda is the Lagrange multiplier
2958 : * @param md_rate_estimation_ptr(input)
2959 : * md_rate_estimation_ptr is pointer to MD rate Estimation Tables
2960 : **********************************************************************************/
2961 1427770 : EbErrorType av1_split_flag_rate(
2962 : SequenceControlSet *sequence_control_set_ptr,
2963 : ModeDecisionContext *context_ptr,
2964 : CodingUnit *cu_ptr,
2965 : uint32_t leaf_index,
2966 : PartitionType partitionType,
2967 : uint64_t *split_rate,
2968 : uint64_t lambda,
2969 : MdRateEstimationContext *md_rate_estimation_ptr,
2970 : uint32_t tb_max_depth)
2971 : {
2972 : (void)tb_max_depth;
2973 : (void)leaf_index;
2974 :
2975 1427770 : const BlockGeom *blk_geom = get_blk_geom_mds(cu_ptr->mds_idx);
2976 1427680 : EbErrorType return_error = EB_ErrorNone;
2977 :
2978 1427680 : uint32_t cu_origin_x = context_ptr->sb_origin_x + blk_geom->origin_x;
2979 1427680 : uint32_t cu_origin_y = context_ptr->sb_origin_y + blk_geom->origin_y;
2980 :
2981 1427680 : PartitionType p = partitionType;
2982 :
2983 1427680 : uint32_t cu_depth = blk_geom->depth;
2984 : UNUSED(cu_depth);
2985 1427680 : BlockSize bsize = blk_geom->bsize;
2986 1427680 : assert(bsize<BlockSizeS_ALL);
2987 1427670 : const int32_t is_partition_point = blk_geom->bsize >= BLOCK_8X8;
2988 :
2989 1427670 : if (is_partition_point) {
2990 1427670 : const int32_t hbs = (mi_size_wide[bsize] << 2) >> 1;
2991 1427670 : const int32_t hasRows = (cu_origin_y + hbs) < sequence_control_set_ptr->seq_header.max_frame_height;
2992 1427670 : const int32_t hasCols = (cu_origin_x + hbs) < sequence_control_set_ptr->seq_header.max_frame_width;
2993 :
2994 1427670 : uint32_t contextIndex = 0;
2995 :
2996 1427670 : const PartitionContextType left_ctx = context_ptr->md_local_cu_unit[cu_ptr->mds_idx].left_neighbor_partition == (int8_t)(INVALID_NEIGHBOR_DATA) ? 0 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].left_neighbor_partition;
2997 1427670 : const PartitionContextType above_ctx = context_ptr->md_local_cu_unit[cu_ptr->mds_idx].above_neighbor_partition == (int8_t)(INVALID_NEIGHBOR_DATA) ? 0 : context_ptr->md_local_cu_unit[cu_ptr->mds_idx].above_neighbor_partition;
2998 :
2999 1427670 : const int32_t bsl = mi_size_wide_log2[bsize] - mi_size_wide_log2[BLOCK_8X8];
3000 :
3001 1427670 : int32_t above = (above_ctx >> bsl) & 1, left = (left_ctx >> bsl) & 1;
3002 :
3003 1427670 : assert(mi_size_wide_log2[bsize] == mi_size_high_log2[bsize]);
3004 1427670 : assert(bsl >= 0);
3005 :
3006 1427670 : contextIndex = (left * 2 + above) + bsl * PARTITION_PLOFFSET;
3007 :
3008 1427670 : if (hasRows && hasCols) {
3009 1294220 : *split_rate = (uint64_t)md_rate_estimation_ptr->partition_fac_bits[contextIndex][partitionType];
3010 :
3011 : }
3012 133452 : else if (!hasRows && hasCols) {
3013 133560 : *split_rate = (uint64_t)md_rate_estimation_ptr->partition_fac_bits[2][p == PARTITION_SPLIT];
3014 :
3015 : }
3016 : else {
3017 0 : *split_rate = (uint64_t)md_rate_estimation_ptr->partition_fac_bits[2][p == PARTITION_SPLIT];
3018 :
3019 : }
3020 : }
3021 : else
3022 0 : *split_rate = (uint64_t)md_rate_estimation_ptr->partition_fac_bits[0][partitionType];
3023 1427670 : *split_rate = RDCOST(lambda, *split_rate, 0);
3024 :
3025 1427670 : return return_error;
3026 : }
3027 :
3028 : /********************************************
3029 : * tu_calc_cost
3030 : * Computes TU Cost and generetes TU Cbf
3031 : * at the level of the encode pass
3032 : ********************************************/
3033 20940 : EbErrorType av1_encode_tu_calc_cost(
3034 : EncDecContext *context_ptr,
3035 : uint32_t *count_non_zero_coeffs,
3036 : uint64_t y_tu_distortion[DIST_CALC_TOTAL],
3037 : uint64_t *y_tu_coeff_bits,
3038 : uint32_t component_mask
3039 : )
3040 : {
3041 20940 : CodingUnit *cu_ptr = context_ptr->cu_ptr;
3042 20940 : uint32_t tu_index = context_ptr->txb_itr;
3043 20940 : MdRateEstimationContext *md_rate_estimation_ptr = context_ptr->md_rate_estimation_ptr;
3044 20940 : uint64_t lambda = context_ptr->full_lambda;
3045 20940 : uint32_t y_count_non_zero_coeffs = count_non_zero_coeffs[0];
3046 20940 : uint32_t cb_count_non_zero_coeffs = count_non_zero_coeffs[1];
3047 20940 : uint32_t cr_count_non_zero_coeffs = count_non_zero_coeffs[2];
3048 :
3049 20940 : EbErrorType return_error = EB_ErrorNone;
3050 :
3051 : // Non Zero Cbf mode variables
3052 20940 : uint64_t yNonZeroCbfDistortion = y_tu_distortion[DIST_CALC_RESIDUAL];
3053 :
3054 : uint64_t yNonZeroCbfRate;
3055 :
3056 20940 : uint64_t yNonZeroCbfCost = 0;
3057 :
3058 : // Zero Cbf mode variables
3059 20940 : uint64_t yZeroCbfDistortion = y_tu_distortion[DIST_CALC_PREDICTION];
3060 :
3061 20940 : uint64_t yZeroCbfLumaFlagBitsNum = 0;
3062 :
3063 : uint64_t yZeroCbfRate;
3064 :
3065 20940 : uint64_t yZeroCbfCost = 0;
3066 20940 : int16_t txb_skip_ctx = context_ptr->md_context->luma_txb_skip_context;
3067 : // **Compute distortion
3068 20940 : if (component_mask == PICTURE_BUFFER_DESC_LUMA_MASK || component_mask == PICTURE_BUFFER_DESC_FULL_MASK) {
3069 : // Non Zero Distortion
3070 : // *Note - As of Oct 2011, the JCT-VC uses the PSNR forumula
3071 : // PSNR = (LUMA_WEIGHT * PSNRy + PSNRu + PSNRv) / (2+LUMA_WEIGHT)
3072 20940 : yNonZeroCbfDistortion = LUMA_WEIGHT * (yNonZeroCbfDistortion << AV1_COST_PRECISION);
3073 :
3074 : // Zero distortion
3075 : // *Note - As of Oct 2011, the JCT-VC uses the PSNR forumula
3076 : // PSNR = (LUMA_WEIGHT * PSNRy + PSNRu + PSNRv) / (2+LUMA_WEIGHT)
3077 20940 : yZeroCbfDistortion = LUMA_WEIGHT * (yZeroCbfDistortion << AV1_COST_PRECISION);
3078 20940 : TxSize txSize = context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr];
3079 20940 : assert(txSize < TX_SIZES_ALL);
3080 :
3081 20940 : const TxSize txs_ctx = (TxSize)((txsize_sqr_map[txSize] + txsize_sqr_up_map[txSize] + 1) >> 1);
3082 20940 : assert(txs_ctx < TX_SIZES);
3083 20940 : const LvMapCoeffCost *const coeff_costs = &md_rate_estimation_ptr->coeff_fac_bits[txs_ctx][0];
3084 :
3085 20940 : yZeroCbfLumaFlagBitsNum = coeff_costs->txb_skip_cost[txb_skip_ctx][1];
3086 :
3087 20940 : yNonZeroCbfRate = *y_tu_coeff_bits; // yNonZeroCbfLumaFlagBitsNum is already calculated inside y_tu_coeff_bits
3088 :
3089 20940 : yZeroCbfRate = yZeroCbfLumaFlagBitsNum;
3090 20940 : TransformUnit *txb_ptr = &cu_ptr->transform_unit_array[context_ptr->txb_itr];
3091 20940 : if (txb_ptr->transform_type[PLANE_TYPE_Y] != DCT_DCT) {
3092 5764 : yZeroCbfCost = 0xFFFFFFFFFFFFFFFFull;
3093 : }
3094 : else
3095 15176 : yZeroCbfCost = RDCOST(lambda, yZeroCbfRate, yZeroCbfDistortion);
3096 : // **Compute Cost
3097 20940 : yNonZeroCbfCost = RDCOST(lambda, yNonZeroCbfRate, yNonZeroCbfDistortion);
3098 20940 : cu_ptr->transform_unit_array[tu_index].y_has_coeff = ((y_count_non_zero_coeffs != 0) && (yNonZeroCbfCost < yZeroCbfCost)) ? EB_TRUE : EB_FALSE;
3099 20940 : *y_tu_coeff_bits = (yNonZeroCbfCost < yZeroCbfCost) ? *y_tu_coeff_bits : 0;
3100 20940 : y_tu_distortion[DIST_CALC_RESIDUAL] = (yNonZeroCbfCost < yZeroCbfCost) ? y_tu_distortion[DIST_CALC_RESIDUAL] : y_tu_distortion[DIST_CALC_PREDICTION];
3101 : }
3102 : else
3103 0 : cu_ptr->transform_unit_array[tu_index].y_has_coeff = EB_FALSE;
3104 20940 : cu_ptr->transform_unit_array[tu_index].u_has_coeff = cb_count_non_zero_coeffs != 0 ? EB_TRUE : EB_FALSE;
3105 20940 : cu_ptr->transform_unit_array[tu_index].v_has_coeff = cr_count_non_zero_coeffs != 0 ? EB_TRUE : EB_FALSE;
3106 :
3107 20940 : return return_error;
3108 : }
3109 :
3110 0 : uint64_t GetPMCost(
3111 : uint64_t lambda,
3112 : uint64_t tuDistortion,
3113 : uint64_t y_tu_coeff_bits
3114 : )
3115 : {
3116 0 : uint64_t yNonZeroCbfDistortion = LUMA_WEIGHT * (tuDistortion << COST_PRECISION);
3117 0 : uint64_t yNonZeroCbfRate = (y_tu_coeff_bits);
3118 0 : uint64_t yNonZeroCbfCost = yNonZeroCbfDistortion + (((lambda * yNonZeroCbfRate) + MD_OFFSET) >> MD_SHIFT);
3119 :
3120 0 : return yNonZeroCbfCost;
3121 : }
|