Line data Source code
1 : /*
2 : * Copyright(c) 2019 Intel Corporation
3 : * SPDX - License - Identifier: BSD - 2 - Clause - Patent
4 : */
5 :
6 : /*
7 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
8 : *
9 : * This source code is subject to the terms of the BSD 2 Clause License and
10 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
11 : * was not distributed with this source code in the LICENSE file, you can
12 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
13 : * Media Patent License 1.0 was not distributed with this source code in the
14 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
15 : */
16 :
17 : #include "EbDefinitions.h"
18 : #include "EbModeDecisionProcess.h"
19 : #include "EbTransforms.h"
20 : #include "EbFullLoop.h"
21 : #include "EbRateDistortionCost.h"
22 : #include "EbCommonUtils.h"
23 : #include "aom_dsp_rtcd.h"
24 :
/*
 * Branch-prediction hints.
 *
 * The argument is normalised to 0/1 with `!!` before being handed to
 * __builtin_expect(): the builtin compares its first argument against the
 * expected constant, so passing a truthy value other than 1 (a count, a
 * pointer, ...) to LIKELY() would otherwise be treated as the *unexpected*
 * case. The fallback definitions are normalised the same way so both
 * variants evaluate to the same value.
 */
#ifdef __GNUC__
#define LIKELY(v) __builtin_expect(!!(v), 1)
#define UNLIKELY(v) __builtin_expect(!!(v), 0)
#else
#define LIKELY(v) (!!(v))
#define UNLIKELY(v) (!!(v))
#endif
32 : static PartitionType from_shape_to_part[] = {
33 : PARTITION_NONE,
34 : PARTITION_HORZ,
35 : PARTITION_VERT,
36 : PARTITION_HORZ_A,
37 : PARTITION_HORZ_B,
38 : PARTITION_VERT_A,
39 : PARTITION_VERT_B,
40 : PARTITION_HORZ_4,
41 : PARTITION_VERT_4,
42 : PARTITION_SPLIT
43 : };
44 0 : void quantize_b_helper_c_II(const TranLow *coeff_ptr, intptr_t n_coeffs,
45 : int32_t skip_block, const int16_t *zbin_ptr,
46 : const int16_t *round_ptr, const int16_t *quant_ptr,
47 : const int16_t *quant_shift_ptr, TranLow *qcoeff_ptr,
48 : TranLow *dqcoeff_ptr, const int16_t *dequant_ptr,
49 : uint16_t *eob_ptr, const int16_t *scan,
50 : const int16_t *iscan, const QmVal *qm_ptr,
51 : const QmVal *iqm_ptr, const int32_t log_scale) {
52 0 : const int32_t zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
53 0 : ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
54 0 : const int32_t nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
55 0 : int32_t i, non_zero_count = (int32_t)n_coeffs, eob = -1;
56 : (void)iscan;
57 :
58 0 : memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
59 0 : memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
60 :
61 0 : if (!skip_block) {
62 : // Pre-scan pass
63 0 : for (i = (int32_t)n_coeffs - 1; i >= 0; i--) {
64 0 : const int32_t rc = scan[i];
65 0 : const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
66 0 : const int32_t coeff = coeff_ptr[rc] * wt;
67 :
68 0 : if (coeff < (zbins[rc != 0] * (1 << AOM_QM_BITS)) &&
69 0 : coeff >(nzbins[rc != 0] * (1 << AOM_QM_BITS)))
70 0 : non_zero_count--;
71 : else
72 : break;
73 : }
74 :
75 : // Quantization pass: All coefficients with index >= zero_flag are
76 : // skippable. Note: zero_flag can be zero.
77 0 : for (i = 0; i < non_zero_count; i++) {
78 0 : const int32_t rc = scan[i];
79 0 : const int32_t coeff = coeff_ptr[rc];
80 0 : const int32_t coeff_sign = (coeff >> 31);
81 0 : const int32_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
82 : int32_t tmp32;
83 :
84 0 : const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
85 0 : if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) {
86 0 : int64_t tmp =
87 0 : clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale),
88 : INT16_MIN, INT16_MAX);
89 0 : tmp *= wt;
90 0 : tmp32 = (int32_t)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
91 0 : quant_shift_ptr[rc != 0]) >>
92 0 : (16 - log_scale + AOM_QM_BITS)); // quantization
93 0 : qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
94 0 : const int32_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
95 0 : const int32_t dequant =
96 0 : (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
97 : AOM_QM_BITS;
98 0 : const TranLow abs_dqcoeff = (tmp32 * dequant) >> log_scale;
99 0 : dqcoeff_ptr[rc] = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
100 :
101 0 : if (tmp32) eob = i;
102 : }
103 : }
104 : }
105 0 : *eob_ptr = (uint16_t)(eob + 1);
106 0 : }
107 0 : void eb_aom_quantize_b_c_II(const TranLow *coeff_ptr, intptr_t n_coeffs,
108 : int32_t skip_block, const int16_t *zbin_ptr,
109 : const int16_t *round_ptr, const int16_t *quant_ptr,
110 : const int16_t *quant_shift_ptr, TranLow *qcoeff_ptr,
111 : TranLow *dqcoeff_ptr, const int16_t *dequant_ptr,
112 : uint16_t *eob_ptr, const int16_t *scan,
113 : const int16_t *iscan) {
114 0 : quantize_b_helper_c_II(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
115 : quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
116 : dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 0);
117 0 : }
118 :
119 0 : void eb_aom_quantize_b_32x32_c_II(const TranLow *coeff_ptr, intptr_t n_coeffs,
120 : int32_t skip_block, const int16_t *zbin_ptr,
121 : const int16_t *round_ptr, const int16_t *quant_ptr,
122 : const int16_t *quant_shift_ptr,
123 : TranLow *qcoeff_ptr, TranLow *dqcoeff_ptr,
124 : const int16_t *dequant_ptr, uint16_t *eob_ptr,
125 : const int16_t *scan, const int16_t *iscan) {
126 0 : quantize_b_helper_c_II(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
127 : quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
128 : dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 1);
129 0 : }
130 :
131 0 : void eb_aom_quantize_b_64x64_c_II(const TranLow *coeff_ptr, intptr_t n_coeffs,
132 : int32_t skip_block, const int16_t *zbin_ptr,
133 : const int16_t *round_ptr, const int16_t *quant_ptr,
134 : const int16_t *quant_shift_ptr,
135 : TranLow *qcoeff_ptr, TranLow *dqcoeff_ptr,
136 : const int16_t *dequant_ptr, uint16_t *eob_ptr,
137 : const int16_t *scan, const int16_t *iscan) {
138 0 : quantize_b_helper_c_II(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
139 : quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
140 : dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 2);
141 0 : }
142 :
143 0 : void eb_quantize_b_helper_c(
144 : const TranLow *coeff_ptr,
145 : int32_t stride,
146 : #
147 : int32_t width,
148 : int32_t height,
149 : intptr_t n_coeffs,
150 : int32_t skip_block,
151 : const int16_t *zbin_ptr,
152 : const int16_t *round_ptr,
153 : const int16_t *quant_ptr,
154 : const int16_t *quant_shift_ptr,
155 : TranLow *qcoeff_ptr,
156 : TranLow *dqcoeff_ptr,
157 : const int16_t *dequant_ptr,
158 : uint16_t *eob_ptr,
159 : const int16_t *scan,
160 : const int16_t *iscan,
161 : const QmVal *qm_ptr,
162 : const QmVal *iqm_ptr,
163 : const int32_t log_scale)
164 : {
165 0 : const int32_t zbins[2] = {
166 0 : ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
167 0 : ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale)
168 : };
169 0 : const int32_t nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
170 0 : int32_t i, non_zero_count = (int32_t)n_coeffs, eob = -1;
171 : (void)iscan;
172 :
173 : // Nader quantisation
174 0 : for (int32_t x = 0; x < height; x++) {
175 0 : memset(qcoeff_ptr + (x * stride), 0, width /*n_coeffs*/ * sizeof(*qcoeff_ptr));
176 0 : memset(dqcoeff_ptr + (x * stride), 0, width /*n_coeffs*/ * sizeof(*dqcoeff_ptr));
177 : }
178 :
179 0 : if (!skip_block) {
180 : // Pre-scan pass
181 0 : for (i = (int32_t)n_coeffs - 1; i >= 0; i--) {
182 0 : const int32_t mapRc = scan[i];
183 :
184 0 : const int32_t rc = ((mapRc / MIN(32, height)) * stride) + (mapRc % MIN(32, width));
185 :
186 0 : const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
187 0 : const int32_t coeff = coeff_ptr[rc] * wt;
188 :
189 : ////if (mapRc != NewTab[rc])
190 : //printf("%d\n", coeff);
191 :
192 0 : if (coeff < (zbins[rc != 0] * (1 << AOM_QM_BITS)) &&
193 0 : coeff >(nzbins[rc != 0] * (1 << AOM_QM_BITS)))
194 0 : non_zero_count--;
195 : else
196 : break;
197 : }
198 : // Quantization pass: All coefficients with index >= zero_flag are
199 : // skippable. Note: zero_flag can be zero.
200 0 : for (i = 0; i < non_zero_count; i++) {
201 0 : const int32_t mapRc = scan[i];
202 :
203 0 : const int32_t rc = ((mapRc / MIN(32, height)) * stride) + (mapRc % MIN(32, width));
204 0 : const int32_t coeff = coeff_ptr[rc];
205 0 : const int32_t coeff_sign = (coeff >> 31);
206 0 : const int32_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
207 : int32_t tmp32;
208 :
209 0 : const QmVal wt = qm_ptr != NULL ? qm_ptr[mapRc] : (1 << AOM_QM_BITS);
210 :
211 0 : if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) {
212 0 : int64_t tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale), INT16_MIN, INT16_MAX);
213 :
214 0 : tmp *= wt;
215 :
216 0 : tmp32 = (int32_t)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) * quant_shift_ptr[rc != 0]) >> (16 - log_scale + AOM_QM_BITS)); // quantization
217 :
218 0 : qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
219 :
220 0 : const int32_t iwt = iqm_ptr != NULL ? iqm_ptr[mapRc] : (1 << AOM_QM_BITS);
221 :
222 0 : const int32_t dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
223 :
224 0 : dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / (1 << log_scale);
225 :
226 0 : if (tmp32) eob = i;
227 : }
228 : }
229 : }
230 :
231 0 : *eob_ptr = (uint16_t)(eob + 1);
232 0 : }
233 0 : void eb_highbd_quantize_b_helper_c(
234 : const TranLow *coeff_ptr, intptr_t n_coeffs, int32_t skip_block,
235 : const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
236 : const int16_t *quant_shift_ptr, TranLow *qcoeff_ptr,
237 : TranLow *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
238 : const int16_t *scan, const int16_t *iscan, const QmVal *qm_ptr,
239 : const QmVal *iqm_ptr, const int32_t log_scale) {
240 0 : int32_t i, eob = -1;
241 0 : const int32_t zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
242 0 : ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
243 0 : const int32_t nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
244 : int32_t dequant;
245 : int32_t idx_arr[4096];
246 : (void)iscan;
247 0 : int32_t idx = 0;
248 :
249 0 : memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
250 0 : memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
251 :
252 0 : if (!skip_block) {
253 : // Pre-scan pass
254 0 : for (i = 0; i < n_coeffs; i++) {
255 0 : const int32_t rc = scan[i];
256 0 : const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
257 0 : const int32_t coeff = coeff_ptr[rc] * wt;
258 :
259 : // If the coefficient is out of the base ZBIN range, keep it for
260 : // quantization.
261 0 : if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS)) ||
262 0 : coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS)))
263 0 : idx_arr[idx++] = i;
264 : }
265 :
266 : // Quantization pass: only process the coefficients selected in
267 : // pre-scan pass. Note: idx can be zero.
268 0 : for (i = 0; i < idx; i++) {
269 0 : const int32_t rc = scan[idx_arr[i]];
270 0 : const int32_t coeff = coeff_ptr[rc];
271 0 : const int32_t coeff_sign = (coeff >> 31);
272 0 : const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
273 0 : const QmVal iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
274 0 : const int32_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
275 0 : const int64_t tmp1 =
276 0 : abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
277 0 : const int64_t tmpw = tmp1 * wt;
278 0 : const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
279 0 : const int32_t abs_qcoeff = (int32_t)((tmp2 * quant_shift_ptr[rc != 0]) >>
280 0 : (16 - log_scale + AOM_QM_BITS));
281 0 : qcoeff_ptr[rc] = (TranLow)((abs_qcoeff ^ coeff_sign) - coeff_sign);
282 0 : dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
283 : AOM_QM_BITS;
284 0 : const TranLow abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
285 0 : dqcoeff_ptr[rc] = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
286 0 : if (abs_qcoeff) eob = idx_arr[i];
287 : }
288 : }
289 0 : *eob_ptr = (uint16_t)(eob + 1);
290 0 : }
291 :
292 0 : void eb_aom_highbd_quantize_b_c(const TranLow *coeff_ptr, intptr_t n_coeffs,
293 : int32_t skip_block, const int16_t *zbin_ptr,
294 : const int16_t *round_ptr, const int16_t *quant_ptr,
295 : const int16_t *quant_shift_ptr,
296 : TranLow *qcoeff_ptr, TranLow *dqcoeff_ptr,
297 : const int16_t *dequant_ptr, uint16_t *eob_ptr,
298 : const int16_t *scan, const int16_t *iscan) {
299 0 : eb_highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
300 : round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
301 : dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
302 : NULL, NULL, 0);
303 0 : }
304 :
305 0 : void eb_aom_highbd_quantize_b_32x32_c(
306 : const TranLow *coeff_ptr, intptr_t n_coeffs, int32_t skip_block,
307 : const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
308 : const int16_t *quant_shift_ptr, TranLow *qcoeff_ptr,
309 : TranLow *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
310 : const int16_t *scan, const int16_t *iscan) {
311 0 : eb_highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
312 : round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
313 : dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
314 : NULL, NULL, 1);
315 0 : }
316 :
317 0 : void eb_aom_highbd_quantize_b_64x64_c(
318 : const TranLow *coeff_ptr, intptr_t n_coeffs, int32_t skip_block,
319 : const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
320 : const int16_t *quant_shift_ptr, TranLow *qcoeff_ptr,
321 : TranLow *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
322 : const int16_t *scan, const int16_t *iscan) {
323 0 : eb_highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
324 : round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
325 : dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
326 : NULL, NULL, 2);
327 0 : }
328 :
329 0 : void eb_av1_highbd_quantize_b_facade(const TranLow *coeff_ptr,
330 : intptr_t n_coeffs, const MacroblockPlane *p,
331 : TranLow *qcoeff_ptr,
332 : TranLow *dqcoeff_ptr, uint16_t *eob_ptr,
333 : const ScanOrder *sc,
334 : const QuantParam *qparam) {
335 : // obsolete skip_block
336 0 : const int32_t skip_block = 0;
337 0 : const QmVal *qm_ptr = qparam->qmatrix;
338 0 : const QmVal *iqm_ptr = qparam->iqmatrix;
339 0 : if (qm_ptr != NULL && iqm_ptr != NULL) {
340 0 : eb_highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
341 : p->round_QTX, p->quant_QTX, p->quant_shift_QTX,
342 : qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
343 : sc->scan, sc->iscan, qm_ptr, iqm_ptr,
344 : qparam->log_scale);
345 : }
346 : else {
347 0 : switch (qparam->log_scale) {
348 0 : case 0:
349 0 : if (LIKELY(n_coeffs >= 8)) {
350 0 : eb_aom_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
351 : p->round_QTX, p->quant_QTX, p->quant_shift_QTX,
352 : qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
353 : eob_ptr, sc->scan, sc->iscan);
354 : }
355 : else {
356 0 : eb_aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
357 : p->round_QTX, p->quant_QTX,
358 : p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
359 : p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
360 : }
361 0 : break;
362 0 : case 1:
363 0 : eb_aom_highbd_quantize_b_32x32(
364 : coeff_ptr, n_coeffs, skip_block, p->zbin_QTX, p->round_QTX,
365 : p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
366 : p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
367 0 : break;
368 0 : case 2:
369 0 : eb_aom_highbd_quantize_b_64x64(
370 : coeff_ptr, n_coeffs, skip_block, p->zbin_QTX, p->round_QTX,
371 : p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
372 : p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
373 0 : break;
374 0 : default: assert(0);
375 : }
376 : }
377 0 : }
378 :
379 64414900 : void av1_quantize_b_facade_II(
380 : const TranLow *coeff_ptr,
381 : int32_t stride,
382 : int32_t width,
383 : int32_t height,
384 : intptr_t n_coeffs,
385 : const MacroblockPlane *p,
386 : TranLow *qcoeff_ptr,
387 : TranLow *dqcoeff_ptr,
388 : uint16_t *eob_ptr,
389 : const ScanOrder *sc,
390 : const QuantParam *qparam)
391 : {
392 : // obsolete skip_block
393 64414900 : const int32_t skip_block = 0;
394 64414900 : const QmVal *qm_ptr = qparam->qmatrix;
395 64414900 : const QmVal *iqm_ptr = qparam->iqmatrix;
396 64414900 : if (qm_ptr != NULL && iqm_ptr != NULL) {
397 0 : eb_quantize_b_helper_c(
398 : coeff_ptr,
399 : stride,
400 : width,
401 : height,
402 : n_coeffs,
403 : skip_block,
404 : p->zbin_QTX,
405 : p->round_QTX,
406 : p->quant_QTX,
407 : p->quant_shift_QTX,
408 : qcoeff_ptr,
409 : dqcoeff_ptr,
410 : p->dequant_QTX,
411 : eob_ptr,
412 : sc->scan,
413 : sc->iscan,
414 : qm_ptr,
415 : iqm_ptr,
416 : qparam->log_scale);
417 : }
418 : else {
419 64414900 : switch (qparam->log_scale) {
420 56360000 : case 0:
421 56360000 : eb_aom_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
422 : p->round_QTX, p->quant_QTX, p->quant_shift_QTX,
423 : qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
424 : sc->scan, sc->iscan);
425 :
426 56385000 : break;
427 6721270 : case 1:
428 :
429 6721270 : eb_aom_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
430 : p->round_QTX, p->quant_QTX, p->quant_shift_QTX,
431 : qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
432 : sc->scan, sc->iscan);
433 :
434 6722010 : break;
435 1367350 : case 2:
436 :
437 1367350 : eb_aom_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin_QTX,
438 : p->round_QTX, p->quant_QTX, p->quant_shift_QTX,
439 : qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
440 : sc->scan, sc->iscan);
441 :
442 1367390 : break;
443 0 : default: assert(0);
444 : }
445 : }
446 64474400 : }
447 :
448 :
449 0 : static void quantize_fp_helper_c(
450 : const TranLow *coeff_ptr,
451 : intptr_t n_coeffs,
452 : const int16_t *zbin_ptr,
453 : const int16_t *round_ptr,
454 : const int16_t *quant_ptr,
455 : const int16_t *quant_shift_ptr,
456 : TranLow *qcoeff_ptr,
457 : TranLow *dqcoeff_ptr,
458 : const int16_t *dequant_ptr,
459 : uint16_t *eob_ptr,
460 : const int16_t *scan,
461 : const int16_t *iscan,
462 : const QmVal *qm_ptr,
463 : const QmVal *iqm_ptr,
464 : int log_scale)
465 : {
466 0 : int i, eob = -1;
467 0 : const int rounding[2] = { ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
468 0 : ROUND_POWER_OF_TWO(round_ptr[1], log_scale) };
469 : // TODO(jingning) Decide the need of these arguments after the
470 : // quantization process is completed.
471 : (void)zbin_ptr;
472 : (void)quant_shift_ptr;
473 : (void)iscan;
474 :
475 0 : memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
476 0 : memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
477 :
478 0 : if (qm_ptr == NULL && iqm_ptr == NULL) {
479 0 : for (i = 0; i < n_coeffs; i++) {
480 0 : const int rc = scan[i];
481 0 : const int32_t thresh = (int32_t)(dequant_ptr[rc != 0]);
482 0 : const int coeff = coeff_ptr[rc];
483 0 : const int coeff_sign = (coeff >> 31);
484 0 : int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
485 0 : int tmp32 = 0;
486 0 : if ((abs_coeff << (1 + log_scale)) >= thresh) {
487 : abs_coeff =
488 0 : clamp64(abs_coeff + rounding[rc != 0], INT16_MIN, INT16_MAX);
489 0 : tmp32 = (int)((abs_coeff * quant_ptr[rc != 0]) >> (16 - log_scale));
490 0 : if (tmp32) {
491 0 : qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
492 0 : const TranLow abs_dqcoeff =
493 0 : (tmp32 * dequant_ptr[rc != 0]) >> log_scale;
494 0 : dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
495 : }
496 : }
497 0 : if (tmp32) eob = i;
498 : }
499 : }
500 : else {
501 : // Quantization pass: All coefficients with index >= zero_flag are
502 : // skippable. Note: zero_flag can be zero.
503 0 : for (i = 0; i < n_coeffs; i++) {
504 0 : const int rc = scan[i];
505 0 : const int coeff = coeff_ptr[rc];
506 0 : const QmVal wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
507 0 : const QmVal iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
508 0 : const int dequant =
509 0 : (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
510 : AOM_QM_BITS;
511 0 : const int coeff_sign = (coeff >> 31);
512 0 : int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
513 0 : int tmp32 = 0;
514 0 : if (abs_coeff * wt >=
515 0 : (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
516 0 : abs_coeff += rounding[rc != 0];
517 0 : abs_coeff = clamp64(abs_coeff, INT16_MIN, INT16_MAX);
518 0 : tmp32 = (int)((abs_coeff * wt * quant_ptr[rc != 0]) >>
519 0 : (16 - log_scale + AOM_QM_BITS));
520 0 : qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
521 0 : const TranLow abs_dqcoeff = (tmp32 * dequant) >> log_scale;
522 0 : dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
523 : }
524 :
525 0 : if (tmp32) eob = i;
526 : }
527 : }
528 0 : *eob_ptr = eob + 1;
529 0 : }
530 :
531 0 : void eb_av1_quantize_fp_c(const TranLow *coeff_ptr, intptr_t n_coeffs,
532 : const int16_t *zbin_ptr, const int16_t *round_ptr,
533 : const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
534 : TranLow *qcoeff_ptr, TranLow *dqcoeff_ptr,
535 : const int16_t *dequant_ptr, uint16_t *eob_ptr,
536 : const int16_t *scan, const int16_t *iscan) {
537 0 : quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
538 : quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
539 : eob_ptr, scan, iscan, NULL, NULL, 0);
540 0 : }
541 :
542 0 : static void eb_highbd_quantize_fp_helper_c(
543 : const TranLow *coeff_ptr,
544 : intptr_t count,
545 : const int16_t *zbin_ptr,
546 : const int16_t *round_ptr,
547 : const int16_t *quant_ptr,
548 : const int16_t *quant_shift_ptr,
549 : TranLow *qcoeff_ptr,
550 : TranLow *dqcoeff_ptr,
551 : const int16_t *dequant_ptr,
552 : uint16_t *eob_ptr,
553 : const int16_t *scan,
554 : const int16_t *iscan,
555 : const QmVal *qm_ptr,
556 : const QmVal *iqm_ptr,
557 : int16_t log_scale)
558 : {
559 : int i;
560 0 : int eob = -1;
561 0 : const int shift = 16 - log_scale;
562 : // TODO(jingning) Decide the need of these arguments after the
563 : // quantization process is completed.
564 : (void)zbin_ptr;
565 : (void)quant_shift_ptr;
566 : (void)iscan;
567 :
568 0 : if (qm_ptr || iqm_ptr) {
569 : // Quantization pass: All coefficients with index >= zero_flag are
570 : // skippable. Note: zero_flag can be zero.
571 0 : for (i = 0; i < count; i++) {
572 0 : const int rc = scan[i];
573 0 : const int coeff = coeff_ptr[rc];
574 0 : const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
575 0 : const QmVal iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
576 0 : const int dequant =
577 0 : (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
578 : AOM_QM_BITS;
579 0 : const int coeff_sign = (coeff >> 31);
580 0 : const int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
581 0 : int abs_qcoeff = 0;
582 0 : if (abs_coeff * wt >=
583 0 : (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
584 0 : const int64_t tmp =
585 0 : abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
586 0 : abs_qcoeff =
587 0 : (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
588 0 : qcoeff_ptr[rc] = (TranLow)((abs_qcoeff ^ coeff_sign) - coeff_sign);
589 0 : const TranLow abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
590 0 : dqcoeff_ptr[rc] = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
591 0 : if (abs_qcoeff) eob = i;
592 : } else {
593 0 : qcoeff_ptr[rc] = 0;
594 0 : dqcoeff_ptr[rc] = 0;
595 : }
596 : }
597 : } else {
598 0 : const int log_scaled_round_arr[2] = {
599 0 : ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
600 0 : ROUND_POWER_OF_TWO(round_ptr[1], log_scale),
601 : };
602 0 : for (i = 0; i < count; i++) {
603 0 : const int rc = scan[i];
604 0 : const int coeff = coeff_ptr[rc];
605 0 : const int rc01 = (rc != 0);
606 0 : const int coeff_sign = (coeff >> 31);
607 0 : const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
608 0 : const int log_scaled_round = log_scaled_round_arr[rc01];
609 0 : if ((abs_coeff << (1 + log_scale)) >= dequant_ptr[rc01]) {
610 0 : const int quant = quant_ptr[rc01];
611 0 : const int dequant = dequant_ptr[rc01];
612 0 : const int64_t tmp = (int64_t)abs_coeff + log_scaled_round;
613 0 : const int abs_qcoeff = (int)((tmp * quant) >> shift);
614 0 : qcoeff_ptr[rc] = (TranLow)((abs_qcoeff ^ coeff_sign) - coeff_sign);
615 0 : const TranLow abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
616 0 : if (abs_qcoeff) eob = i;
617 0 : dqcoeff_ptr[rc] = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
618 : } else {
619 0 : qcoeff_ptr[rc] = 0;
620 0 : dqcoeff_ptr[rc] = 0;
621 : }
622 : }
623 : }
624 0 : *eob_ptr = eob + 1;
625 0 : }
626 :
627 0 : static void highbd_quantize_fp_helper_c(
628 : const TranLow *coeff_ptr,
629 : intptr_t count,
630 : const int16_t *zbin_ptr,
631 : const int16_t *round_ptr,
632 : const int16_t *quant_ptr,
633 : const int16_t *quant_shift_ptr,
634 : TranLow *qcoeff_ptr,
635 : TranLow *dqcoeff_ptr,
636 : const int16_t *dequant_ptr,
637 : uint16_t *eob_ptr,
638 : const int16_t *scan,
639 : const int16_t *iscan,
640 : const QmVal *qm_ptr,
641 : const QmVal *iqm_ptr,
642 : int16_t log_scale)
643 : {
644 : int i;
645 0 : int eob = -1;
646 0 : const int shift = 16 - log_scale;
647 : // TODO(jingning) Decide the need of these arguments after the
648 : // quantization process is completed.
649 : (void)zbin_ptr;
650 : (void)quant_shift_ptr;
651 : (void)iscan;
652 :
653 0 : if (qm_ptr || iqm_ptr) {
654 : // Quantization pass: All coefficients with index >= zero_flag are
655 : // skippable. Note: zero_flag can be zero.
656 0 : for (i = 0; i < count; i++) {
657 0 : const int rc = scan[i];
658 0 : const int coeff = coeff_ptr[rc];
659 0 : const QmVal wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
660 0 : const QmVal iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
661 0 : const int dequant =
662 0 : (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
663 : AOM_QM_BITS;
664 0 : const int coeff_sign = (coeff >> 31);
665 0 : const int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
666 0 : int abs_qcoeff = 0;
667 0 : if (abs_coeff * wt >=
668 0 : (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
669 0 : const int64_t tmp =
670 0 : abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
671 0 : abs_qcoeff =
672 0 : (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
673 0 : qcoeff_ptr[rc] = (TranLow)((abs_qcoeff ^ coeff_sign) - coeff_sign);
674 0 : const TranLow abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
675 0 : dqcoeff_ptr[rc] = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
676 0 : if (abs_qcoeff) eob = i;
677 : } else {
678 0 : qcoeff_ptr[rc] = 0;
679 0 : dqcoeff_ptr[rc] = 0;
680 : }
681 : }
682 : } else {
683 0 : const int log_scaled_round_arr[2] = {
684 0 : ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
685 0 : ROUND_POWER_OF_TWO(round_ptr[1], log_scale),
686 : };
687 0 : for (i = 0; i < count; i++) {
688 0 : const int rc = scan[i];
689 0 : const int coeff = coeff_ptr[rc];
690 0 : const int rc01 = (rc != 0);
691 0 : const int coeff_sign = (coeff >> 31);
692 0 : const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
693 0 : const int log_scaled_round = log_scaled_round_arr[rc01];
694 0 : if ((abs_coeff << (1 + log_scale)) >= dequant_ptr[rc01]) {
695 0 : const int quant = quant_ptr[rc01];
696 0 : const int dequant = dequant_ptr[rc01];
697 0 : const int64_t tmp = (int64_t)abs_coeff + log_scaled_round;
698 0 : const int abs_qcoeff = (int)((tmp * quant) >> shift);
699 0 : qcoeff_ptr[rc] = (TranLow)((abs_qcoeff ^ coeff_sign) - coeff_sign);
700 0 : const TranLow abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
701 0 : if (abs_qcoeff) eob = i;
702 0 : dqcoeff_ptr[rc] = (TranLow)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
703 : } else {
704 0 : qcoeff_ptr[rc] = 0;
705 0 : dqcoeff_ptr[rc] = 0;
706 : }
707 : }
708 : }
709 0 : *eob_ptr = eob + 1;
710 0 : }
711 :
712 0 : void eb_av1_highbd_quantize_fp_c(
713 : const TranLow *coeff_ptr,
714 : intptr_t count,
715 : const int16_t *zbin_ptr,
716 : const int16_t *round_ptr,
717 : const int16_t *quant_ptr,
718 : const int16_t *quant_shift_ptr,
719 : TranLow *qcoeff_ptr,
720 : TranLow *dqcoeff_ptr,
721 : const int16_t *dequant_ptr,
722 : uint16_t *eob_ptr,
723 : const int16_t *scan,
724 : const int16_t *iscan,
725 : int16_t log_scale)
726 : {
727 0 : highbd_quantize_fp_helper_c(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
728 : quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
729 : dequant_ptr, eob_ptr, scan, iscan, NULL, NULL,
730 : log_scale);
731 0 : }
732 :
733 0 : void eb_av1_quantize_fp_32x32_c(const TranLow *coeff_ptr, intptr_t n_coeffs,
734 : const int16_t *zbin_ptr, const int16_t *round_ptr,
735 : const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
736 : TranLow *qcoeff_ptr, TranLow *dqcoeff_ptr,
737 : const int16_t *dequant_ptr, uint16_t *eob_ptr,
738 : const int16_t *scan, const int16_t *iscan) {
739 0 : quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
740 : quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
741 : eob_ptr, scan, iscan, NULL, NULL, 1);
742 0 : }
743 :
744 0 : void eb_av1_quantize_fp_64x64_c(const TranLow *coeff_ptr, intptr_t n_coeffs,
745 : const int16_t *zbin_ptr, const int16_t *round_ptr,
746 : const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
747 : TranLow *qcoeff_ptr, TranLow *dqcoeff_ptr,
748 : const int16_t *dequant_ptr, uint16_t *eob_ptr,
749 : const int16_t *scan, const int16_t *iscan) {
750 0 : quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
751 : quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
752 : eob_ptr, scan, iscan, NULL, NULL, 2);
753 0 : }
754 :
755 41327800 : void eb_av1_quantize_fp_facade(
756 : const TranLow *coeff_ptr,
757 : intptr_t n_coeffs,
758 : const MacroblockPlane *p,
759 : TranLow *qcoeff_ptr,
760 : TranLow *dqcoeff_ptr,
761 : uint16_t *eob_ptr,
762 : const ScanOrder *sc,
763 : const QuantParam *qparam) {
764 :
765 41327800 : const QmVal *qm_ptr = qparam->qmatrix;
766 41327800 : const QmVal *iqm_ptr = qparam->iqmatrix;
767 :
768 41327800 : if (qm_ptr || iqm_ptr)
769 0 : quantize_fp_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
770 : p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
771 : dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
772 : sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
773 : else {
774 41329400 : switch (qparam->log_scale) {
775 40665700 : case 0:
776 40665700 : eb_av1_quantize_fp(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
777 : p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
778 : dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
779 : sc->iscan);
780 40675500 : break;
781 647908 : case 1:
782 647908 : eb_av1_quantize_fp_32x32(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
783 : p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
784 : dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
785 : sc->iscan);
786 647893 : break;
787 17528 : case 2:
788 17528 : eb_av1_quantize_fp_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
789 : p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
790 : dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
791 : sc->iscan);
792 17527 : break;
793 0 : default: assert(0);
794 : }
795 : }
796 41340900 : }
797 :
798 0 : void eb_av1_highbd_quantize_fp_facade(
799 : const TranLow *coeff_ptr,
800 : intptr_t n_coeffs,
801 : const MacroblockPlane *p,
802 : TranLow *qcoeff_ptr,
803 : TranLow *dqcoeff_ptr,
804 : uint16_t *eob_ptr,
805 : const ScanOrder *sc,
806 : const QuantParam *qparam)
807 : {
808 0 : const QmVal *qm_ptr = qparam->qmatrix;
809 0 : const QmVal *iqm_ptr = qparam->iqmatrix;
810 0 : if (qm_ptr != NULL && iqm_ptr != NULL) {
811 0 : eb_highbd_quantize_fp_helper_c(
812 : coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, p->quant_fp_QTX,
813 : p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
814 0 : sc->scan, sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
815 : } else {
816 0 : eb_av1_highbd_quantize_fp(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
817 : p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
818 : dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
819 0 : sc->iscan, qparam->log_scale);
820 : }
821 0 : }
822 :
823 : // Hsan: code clean up; from static to extern as now used @ more than 1 file
824 :
825 :
// Position token for an end-of-block value in [0, 32], indexed directly by eob.
static const int8_t eob_to_pos_small[33] = {
    0,                                              // eob 0
    1,                                              // eob 1
    2,                                              // eob 2
    3, 3,                                           // eob 3-4
    4, 4, 4, 4,                                     // eob 5-8
    5, 5, 5, 5, 5, 5, 5, 5,                         // eob 9-16
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6  // eob 17-32
};
833 :
// Position token for an end-of-block value above 32, indexed by
// (eob - 1) >> 5 clamped to 16.
static const int8_t eob_to_pos_large[17] = {
    6,                              // index 0: place holder
    7,                              // eob 33-64
    8, 8,                           // eob 65-128
    9, 9, 9, 9,                     // eob 129-256
    10, 10, 10, 10, 10, 10, 10, 10, // eob 257-512
    11                              // eob 513 and above
};
842 :
843 114260000 : static INLINE int32_t get_eob_pos_token(const int32_t eob, int32_t *const extra) {
844 : int32_t t;
845 :
846 114260000 : if (eob < 33)
847 77123300 : t = eob_to_pos_small[eob];
848 : else {
849 37136800 : const int32_t e = AOMMIN((eob - 1) >> 5, 16);
850 37136800 : t = eob_to_pos_large[e];
851 : }
852 :
853 114260000 : *extra = eob - eb_k_eob_group_start[t];
854 :
855 114260000 : return t;
856 : }
857 :
858 35083000 : static INLINE TxSize get_txsize_entropy_ctx(TxSize txsize) {
859 35083000 : return (TxSize)((txsize_sqr_map[txsize] + txsize_sqr_up_map[txsize] + 1) >>
860 : 1);
861 : }
862 35086200 : static INLINE PlaneType get_plane_type(int plane) {
863 35086200 : return (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
864 : }
865 114263000 : static int32_t get_eob_cost(int32_t eob, const LvMapEobCost *txb_eob_costs,
866 : const LvMapCoeffCost *txb_costs, TxType tx_type) {
867 : int32_t eob_extra;
868 114263000 : const int32_t eob_pt = get_eob_pos_token(eob, &eob_extra);
869 114269000 : int32_t eob_cost = 0;
870 114269000 : const int32_t eob_multi_ctx = (tx_type_to_class[tx_type] == TX_CLASS_2D) ? 0 : 1;
871 114269000 : eob_cost = txb_eob_costs->eob_cost[eob_multi_ctx][eob_pt - 1];
872 :
873 114269000 : if (eb_k_eob_offset_bits[eob_pt] > 0) {
874 100106000 : const int32_t eob_shift = eb_k_eob_offset_bits[eob_pt] - 1;
875 100106000 : const int32_t bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
876 100106000 : eob_cost += txb_costs->eob_extra_cost[eob_pt][bit];
877 100106000 : const int32_t offset_bits = eb_k_eob_offset_bits[eob_pt];
878 100106000 : if (offset_bits > 1) eob_cost += av1_cost_literal(offset_bits - 1);
879 : }
880 114269000 : return eob_cost;
881 : }
882 :
883 21917800 : static INLINE int get_lower_levels_ctx_general(int is_last, int scan_idx,
884 : int bwl, int height,
885 : const uint8_t *levels,
886 : int coeff_idx, TxSize tx_size,
887 : TxClass tx_class) {
888 21917800 : if (is_last) {
889 1519990 : if (scan_idx == 0) return 0;
890 1011910 : if (scan_idx <= (height << bwl) >> 3) return 1;
891 917076 : if (scan_idx <= (height << bwl) >> 2) return 2;
892 842002 : return 3;
893 : }
894 40797700 : return get_lower_levels_ctx(levels, coeff_idx, bwl, tx_size, tx_class);
895 : }
896 :
897 17214000 : static INLINE int32_t get_golomb_cost(int32_t abs_qc) {
898 17214000 : if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
899 1183580 : const int32_t r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
900 1183580 : const int32_t length = get_msb(r) + 1;
901 1183580 : return av1_cost_literal(2 * length - 1);
902 : }
903 16030400 : return 0;
904 : }
905 17205600 : static INLINE int get_br_cost(TranLow level, const int *coeff_lps) {
906 17205600 : const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
907 17205600 : return coeff_lps[base_range] + get_golomb_cost(level);
908 : }
909 114633000 : static INLINE int get_coeff_cost_general(int is_last, int ci, TranLow abs_qc,
910 : int sign, int coeff_ctx,
911 : int dc_sign_ctx,
912 : const LvMapCoeffCost *txb_costs,
913 : int bwl, TxClass tx_class,
914 : const uint8_t *levels) {
915 114633000 : int cost = 0;
916 114633000 : if (is_last)
917 3039680 : cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
918 : else
919 111593000 : cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
920 114633000 : if (abs_qc != 0) {
921 114755000 : if (ci == 0)
922 33153500 : cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign];
923 : else
924 81601100 : cost += av1_cost_literal(1);
925 114755000 : if (abs_qc > NUM_BASE_LEVELS) {
926 : int br_ctx;
927 14667600 : if (is_last)
928 429348 : br_ctx = get_br_ctx_eob(ci, bwl, tx_class);
929 : else
930 14238200 : br_ctx = get_br_ctx(levels, ci, bwl, (const TxType)tx_class);
931 14669100 : cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
932 : }
933 : }
934 114633000 : return cost;
935 : }
936 633256000 : static INLINE int64_t get_coeff_dist(TranLow tcoeff, TranLow dqcoeff,
937 : int shift) {
938 633256000 : const int64_t diff = (tcoeff - dqcoeff) * (1 << shift);
939 633256000 : const int64_t error = diff * diff;
940 633256000 : return error;
941 : }
942 19001200 : static INLINE void get_qc_dqc_low(TranLow abs_qc, int sign, int dqv,
943 : int shift, TranLow *qc_low,
944 : TranLow *dqc_low) {
945 19001200 : TranLow abs_qc_low = abs_qc - 1;
946 19001200 : *qc_low = (-sign ^ abs_qc_low) + sign;
947 19001200 : assert((sign ? -abs_qc_low : abs_qc_low) == *qc_low);
948 19001200 : TranLow abs_dqc_low = (abs_qc_low * dqv) >> shift;
949 19001200 : *dqc_low = (-sign ^ abs_dqc_low) + sign;
950 19001200 : assert((sign ? -abs_dqc_low : abs_dqc_low) == *dqc_low);
951 19001200 : }
// Golomb remainder cost for remainders 0..31, in units of 512 per bit.
static const int golomb_bits_cost[32] = {
    0,                                                              // r = 0
    512,                                                            // r = 1
    512 * 3, 512 * 3,                                               // r = 2-3
    512 * 5, 512 * 5, 512 * 5, 512 * 5,                             // r = 4-7
    512 * 7, 512 * 7, 512 * 7, 512 * 7,                             // r = 8-11
    512 * 7, 512 * 7, 512 * 7, 512 * 7,                             // r = 12-15
    512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, // r = 16-23
    512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9  // r = 24-31
};
// Difference between golomb_bits_cost[r] and golomb_bits_cost[r - 1] for
// remainders 0..31; non-zero only where the cost steps up.
static const int golomb_cost_diff[32] = {
    0,       512,     512 * 2, 0,       // r = 0-3
    512 * 2, 0,       0,       0,       // r = 4-7
    512 * 2, 0,       0,       0,       // r = 8-11
    0,       0,       0,       0,       // r = 12-15
    512 * 2, 0,       0,       0,       // r = 16-19
    0,       0,       0,       0,       // r = 20-23
    0,       0,       0,       0,       // r = 24-27
    0,       0,       0,       0        // r = 28-31
};
962 44682700 : static INLINE int get_br_cost_with_diff(TranLow level, const int *coeff_lps,
963 : int *diff) {
964 44682700 : const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
965 44682700 : int golomb_bits = 0;
966 44682700 : if (level <= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS)
967 44436500 : *diff += coeff_lps[base_range + COEFF_BASE_RANGE + 1];
968 :
969 44682700 : if (level >= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS) {
970 310212 : int r = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
971 310212 : if (r < 32) {
972 301475 : golomb_bits = golomb_bits_cost[r];
973 301475 : *diff += golomb_cost_diff[r];
974 : }
975 : else {
976 8737 : golomb_bits = get_golomb_cost(level);
977 8738 : *diff += (r & (r - 1)) == 0 ? 1024 : 0;
978 : }
979 : }
980 :
981 44682700 : return coeff_lps[base_range] + golomb_bits;
982 : }
983 : static AOM_FORCE_INLINE int get_two_coeff_cost_simple(
984 : int ci, TranLow abs_qc, int coeff_ctx,
985 : const LvMapCoeffCost *txb_costs, int bwl, TxClass tx_class,
986 : const uint8_t *levels, int *cost_low) {
987 : // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
988 : // and not the last (scan_idx != eob - 1)
989 0 : assert(ci > 0);
990 : //assert(abs_qc + 4 < 4);
991 295013000 : int cost = txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
992 295013000 : int diff = 0;
993 295013000 : if (abs_qc <= 3) diff = txb_costs->base_cost[coeff_ctx][abs_qc + 4];
994 295013000 : if (abs_qc) {
995 301082000 : cost += av1_cost_literal(1);
996 301082000 : if (abs_qc > NUM_BASE_LEVELS) {
997 44714200 : const int br_ctx = get_br_ctx(levels, ci, bwl, (const TxType)tx_class);
998 44703400 : int brcost_diff = 0;
999 44703400 : cost += get_br_cost_with_diff(abs_qc, txb_costs->lps_cost[br_ctx],
1000 : &brcost_diff);
1001 50494600 : diff += brcost_diff;
1002 : }
1003 : }
1004 300794000 : *cost_low = cost - diff;
1005 :
1006 300794000 : return cost;
1007 : }
1008 120593000 : static INLINE int get_coeff_cost_eob(int ci, TranLow abs_qc, int sign,
1009 : int coeff_ctx, int dc_sign_ctx,
1010 : const LvMapCoeffCost *txb_costs,
1011 : int bwl, TxClass tx_class) {
1012 120593000 : int cost = 0;
1013 120593000 : cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
1014 120593000 : if (abs_qc != 0) {
1015 120628000 : if (ci == 0)
1016 11682200 : cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign];
1017 : else
1018 108945000 : cost += av1_cost_literal(1);
1019 120628000 : if (abs_qc > NUM_BASE_LEVELS) {
1020 : int br_ctx;
1021 2541880 : br_ctx = get_br_ctx_eob(ci, bwl, tx_class);
1022 2541880 : cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
1023 : }
1024 : }
1025 120593000 : return cost;
1026 : }
1027 :
1028 : static AOM_FORCE_INLINE void update_coeff_eob(
1029 : int *accu_rate, int64_t *accu_dist, uint16_t *eob, int *nz_num, int *nz_ci,
1030 : int si, TxSize tx_size, TxClass tx_class, int bwl, int height,
1031 : int dc_sign_ctx, int64_t rdmult, int shift, const int16_t *dequant,
1032 : const int16_t *scan, const LvMapEobCost *txb_eob_costs,
1033 : const LvMapCoeffCost *txb_costs, const TranLow *tcoeff,
1034 : TranLow *qcoeff, TranLow *dqcoeff, uint8_t *levels, int sharpness) {
1035 436296000 : const int dqv = dequant[si != 0];
1036 436296000 : assert(si != *eob - 1);
1037 436296000 : const int ci = scan[si];
1038 436296000 : const TranLow qc = qcoeff[ci];
1039 : const int coeff_ctx =
1040 436296000 : get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
1041 436462000 : if (qc == 0)
1042 358939000 : *accu_rate += txb_costs->base_cost[coeff_ctx][0];
1043 : else {
1044 77523000 : int lower_level = 0;
1045 77523000 : const TranLow abs_qc = abs(qc);
1046 77523000 : const TranLow tqc = tcoeff[ci];
1047 77523000 : const TranLow dqc = dqcoeff[ci];
1048 77523000 : const int sign = (qc < 0) ? 1 : 0;
1049 77523000 : const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
1050 79569000 : int64_t dist = get_coeff_dist(tqc, dqc, shift) - dist0;
1051 : int rate =
1052 79531000 : get_coeff_cost_general(0, ci, abs_qc, sign, coeff_ctx, dc_sign_ctx,
1053 : txb_costs, bwl, tx_class, levels);
1054 79610600 : int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist);
1055 :
1056 : TranLow qc_low, dqc_low;
1057 : TranLow abs_qc_low;
1058 : int64_t dist_low, rd_low;
1059 : int rate_low;
1060 79610600 : if (abs_qc == 1) {
1061 71681900 : abs_qc_low = 0;
1062 71681900 : dqc_low = qc_low = 0;
1063 71681900 : dist_low = 0;
1064 71681900 : rate_low = txb_costs->base_cost[coeff_ctx][0];
1065 71681900 : rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist);
1066 : }
1067 : else {
1068 7928760 : get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
1069 7928900 : abs_qc_low = abs_qc - 1;
1070 7928900 : dist_low = get_coeff_dist(tqc, dqc_low, shift) - dist0;
1071 : rate_low =
1072 7928620 : get_coeff_cost_general(0, ci, abs_qc_low, sign, coeff_ctx,
1073 : dc_sign_ctx, txb_costs, bwl, tx_class, levels);
1074 7928610 : rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low);
1075 : }
1076 :
1077 79610500 : int lower_level_new_eob = 0;
1078 79610500 : const int new_eob = si + 1;
1079 79610500 : const int coeff_ctx_new_eob = get_lower_levels_ctx_eob(bwl, height, si);
1080 : const int new_eob_cost =
1081 79571900 : get_eob_cost(new_eob, txb_eob_costs, txb_costs, (TxType)tx_class);
1082 79635000 : int rate_coeff_eob =
1083 79587900 : new_eob_cost + get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx_new_eob,
1084 : dc_sign_ctx, txb_costs, bwl,
1085 : tx_class);
1086 79635000 : int64_t dist_new_eob = dist;
1087 79635000 : int64_t rd_new_eob = RDCOST(rdmult, rate_coeff_eob, dist_new_eob);
1088 :
1089 79635000 : if (abs_qc_low > 0) {
1090 7928220 : const int rate_coeff_eob_low =
1091 7928640 : new_eob_cost + get_coeff_cost_eob(ci, abs_qc_low, sign,
1092 : coeff_ctx_new_eob, dc_sign_ctx,
1093 : txb_costs, bwl, tx_class);
1094 7928220 : const int64_t dist_new_eob_low = dist_low;
1095 7928220 : const int64_t rd_new_eob_low =
1096 7928220 : RDCOST(rdmult, rate_coeff_eob_low, dist_new_eob_low);
1097 7928220 : if (rd_new_eob_low < rd_new_eob) {
1098 1920400 : lower_level_new_eob = 1;
1099 1920400 : rd_new_eob = rd_new_eob_low;
1100 1920400 : rate_coeff_eob = rate_coeff_eob_low;
1101 1920400 : dist_new_eob = dist_new_eob_low;
1102 : }
1103 : }
1104 :
1105 79634600 : if (rd_low < rd) {
1106 15975400 : lower_level = 1;
1107 15975400 : rd = rd_low;
1108 15975400 : rate = rate_low;
1109 15975400 : dist = dist_low;
1110 : }
1111 :
1112 79634600 : if (sharpness == 0 && rd_new_eob < rd) {
1113 42599100 : for (int ni = 0; ni < *nz_num; ++ni) {
1114 21834900 : int last_ci = nz_ci[ni];
1115 21834900 : levels[get_padded_idx(last_ci, bwl)] = 0;
1116 21834300 : qcoeff[last_ci] = 0;
1117 21834300 : dqcoeff[last_ci] = 0;
1118 : }
1119 20764200 : *eob = new_eob;
1120 20764200 : *nz_num = 0;
1121 20764200 : *accu_rate = rate_coeff_eob;
1122 20764200 : *accu_dist = dist_new_eob;
1123 20764200 : lower_level = lower_level_new_eob;
1124 : }
1125 : else {
1126 58869800 : *accu_rate += rate;
1127 58869800 : *accu_dist += dist;
1128 : }
1129 :
1130 79634000 : if (lower_level) {
1131 11388900 : qcoeff[ci] = qc_low;
1132 11388900 : dqcoeff[ci] = dqc_low;
1133 11388900 : levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
1134 : }
1135 79633900 : if (qcoeff[ci]) {
1136 69316700 : nz_ci[*nz_num] = ci;
1137 69316700 : ++*nz_num;
1138 : }
1139 : }
1140 438573000 : }
1141 21918300 : static INLINE void update_coeff_general(
1142 : int *accu_rate,
1143 : int64_t *accu_dist,
1144 : int si,
1145 : int eob,
1146 : TxSize tx_size,
1147 : TxClass tx_class,
1148 : int bwl,
1149 : int height,
1150 : int64_t rdmult,
1151 : int shift,
1152 : int dc_sign_ctx,
1153 : const int16_t *dequant,
1154 : const int16_t *scan,
1155 : const LvMapCoeffCost *txb_costs,
1156 : const TranLow *tcoeff,
1157 : TranLow *qcoeff,
1158 : TranLow *dqcoeff,
1159 : uint8_t *levels) {
1160 21918300 : const int dqv = dequant[si != 0];
1161 21918300 : const int ci = scan[si];
1162 21918300 : const TranLow qc = qcoeff[ci];
1163 21918300 : const int is_last = si == (eob - 1);
1164 21918300 : const int coeff_ctx = get_lower_levels_ctx_general(
1165 : is_last, si, bwl, height, levels, ci, tx_size, tx_class);
1166 21917500 : if (qc == 0)
1167 5433260 : *accu_rate += txb_costs->base_cost[coeff_ctx][0];
1168 : else {
1169 16484200 : const int sign = (qc < 0) ? 1 : 0;
1170 16484200 : const TranLow abs_qc = abs(qc);
1171 16484200 : const TranLow tqc = tcoeff[ci];
1172 16484200 : const TranLow dqc = dqcoeff[ci];
1173 16484200 : const int64_t dist = get_coeff_dist(tqc, dqc, shift);
1174 16488200 : const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
1175 : const int rate =
1176 16483300 : get_coeff_cost_general(is_last, ci, abs_qc, sign, coeff_ctx,
1177 : dc_sign_ctx, txb_costs, bwl, tx_class, levels);
1178 16489600 : const int64_t rd = RDCOST(rdmult, rate, dist);
1179 :
1180 : TranLow qc_low, dqc_low;
1181 : TranLow abs_qc_low;
1182 : int64_t dist_low, rd_low;
1183 : int rate_low;
1184 16489600 : if (abs_qc == 1) {
1185 5407650 : abs_qc_low = qc_low = dqc_low = 0;
1186 5407650 : dist_low = dist0;
1187 5407650 : rate_low = txb_costs->base_cost[coeff_ctx][0];
1188 : }
1189 : else {
1190 11081900 : get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
1191 11082400 : abs_qc_low = abs_qc - 1;
1192 11082400 : dist_low = get_coeff_dist(tqc, dqc_low, shift);
1193 : rate_low =
1194 11081400 : get_coeff_cost_general(is_last, ci, abs_qc_low, sign, coeff_ctx,
1195 : dc_sign_ctx, txb_costs, bwl, tx_class, levels);
1196 : }
1197 :
1198 16483800 : rd_low = RDCOST(rdmult, rate_low, dist_low);
1199 16483800 : if (rd_low < rd) {
1200 898181 : qcoeff[ci] = qc_low;
1201 898181 : dqcoeff[ci] = dqc_low;
1202 898181 : levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
1203 898184 : *accu_rate += rate_low;
1204 898184 : *accu_dist += dist_low - dist0;
1205 : }
1206 : else {
1207 15585600 : *accu_rate += rate;
1208 15585600 : *accu_dist += dist - dist0;
1209 : }
1210 : }
1211 21917000 : }
1212 :
1213 : static AOM_FORCE_INLINE void update_coeff_simple(
1214 : int *accu_rate,
1215 : int si,
1216 : int eob,
1217 : TxSize tx_size,
1218 : TxClass tx_class,
1219 : int bwl,
1220 : int64_t rdmult,
1221 : int shift,
1222 : const int16_t *dequant,
1223 : const int16_t *scan,
1224 : const LvMapCoeffCost *txb_costs,
1225 : const TranLow *tcoeff,
1226 : TranLow *qcoeff,
1227 : TranLow *dqcoeff,
1228 : uint8_t *levels) {
1229 695711000 : const int dqv = dequant[1];
1230 : (void)eob;
1231 : // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
1232 : // and not the last (scan_idx != eob - 1)
1233 0 : assert(si != eob - 1);
1234 695711000 : assert(si > 0);
1235 695711000 : const int ci = scan[si];
1236 695711000 : const TranLow qc = qcoeff[ci];
1237 : const int coeff_ctx =
1238 695711000 : get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
1239 695361000 : if (qc == 0)
1240 400347000 : *accu_rate += txb_costs->base_cost[coeff_ctx][0];
1241 : else {
1242 295013000 : const TranLow abs_qc = abs(qc);
1243 295013000 : const TranLow abs_tqc = abs(tcoeff[ci]);
1244 295013000 : const TranLow abs_dqc = abs(dqcoeff[ci]);
1245 295013000 : int rate_low = 0;
1246 300794000 : const int rate = get_two_coeff_cost_simple(
1247 : ci, abs_qc, coeff_ctx, txb_costs, bwl, tx_class, levels, &rate_low);
1248 300794000 : if (abs_dqc < abs_tqc) {
1249 120084000 : *accu_rate += rate;
1250 120084000 : return;
1251 : }
1252 :
1253 180710000 : const int64_t dist = get_coeff_dist(abs_tqc, abs_dqc, shift);
1254 181313000 : const int64_t rd = RDCOST(rdmult, rate, dist);
1255 :
1256 181313000 : const TranLow abs_qc_low = abs_qc - 1;
1257 181313000 : const TranLow abs_dqc_low = (abs_qc_low * dqv) >> shift;
1258 181313000 : const int64_t dist_low = get_coeff_dist(abs_tqc, abs_dqc_low, shift);
1259 181382000 : const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);
1260 :
1261 181382000 : if (rd_low < rd) {
1262 18445500 : const int sign = (qc < 0) ? 1 : 0;
1263 18445500 : qcoeff[ci] = (-sign ^ abs_qc_low) + sign;
1264 18445500 : dqcoeff[ci] = (-sign ^ abs_dqc_low) + sign;
1265 18445500 : levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
1266 18445200 : *accu_rate += rate_low;
1267 : }
1268 : else
1269 162936000 : *accu_rate += rate;
1270 : }
1271 : }
1272 12986100 : static INLINE void update_skip(int *accu_rate, int64_t accu_dist, uint16_t *eob,
1273 : int nz_num, int *nz_ci, int64_t rdmult,
1274 : int skip_cost, int non_skip_cost,
1275 : TranLow *qcoeff, TranLow *dqcoeff,
1276 : int sharpness) {
1277 12986100 : const int64_t rd = RDCOST(rdmult, *accu_rate + non_skip_cost, accu_dist);
1278 12986100 : const int64_t rd_new_eob = RDCOST(rdmult, skip_cost, 0);
1279 12986100 : if (sharpness == 0 && rd_new_eob < rd) {
1280 8048640 : for (int i = 0; i < nz_num; ++i) {
1281 4147310 : const int ci = nz_ci[i];
1282 4147310 : qcoeff[ci] = 0;
1283 4147310 : dqcoeff[ci] = 0;
1284 : // no need to set up levels because this is the last step
1285 : // levels[get_padded_idx(ci, bwl)] = 0;
1286 : }
1287 3901340 : *accu_rate = 0;
1288 3901340 : *eob = 0;
1289 : }
1290 12986100 : }
// Adaptive quantization modes.
enum {
    NO_AQ             = 0,
    VARIANCE_AQ       = 1,
    COMPLEXITY_AQ     = 2,
    CYCLIC_REFRESH_AQ = 3,
    AQ_MODE_COUNT // Number of modes; must stay the final enumerator
} UENUM1BYTE(AQ_MODE);
// Delta quantizer modes.
enum {
    NO_DELTA_Q   = 0,
    DELTA_Q_ONLY = 1,
    DELTA_Q_LF   = 2,
    DELTAQ_MODE_COUNT // Number of modes; must stay the final enumerator
} UENUM1BYTE(DELTAQ_MODE);
1304 :
1305 : // These numbers are empirically obtained.
1306 : static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
1307 : { 17, 13 },
1308 : { 16, 10 },
1309 : };
1310 :
1311 35094100 : void eb_av1_optimize_b(
1312 : ModeDecisionContext *md_context,
1313 : int16_t txb_skip_context,
1314 : int16_t dc_sign_context,
1315 : const TranLow *coeff_ptr,
1316 : int32_t stride,
1317 : intptr_t n_coeffs,
1318 : const MacroblockPlane *p,
1319 : TranLow *qcoeff_ptr,
1320 : TranLow *dqcoeff_ptr,
1321 : uint16_t *eob,
1322 : const ScanOrder *sc,
1323 : const QuantParam *qparam,
1324 : TxSize tx_size,
1325 : TxType tx_type,
1326 : EbBool is_inter,
1327 : uint32_t bit_increment,
1328 : int plane)
1329 :
1330 : {
1331 : (void)stride;
1332 : (void)n_coeffs;
1333 : (void)sc;
1334 : (void)qparam;
1335 : (void)bit_increment;
1336 :
1337 : // Hsan (Trellis): hardcoded as not supported:
1338 35094100 : int sharpness = 0; // No Sharpness
1339 35094100 : int fast_mode = 0; // TBD
1340 35094100 : AQ_MODE aq_mode = NO_AQ;
1341 35094100 : DELTAQ_MODE deltaq_mode = NO_DELTA_Q;
1342 35094100 : int8_t segment_id = 0;
1343 35094100 : int sb_energy_level = 0;
1344 35094100 : const ScanOrder *const scan_order = &av1_scan_orders[tx_size][tx_type];
1345 35094100 : const int16_t *scan = scan_order->scan;
1346 35094100 : const int shift = av1_get_tx_scale(tx_size);
1347 35087500 : const PlaneType plane_type = get_plane_type(plane);
1348 35084000 : const TxSize txs_ctx = get_txsize_entropy_ctx(tx_size);
1349 35072400 : const TxClass tx_class = tx_type_to_class[tx_type];
1350 35072400 : const int bwl = get_txb_bwl(tx_size);
1351 35058800 : const int width = get_txb_wide(tx_size);
1352 35037100 : const int height = get_txb_high(tx_size);
1353 35019100 : assert(width == (1 << bwl));
1354 35019100 : assert(txs_ctx < TX_SIZES);
1355 35019100 : const LvMapCoeffCost *txb_costs = &md_context->md_rate_estimation_ptr->coeff_fac_bits[txs_ctx][plane_type];
1356 35019100 : const int eob_multi_size = txsize_log2_minus4[tx_size];
1357 35019100 : const LvMapEobCost *txb_eob_costs = &md_context->md_rate_estimation_ptr->eob_frac_bits[eob_multi_size][plane_type];
1358 35019100 : const int rshift =
1359 35019100 : (sharpness +
1360 0 : (aq_mode == VARIANCE_AQ && segment_id < 4
1361 0 : ? 7 - segment_id
1362 35019100 : : 2) +
1363 35021100 : (aq_mode != VARIANCE_AQ &&
1364 0 : deltaq_mode > NO_DELTA_Q && sb_energy_level < 0
1365 : ? (3 - sb_energy_level)
1366 70040300 : : 0));
1367 35019100 : const int64_t rdmult =
1368 35019100 : (((int64_t)md_context->full_lambda *
1369 35019100 : plane_rd_mult[is_inter][plane_type]) +
1370 : 2) >>
1371 : rshift;
1372 : uint8_t levels_buf[TX_PAD_2D];
1373 35019100 : uint8_t *const levels = set_levels(levels_buf, width);
1374 :
1375 35020900 : if (*eob > 1) eb_av1_txb_init_levels(qcoeff_ptr, width, height, levels);
1376 : // TODO(angirbird): check iqmatrix
1377 35038600 : const int non_skip_cost = txb_costs->txb_skip_cost[txb_skip_context][0];
1378 35038600 : const int skip_cost = txb_costs->txb_skip_cost[txb_skip_context][1];
1379 35038600 : const int eob_cost = get_eob_cost(*eob, txb_eob_costs, txb_costs, (TxType)tx_class);
1380 35059100 : int accu_rate = eob_cost;
1381 :
1382 35059100 : int64_t accu_dist = 0;
1383 35059100 : int si = *eob - 1;
1384 35059100 : const int ci = scan[si];
1385 35059100 : const TranLow qc = qcoeff_ptr[ci];
1386 35059100 : const TranLow abs_qc = abs(qc);
1387 35059100 : const int sign = qc < 0;
1388 35059100 : const int max_nz_num = 2;
1389 35059100 : int nz_num = 1;
1390 35059100 : int nz_ci[3] = { ci, 0, 0 };
1391 :
1392 35059100 : if (abs_qc >= 2) {
1393 1512150 : update_coeff_general(
1394 : &accu_rate,
1395 : &accu_dist,
1396 : si,
1397 1512150 : *eob,
1398 : tx_size,
1399 : tx_class,
1400 : bwl,
1401 : height,
1402 : rdmult,
1403 : shift,
1404 : dc_sign_context,
1405 : p->dequant_QTX,
1406 : scan,
1407 : txb_costs,
1408 : coeff_ptr,
1409 : qcoeff_ptr,
1410 : dqcoeff_ptr,
1411 : levels);
1412 1519940 : --si;
1413 : }
1414 : else {
1415 33546900 : assert(abs_qc == 1);
1416 33546900 : const int coeff_ctx = get_lower_levels_ctx_eob(bwl, height, si);
1417 33543700 : accu_rate += get_coeff_cost_eob(
1418 : ci,
1419 : abs_qc,
1420 : sign,
1421 : coeff_ctx,
1422 : dc_sign_context,
1423 : txb_costs,
1424 : bwl,
1425 : tx_class);
1426 :
1427 33541300 : const TranLow tqc = coeff_ptr[ci];
1428 33541300 : const TranLow dqc = dqcoeff_ptr[ci];
1429 33541300 : const int64_t dist = get_coeff_dist(tqc, dqc, shift);
1430 33539100 : const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
1431 33538000 : accu_dist += dist - dist0;
1432 33538000 : --si;
1433 : }
1434 :
1435 : #define UPDATE_COEFF_EOB_CASE(tx_class_literal) \
1436 : case tx_class_literal: \
1437 : for (; si >= 0 && nz_num <= max_nz_num && !fast_mode; --si) { \
1438 : update_coeff_eob(&accu_rate, &accu_dist, eob, &nz_num, nz_ci, si, \
1439 : tx_size, tx_class_literal, bwl, height, \
1440 : dc_sign_context, rdmult, shift, p->dequant_QTX, scan, \
1441 : txb_eob_costs, txb_costs, coeff_ptr, qcoeff_ptr, dqcoeff_ptr, \
1442 : levels, sharpness); \
1443 : } \
1444 : break;
1445 35058000 : switch (tx_class) {
1446 405244000 : UPDATE_COEFF_EOB_CASE(TX_CLASS_2D);
1447 34552400 : UPDATE_COEFF_EOB_CASE(TX_CLASS_HORIZ);
1448 33835000 : UPDATE_COEFF_EOB_CASE(TX_CLASS_VERT);
1449 : #undef UPDATE_COEFF_EOB_CASE
1450 0 : default: assert(false);
1451 : }
1452 :
1453 37335700 : if (si == -1 && nz_num <= max_nz_num) {
1454 12986200 : update_skip(&accu_rate, accu_dist, eob, nz_num, nz_ci, rdmult, skip_cost,
1455 : non_skip_cost, qcoeff_ptr, dqcoeff_ptr, sharpness);
1456 : }
1457 :
1458 : #define UPDATE_COEFF_SIMPLE_CASE(tx_class_literal) \
1459 : case tx_class_literal: \
1460 : for (; si >= 1; --si) { \
1461 : update_coeff_simple(&accu_rate, si, *eob, tx_size, tx_class_literal, bwl, \
1462 : rdmult, shift, p->dequant_QTX, scan, txb_costs, coeff_ptr, \
1463 : qcoeff_ptr, dqcoeff_ptr, levels); \
1464 : } \
1465 : break;
1466 35079600 : switch (tx_class) {
1467 633522000 : UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_2D);
1468 48488400 : UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_HORIZ);
1469 54882600 : UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_VERT);
1470 : #undef UPDATE_COEFF_SIMPLE_CASE
1471 0 : default: assert(false);
1472 : }
1473 :
1474 : // DC position
1475 41181900 : if (si == 0) {
1476 : // no need to update accu_dist because it's not used after this point
1477 20394100 : int64_t dummy_dist = 0;
1478 20394100 : update_coeff_general(&accu_rate, &dummy_dist, si, *eob, tx_size, tx_class,
1479 : bwl, height, rdmult, shift, dc_sign_context,
1480 : p->dequant_QTX, scan, txb_costs, coeff_ptr, qcoeff_ptr, dqcoeff_ptr,
1481 : levels);
1482 : }
1483 41178100 : }
1484 :
1485 102074000 : static INLINE void set_dc_sign(int32_t *cul_level, int32_t dc_val) {
1486 102074000 : if (dc_val < 0)
1487 14532500 : *cul_level |= 1 << COEFF_CONTEXT_BITS;
1488 87541900 : else if (dc_val > 0)
1489 17956500 : *cul_level += 2 << COEFF_CONTEXT_BITS;
1490 102074000 : }
1491 102058000 : int32_t av1_quantize_inv_quantize(
1492 : PictureControlSet *picture_control_set_ptr,
1493 : ModeDecisionContext *md_context,
1494 : int32_t *coeff,
1495 : const uint32_t coeff_stride,
1496 : int32_t *quant_coeff,
1497 : int32_t *recon_coeff,
1498 : uint32_t qp,
1499 : int32_t segmentation_qp_offset,
1500 : uint32_t width,
1501 : uint32_t height,
1502 : TxSize txsize,
1503 : uint16_t *eob,
1504 : uint32_t *count_non_zero_coeffs,
1505 :
1506 : uint32_t component_type,
1507 : uint32_t bit_increment,
1508 : TxType tx_type,
1509 : ModeDecisionCandidateBuffer *candidate_buffer,
1510 : int16_t txb_skip_context, // Hsan (Trellis): derived @ MD (what about re-generating @ EP ?)
1511 : int16_t dc_sign_context, // Hsan (Trellis): derived @ MD (what about re-generating @ EP ?)
1512 : PredictionMode pred_mode,
1513 : EbBool is_intra_bc,
1514 : EbBool is_encode_pass)
1515 : {
1516 : (void)candidate_buffer;
1517 : (void)is_encode_pass;
1518 : (void)coeff_stride;
1519 : #if !ADD_DELTA_QP_SUPPORT
1520 : (void) qp;
1521 : #endif
1522 : MacroblockPlane candidate_plane ;
1523 :
1524 102058000 : const QmVal *qMatrix = picture_control_set_ptr->parent_pcs_ptr->gqmatrix[NUM_QM_LEVELS - 1][0][txsize];
1525 102058000 : const QmVal *iqMatrix = picture_control_set_ptr->parent_pcs_ptr->giqmatrix[NUM_QM_LEVELS - 1][0][txsize];
1526 : #if ADD_DELTA_QP_SUPPORT
1527 102058000 : uint32_t qIndex = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.delta_q_params.delta_q_present ? quantizer_to_qindex[qp] : picture_control_set_ptr->parent_pcs_ptr->frm_hdr.quantization_params.base_q_idx + segmentation_qp_offset;
1528 : #else
1529 : uint32_t qIndex = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.quantization_params.base_q_idx + segmentation_qp_offset ;
1530 : #endif
1531 102058000 : if (bit_increment == 0) {
1532 102092000 : if (component_type == COMPONENT_LUMA) {
1533 77502500 : candidate_plane.quant_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_quant[qIndex];
1534 77502500 : candidate_plane.quant_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_quant_fp[qIndex];
1535 77502500 : candidate_plane.round_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_round_fp[qIndex];
1536 77502500 : candidate_plane.quant_shift_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_quant_shift[qIndex];
1537 77502500 : candidate_plane.zbin_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_zbin[qIndex];
1538 77502500 : candidate_plane.round_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.y_round[qIndex];
1539 77502500 : candidate_plane.dequant_QTX = picture_control_set_ptr->parent_pcs_ptr->deqMd.y_dequant_QTX[qIndex];
1540 : }
1541 :
1542 102092000 : if (component_type == COMPONENT_CHROMA_CB) {
1543 12527400 : candidate_plane.quant_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_quant[qIndex];
1544 12527400 : candidate_plane.quant_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_quant_fp[qIndex];
1545 12527400 : candidate_plane.round_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_round_fp[qIndex];
1546 12527400 : candidate_plane.quant_shift_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_quant_shift[qIndex];
1547 12527400 : candidate_plane.zbin_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_zbin[qIndex];
1548 12527400 : candidate_plane.round_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.u_round[qIndex];
1549 12527400 : candidate_plane.dequant_QTX = picture_control_set_ptr->parent_pcs_ptr->deqMd.u_dequant_QTX[qIndex];
1550 : }
1551 :
1552 102092000 : if (component_type == COMPONENT_CHROMA_CR) {
1553 12163300 : candidate_plane.quant_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_quant[qIndex];
1554 12163300 : candidate_plane.quant_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_quant_fp[qIndex];
1555 12163300 : candidate_plane.round_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_round_fp[qIndex];
1556 12163300 : candidate_plane.quant_shift_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_quant_shift[qIndex];
1557 12163300 : candidate_plane.zbin_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_zbin[qIndex];
1558 12163300 : candidate_plane.round_QTX = picture_control_set_ptr->parent_pcs_ptr->quantsMd.v_round[qIndex];
1559 12163300 : candidate_plane.dequant_QTX = picture_control_set_ptr->parent_pcs_ptr->deqMd.v_dequant_QTX[qIndex];
1560 : }
1561 : }
1562 : else {
1563 0 : if (component_type == COMPONENT_LUMA) {
1564 0 : candidate_plane.quant_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.y_quant[qIndex];
1565 0 : candidate_plane.quant_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.y_quant_fp[qIndex];
1566 0 : candidate_plane.round_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.y_round_fp[qIndex];
1567 0 : candidate_plane.quant_shift_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.y_quant_shift[qIndex];
1568 0 : candidate_plane.zbin_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.y_zbin[qIndex];
1569 0 : candidate_plane.round_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.y_round[qIndex];
1570 0 : candidate_plane.dequant_QTX = picture_control_set_ptr->parent_pcs_ptr->deq.y_dequant_QTX[qIndex];
1571 : }
1572 :
1573 0 : if (component_type == COMPONENT_CHROMA_CB) {
1574 0 : candidate_plane.quant_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.u_quant[qIndex];
1575 0 : candidate_plane.quant_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.u_quant_fp[qIndex];
1576 0 : candidate_plane.round_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.u_round_fp[qIndex];
1577 0 : candidate_plane.quant_shift_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.u_quant_shift[qIndex];
1578 0 : candidate_plane.zbin_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.u_zbin[qIndex];
1579 0 : candidate_plane.round_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.u_round[qIndex];
1580 0 : candidate_plane.dequant_QTX = picture_control_set_ptr->parent_pcs_ptr->deq.u_dequant_QTX[qIndex];
1581 : }
1582 :
1583 0 : if (component_type == COMPONENT_CHROMA_CR) {
1584 0 : candidate_plane.quant_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.v_quant[qIndex];
1585 0 : candidate_plane.quant_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.v_quant_fp[qIndex];
1586 0 : candidate_plane.round_fp_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.v_round_fp[qIndex];
1587 0 : candidate_plane.quant_shift_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.v_quant_shift[qIndex];
1588 0 : candidate_plane.zbin_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.v_zbin[qIndex];
1589 0 : candidate_plane.round_QTX = picture_control_set_ptr->parent_pcs_ptr->quants.v_round[qIndex];
1590 0 : candidate_plane.dequant_QTX = picture_control_set_ptr->parent_pcs_ptr->deq.v_dequant_QTX[qIndex];
1591 : }
1592 : }
1593 :
1594 102058000 : const ScanOrder *const scan_order = &av1_scan_orders[txsize][tx_type]; //get_scan(tx_size, tx_type);
1595 :
1596 102058000 : const int32_t n_coeffs = av1_get_max_eob(txsize);
1597 :
1598 : QuantParam qparam;
1599 :
1600 102064000 : qparam.log_scale = av1_get_tx_scale(txsize);
1601 102159000 : qparam.tx_size = txsize;
1602 102159000 : qparam.qmatrix = qMatrix;
1603 102159000 : qparam.iqmatrix = iqMatrix;
1604 :
1605 :
1606 102159000 : EbBool is_inter = (pred_mode >= NEARESTMV);
1607 : #if RDOQ_CHROMA
1608 102159000 : EbBool perform_rdoq = ((md_context->md_staging_skip_rdoq == EB_FALSE || is_encode_pass) && md_context->trellis_quant_coeff_optimization && !is_intra_bc);
1609 : #else
1610 : EbBool perform_rdoq = ((md_context->md_staging_skip_rdoq == EB_FALSE || is_encode_pass) && md_context->trellis_quant_coeff_optimization && component_type == COMPONENT_LUMA && !is_intra_bc);
1611 : #endif
1612 :
1613 102159000 : SequenceControlSet *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
1614 102159000 : perform_rdoq = perform_rdoq && (EbBool) sequence_control_set_ptr->static_config.enable_rdoq;
1615 102159000 : if (sequence_control_set_ptr->static_config.encoder_bit_depth > 8
1616 0 : && picture_control_set_ptr->hbd_mode_decision==0 )
1617 0 : perform_rdoq = EB_FALSE;
1618 :
1619 : // Hsan: set to FALSE until adding x86 quantize_fp
1620 102159000 : EbBool perform_quantize_fp = picture_control_set_ptr->enc_mode == ENC_M0 ? EB_TRUE: EB_FALSE;
1621 :
1622 102159000 : if (perform_rdoq && perform_quantize_fp && !is_inter) {
1623 41328300 : if (bit_increment) {
1624 0 : eb_av1_highbd_quantize_fp_facade(
1625 : (TranLow*)coeff,
1626 : n_coeffs,
1627 : &candidate_plane,
1628 : quant_coeff,
1629 : (TranLow*)recon_coeff,
1630 : eob,
1631 : scan_order,
1632 : &qparam);
1633 : } else {
1634 41328300 : eb_av1_quantize_fp_facade(
1635 : (TranLow*)coeff,
1636 : n_coeffs,
1637 : &candidate_plane,
1638 : quant_coeff,
1639 : (TranLow*)recon_coeff,
1640 : eob,
1641 : scan_order,
1642 : &qparam);
1643 : }
1644 : } else {
1645 60830600 : if (bit_increment) {
1646 0 : eb_av1_highbd_quantize_b_facade(
1647 : (TranLow*)coeff,
1648 : n_coeffs,
1649 : &candidate_plane,
1650 : quant_coeff,
1651 : (TranLow*)recon_coeff,
1652 : eob,
1653 : scan_order,
1654 : &qparam);
1655 : } else {
1656 60830600 : av1_quantize_b_facade_II(
1657 : (TranLow*)coeff,
1658 : coeff_stride,
1659 : width,
1660 : height,
1661 : n_coeffs,
1662 : &candidate_plane,
1663 : quant_coeff,
1664 : (TranLow*)recon_coeff,
1665 : eob,
1666 : scan_order,
1667 : &qparam);
1668 : }
1669 : }
1670 :
1671 102186000 : if (perform_rdoq && *eob != 0) {
1672 :
1673 : // Perform Trellis
1674 35073000 : if (*eob != 0) {
1675 35073400 : eb_av1_optimize_b(
1676 : md_context,
1677 : txb_skip_context,
1678 : dc_sign_context,
1679 : (TranLow*)coeff,
1680 : coeff_stride,
1681 : n_coeffs,
1682 : &candidate_plane,
1683 : quant_coeff,
1684 : (TranLow*)recon_coeff,
1685 : eob,
1686 : scan_order,
1687 : &qparam,
1688 : txsize,
1689 : tx_type,
1690 : is_inter,
1691 : bit_increment,
1692 : (component_type == COMPONENT_LUMA) ? 0 : 1);
1693 : }
1694 : }
1695 :
1696 :
1697 102071000 : *count_non_zero_coeffs = *eob;
1698 :
1699 : // Derive cul_level
1700 102071000 : int32_t cul_level = 0;
1701 102071000 : const int16_t *const scan = scan_order->scan;
1702 1953540000 : for (int32_t c = 0; c < *eob; ++c) {
1703 1851470000 : const int16_t pos = scan[c];
1704 1851470000 : const int32_t v = quant_coeff[pos];
1705 1851470000 : int32_t level = ABS(v);
1706 1851470000 : cul_level += level;
1707 : }
1708 :
1709 102071000 : cul_level = AOMMIN(COEFF_CONTEXT_MASK, cul_level);
1710 : // DC value
1711 102071000 : set_dc_sign(&cul_level, quant_coeff[0]);
1712 102058000 : return cul_level;
1713 : }
1714 :
1715 : /****************************************
1716 : ************ Full loop ****************
1717 : ****************************************/
/*
 * product_full_loop
 *
 * Luma-only full-loop evaluation of a mode-decision candidate: forward
 * transform of the luma residual (av1_estimate_transform), quantization and
 * inverse quantization (av1_quantize_inv_quantize), distortion measurement,
 * coefficient-rate estimation (av1_tu_estimate_coeff_bits) and luma cost
 * computation (av1_tu_calc_cost_luma).
 *
 * When ENHANCE_ATB is enabled a single transform block is processed per call,
 * selected by context_ptr->tx_depth / txb_itr / txb_1d_offset. Otherwise the
 * function loops over every transform block of candidate_ptr->tx_depth.
 *
 * Parameters:
 *   candidate_buffer        - candidate under evaluation; its residual is
 *                             read, and its quantized coefficients, recon
 *                             coefficients, recon samples (spatial path only),
 *                             eob[0][txb_itr] and quantized_dc[0][txb_itr]
 *                             are written.
 *   context_ptr             - mode-decision context; three_quad_energy,
 *                             luma_txb_skip_context and luma_dc_sign_context
 *                             are overwritten, and (ENHANCE_ATB only)
 *                             txb_1d_offset is advanced past this block.
 *   picture_control_set_ptr - picture-level state (sequence settings,
 *                             segmentation parameters).
 *   input_picture_ptr       - source picture; read only on the spatial-SSE
 *                             path.
 *   qp                      - forwarded unchanged to av1_quantize_inv_quantize.
 *   y_count_non_zero_coeffs - per-transform-block array; element [txb_itr] is
 *                             passed to the quantizer as its non-zero count
 *                             output.
 *   y_coeff_bits            - running total; the block's coefficient bits are
 *                             added to it (it is not reset here).
 *   y_full_distortion       - running totals indexed by DIST_CALC_RESIDUAL and
 *                             DIST_CALC_PREDICTION; the block's distortions
 *                             are added to them (they are not reset here).
 */
1718 40264000 : void product_full_loop(
1719 : ModeDecisionCandidateBuffer *candidate_buffer,
1720 : ModeDecisionContext *context_ptr,
1721 : PictureControlSet *picture_control_set_ptr,
1722 : EbPictureBufferDesc *input_picture_ptr,
1723 : uint32_t qp,
1724 : uint32_t *y_count_non_zero_coeffs,
1725 : uint64_t *y_coeff_bits,
1726 : uint64_t *y_full_distortion)
1727 : {
1728 : uint32_t tu_origin_index;
1729 : uint64_t y_full_cost;
1730 40264000 : SequenceControlSet *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
1731 : // uint32_t currentTuIndex,tuIt;
1732 : uint64_t y_tu_coeff_bits;
// Only row [0] (luma) of tuFullDistortion is used in this function.
1733 : EB_ALIGN(16) uint64_t tuFullDistortion[3][DIST_CALC_TOTAL];
1734 40264000 : context_ptr->three_quad_energy = 0;
1735 : #if ENHANCE_ATB
// ENHANCE_ATB: the caller selects the transform depth, the transform block and
// the coefficient-buffer offset through the context.
1736 40264000 : uint8_t tx_depth = context_ptr->tx_depth;
1737 40264000 : uint32_t txb_itr = context_ptr->txb_itr;
1738 40264000 : uint32_t txb_1d_offset = context_ptr->txb_1d_offset;
1739 : #else
1740 : uint32_t txb_1d_offset = 0;
1741 : uint32_t txb_itr = 0;
1742 : #endif
1743 : #if !ENHANCE_ATB
// Without ENHANCE_ATB: iterate over every transform block of the candidate's
// tx_depth. The brace opened here is closed in the matching #else arm near the
// end of the function.
1744 : uint8_t tx_depth = candidate_buffer->candidate_ptr->tx_depth;
1745 : uint16_t txb_count = context_ptr->blk_geom->txb_count[tx_depth];
1746 : for (txb_itr = 0; txb_itr < txb_count; txb_itr++)
1747 : {
1748 : #endif
1749 40264000 : uint16_t tx_org_x = context_ptr->blk_geom->tx_org_x[tx_depth][txb_itr];
1750 40264000 : uint16_t tx_org_y = context_ptr->blk_geom->tx_org_y[tx_depth][txb_itr];
// Limit the transform block dimensions by the distance from the block origin
// to max_frame_width / max_frame_height. Only the spatial-SSE distortion calls
// below use these cropped sizes.
1751 40264000 : int32_t cropped_tx_width = MIN(context_ptr->blk_geom->tx_width[tx_depth][txb_itr], sequence_control_set_ptr->seq_header.max_frame_width - (context_ptr->sb_origin_x + tx_org_x));
1752 40264000 : int32_t cropped_tx_height = MIN(context_ptr->blk_geom->tx_height[tx_depth][txb_itr], sequence_control_set_ptr->seq_header.max_frame_height - (context_ptr->sb_origin_y + tx_org_y));
// Reset both luma entropy contexts, then hand their addresses to get_txb_ctx
// together with the neighbor array and the block position/size.
1753 40264000 : context_ptr->luma_txb_skip_context = 0;
1754 40264000 : context_ptr->luma_dc_sign_context = 0;
1755 40264000 : get_txb_ctx(
1756 : sequence_control_set_ptr,
1757 : COMPONENT_LUMA,
1758 : #if ENHANCE_ATB
1759 : context_ptr->full_loop_luma_dc_sign_level_coeff_neighbor_array,
1760 : #else
1761 : context_ptr->luma_dc_sign_level_coeff_neighbor_array,
1762 : #endif
1763 40264000 : context_ptr->sb_origin_x + tx_org_x,
1764 40264000 : context_ptr->sb_origin_y + tx_org_y,
1765 40264000 : context_ptr->blk_geom->bsize,
1766 40264000 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
1767 : &context_ptr->luma_txb_skip_context,
1768 : &context_ptr->luma_dc_sign_context);
1769 :
// Offset of this transform block inside the candidate's 2D sample buffers.
// It is computed with residual_ptr->stride_y and reused below to index
// prediction_ptr and recon_ptr as well.
1770 40263800 : tu_origin_index = tx_org_x + (tx_org_y * candidate_buffer->residual_ptr->stride_y);
1771 40263800 : y_tu_coeff_bits = 0;
1772 :
1773 : // Y: T Q iQ
// Forward transform: residual samples at tu_origin_index -> transform
// coefficients stored at txb_1d_offset in the context's scratch buffer.
1774 40263800 : av1_estimate_transform(
1775 40263800 : &(((int16_t*)candidate_buffer->residual_ptr->buffer_y)[tu_origin_index]),
1776 40263800 : candidate_buffer->residual_ptr->stride_y,
1777 40263800 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[txb_1d_offset]),
1778 : NOT_USED_VALUE,
1779 40263800 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
1780 : &context_ptr->three_quad_energy,
1781 : context_ptr->transform_inner_array_ptr,
1782 40263800 : context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
1783 40263800 : candidate_buffer->candidate_ptr->transform_type[txb_itr],
1784 : PLANE_TYPE_Y,
1785 : DEFAULT_SHAPE);
1786 :
// Per-segment SEG_LVL_ALT_Q feature value when segmentation is enabled,
// otherwise 0.
1787 80529600 : int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
1788 40264800 : picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
// Quantize / inverse-quantize. Quantized coefficients go to
// residual_quant_coeff_ptr and the inverse-quantized ones to recon_coeff_ptr,
// both at txb_1d_offset. The return value is stored as quantized_dc for this
// block. The final EB_FALSE is the is_encode_pass argument.
1789 40264800 : candidate_buffer->candidate_ptr->quantized_dc[0][txb_itr] = av1_quantize_inv_quantize(
1790 : picture_control_set_ptr,
1791 : context_ptr,
1792 40264800 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[txb_1d_offset]),
1793 : NOT_USED_VALUE,
1794 40264800 : &(((int32_t*)candidate_buffer->residual_quant_coeff_ptr->buffer_y)[txb_1d_offset]),
1795 40264800 : &(((int32_t*)candidate_buffer->recon_coeff_ptr->buffer_y)[txb_1d_offset]),
1796 : qp,
1797 : seg_qp,
1798 40264800 : context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
1799 40264800 : context_ptr->blk_geom->tx_height[tx_depth][txb_itr],
1800 40264800 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
1801 40264800 : &candidate_buffer->candidate_ptr->eob[0][txb_itr],
1802 40264800 : &(y_count_non_zero_coeffs[txb_itr]),
1803 : COMPONENT_LUMA,
1804 40264800 : context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
1805 40264800 : candidate_buffer->candidate_ptr->transform_type[txb_itr],
1806 : candidate_buffer,
1807 40264800 : context_ptr->luma_txb_skip_context,
1808 40264800 : context_ptr->luma_dc_sign_context,
1809 40264800 : candidate_buffer->candidate_ptr->pred_mode,
1810 40264800 : candidate_buffer->candidate_ptr->use_intrabc,
1811 : EB_FALSE);
1812 :
// Distortion, spatial (sample-domain) variant: build the reconstruction and
// compare both prediction and reconstruction against the input picture.
1813 40269100 : if (context_ptr->spatial_sse_full_loop) {
1814 40137600 : uint32_t input_tu_origin_index = (context_ptr->sb_origin_x + tx_org_x + input_picture_ptr->origin_x) + ((context_ptr->sb_origin_y + tx_org_y + input_picture_ptr->origin_y) * input_picture_ptr->stride_y);
1815 40137600 : uint32_t y_has_coeff = y_count_non_zero_coeffs[txb_itr] > 0;
1816 :
// With non-zero coefficients: inverse transform of recon_coeff_ptr combined
// with the prediction into recon_ptr. Without: recon is a copy of prediction.
1817 40137600 : if (y_has_coeff) {
1818 15447300 : inv_transform_recon_wrapper(
1819 15447300 : candidate_buffer->prediction_ptr->buffer_y,
1820 : tu_origin_index,
1821 15447300 : candidate_buffer->prediction_ptr->stride_y,
1822 15447300 : candidate_buffer->recon_ptr->buffer_y,
1823 : tu_origin_index,
1824 15447300 : candidate_buffer->recon_ptr->stride_y,
1825 15447300 : (int32_t*) candidate_buffer->recon_coeff_ptr->buffer_y,
1826 : txb_1d_offset,
1827 15447300 : context_ptr->hbd_mode_decision,
1828 15447300 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
1829 15447300 : candidate_buffer->candidate_ptr->transform_type[txb_itr],
1830 : PLANE_TYPE_Y,
1831 15447300 : (uint32_t)candidate_buffer->candidate_ptr->eob[0][txb_itr]);
1832 : } else {
1833 24690300 : picture_copy(
1834 : candidate_buffer->prediction_ptr,
1835 : tu_origin_index,
1836 : 0,
1837 : candidate_buffer->recon_ptr,
1838 : tu_origin_index,
1839 : 0,
1840 24690300 : context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
1841 24690300 : context_ptr->blk_geom->tx_height[tx_depth][txb_itr],
1842 : 0,
1843 : 0,
1844 : PICTURE_BUFFER_DESC_Y_FLAG,
1845 24690300 : context_ptr->hbd_mode_decision);
1846 : }
1847 :
// Select the distortion kernel that matches the mode-decision bit depth.
1848 80198100 : EbSpatialFullDistType spatial_full_dist_type_fun = context_ptr->hbd_mode_decision ?
1849 40099100 : full_distortion_kernel16_bits :
1850 : spatial_full_distortion_kernel;
1851 :
// Input vs. prediction over the cropped block area.
1852 80211800 : tuFullDistortion[0][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun(
1853 : input_picture_ptr->buffer_y,
1854 : input_tu_origin_index,
1855 40099100 : input_picture_ptr->stride_y,
1856 40099100 : candidate_buffer->prediction_ptr->buffer_y,
1857 : tu_origin_index,
1858 40099100 : candidate_buffer->prediction_ptr->stride_y,
1859 : cropped_tx_width,
1860 : cropped_tx_height);
1861 :
// Input vs. reconstruction over the cropped block area.
1862 80222900 : tuFullDistortion[0][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun(
1863 : input_picture_ptr->buffer_y,
1864 : input_tu_origin_index,
1865 40112700 : input_picture_ptr->stride_y,
1866 40112700 : candidate_buffer->recon_ptr->buffer_y,
1867 : tu_origin_index,
1868 40112700 : candidate_buffer->recon_ptr->stride_y,
1869 : cropped_tx_width,
1870 : cropped_tx_height);
1871 :
// Both spatial distortions are multiplied by 16.
// NOTE(review): presumably this aligns them with the scale of the
// coefficient-domain path below - confirm.
1872 40110200 : tuFullDistortion[0][DIST_CALC_PREDICTION] <<= 4;
1873 40110200 : tuFullDistortion[0][DIST_CALC_RESIDUAL] <<= 4;
1874 : }
1875 : else {
1876 : // LUMA DISTORTION
// Coefficient-domain variant: distortion between the transform coefficients
// and the inverse-quantized coefficients, using the uncropped block size.
1877 131566 : picture_full_distortion32_bits(
1878 131566 : context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr,
1879 : txb_1d_offset,
1880 : 0,
1881 : candidate_buffer->recon_coeff_ptr,
1882 : txb_1d_offset,
1883 : 0,
1884 131566 : context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
1885 131566 : context_ptr->blk_geom->tx_height[tx_depth][txb_itr],
1886 : NOT_USED_VALUE,
1887 : NOT_USED_VALUE,
1888 : tuFullDistortion[0],
1889 : NOT_USED_VALUE,
1890 : NOT_USED_VALUE,
1891 131566 : y_count_non_zero_coeffs[txb_itr],
1892 : 0,
1893 : 0,
1894 : COMPONENT_LUMA);
1895 :
// Add the energy value reported by av1_estimate_transform to both distortions,
// then scale them down by a shift derived from the transform size.
1896 149367 : tuFullDistortion[0][DIST_CALC_RESIDUAL] += context_ptr->three_quad_energy;
1897 149367 : tuFullDistortion[0][DIST_CALC_PREDICTION] += context_ptr->three_quad_energy;
1898 : //assert(context_ptr->three_quad_energy == 0 && context_ptr->cu_stats->size < 64);
1899 149367 : TxSize tx_size = context_ptr->blk_geom->txsize[tx_depth][txb_itr];
1900 149367 : int32_t shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
1901 149347 : tuFullDistortion[0][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(tuFullDistortion[0][DIST_CALC_RESIDUAL], shift);
1902 149347 : tuFullDistortion[0][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(tuFullDistortion[0][DIST_CALC_PREDICTION], shift);
1903 : }
1904 :
1905 : //LUMA-ONLY
// Rate estimation for the quantized luma coefficients. The same
// y_tu_coeff_bits variable is passed for all three bit-count outputs, and the
// chroma counts are 0.
1906 40259500 : av1_tu_estimate_coeff_bits(
1907 : context_ptr,
1908 : 0,//allow_update_cdf,
1909 : NULL,//FRAME_CONTEXT *ec_ctx,
1910 : picture_control_set_ptr,
1911 : candidate_buffer,
1912 : txb_1d_offset,
1913 : 0,
1914 : context_ptr->coeff_est_entropy_coder_ptr,
1915 : candidate_buffer->residual_quant_coeff_ptr,
1916 40259500 : y_count_non_zero_coeffs[txb_itr],
1917 : 0,
1918 : 0,
1919 : &y_tu_coeff_bits,
1920 : &y_tu_coeff_bits,
1921 : &y_tu_coeff_bits,
1922 40259500 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
1923 40259500 : context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
1924 40259500 : candidate_buffer->candidate_ptr->transform_type[txb_itr],
1925 40259500 : candidate_buffer->candidate_ptr->transform_type_uv,
1926 : COMPONENT_LUMA);
1927 :
1928 : //TODO: fix cbf decision
// NOTE(review): the transform size passed here is txsize[tx_depth][0], whereas
// every other call in this function uses txsize[tx_depth][txb_itr] - confirm
// that index 0 is intended.
1929 40258900 : av1_tu_calc_cost_luma(
1930 40258900 : context_ptr->luma_txb_skip_context,
1931 : candidate_buffer->candidate_ptr,
1932 : txb_itr,
1933 40258900 : context_ptr->blk_geom->txsize[tx_depth][0],
1934 40258900 : y_count_non_zero_coeffs[txb_itr],
1935 : tuFullDistortion[0], //gets updated inside based on cbf decision
1936 : &y_tu_coeff_bits, //gets updated inside based on cbf decision
1937 : &y_full_cost,
1938 40258900 : context_ptr->full_lambda);
1939 :
// Accumulate this block's rate and distortion into the caller's totals.
// y_full_cost is computed above but not returned to the caller.
1940 40262400 : (*y_coeff_bits) += y_tu_coeff_bits;
1941 :
1942 40262400 : y_full_distortion[DIST_CALC_RESIDUAL] += tuFullDistortion[0][DIST_CALC_RESIDUAL];
1943 40262400 : y_full_distortion[DIST_CALC_PREDICTION] += tuFullDistortion[0][DIST_CALC_PREDICTION];
1944 : #if ENHANCE_ATB
// Advance the coefficient-buffer offset stored in the context past this block.
1945 40262400 : context_ptr->txb_1d_offset += context_ptr->blk_geom->tx_width[tx_depth][txb_itr] * context_ptr->blk_geom->tx_height[tx_depth][txb_itr];
1946 : #else
// Advance the local offset and close the per-transform-block loop.
1947 : txb_1d_offset += context_ptr->blk_geom->tx_width[tx_depth][txb_itr] * context_ptr->blk_geom->tx_height[tx_depth][txb_itr];
1948 : }
1949 : #endif
1950 40262400 : }
1951 : // T1
/*
 * allowed_tx_set_a
 *
 * Reduced transform-type candidate table, looked up in this file as
 * allowed_tx_set_a[txSize][tx_type]. One row of TX_TYPES flags per transform
 * size: 1 keeps the transform type in the search, 0 makes the search loops
 * skip it. The table is only consulted when the picture's
 * tx_search_reduced_set is non-zero.
 *
 * Every row has a 1 in column 0, so at least one type always survives the
 * filter (presumably DCT_DCT, which the search loops use as their starting
 * index - confirm against the TxType enum).
 *
 * NOTE(review): the object is neither static nor const; whether other
 * translation units reference or modify it cannot be determined from here.
 */
1952 : uint8_t allowed_tx_set_a[TX_SIZES_ALL][TX_TYPES] = {
1953 : {1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
1954 : {1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1},
1955 : {1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
1956 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
1957 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
1958 : {1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
1959 : {1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
1960 : {1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1},
1961 : {1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0},
1962 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
1963 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
1964 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
1965 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
1966 : {1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
1967 : {1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
1968 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
1969 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
1970 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
1971 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
1972 :
/*
 * allowed_tx_set_b
 *
 * Second table with the same [TX_SIZES_ALL][TX_TYPES] layout as
 * allowed_tx_set_a, holding a sparser set of 0/1 flags.
 *
 * NOTE(review): nothing in the visible part of this file reads this table;
 * check for users elsewhere before changing or removing it.
 * NOTE(review): row index 13 is the only row whose column 0 is 0 (every other
 * row, and every row of allowed_tx_set_a, starts with 1) - confirm this is
 * intentional and not a typo.
 */
1973 : uint8_t allowed_tx_set_b[TX_SIZES_ALL][TX_TYPES] = {
1974 : {1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0},
1975 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
1976 : {1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
1977 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
1978 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
1979 : {1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0},
1980 : {1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0},
1981 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0},
1982 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0},
1983 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
1984 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
1985 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
1986 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
1987 : {0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
1988 : {1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
1989 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
1990 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
1991 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
1992 : {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
1993 : };
1994 :
/*
 * product_full_loop_tx_search
 *
 * Luma transform-type search for one mode-decision candidate. Every TxType in
 * [txk_start, txk_end) that survives the filters below is evaluated with
 * transform, quantization, distortion, rate estimation and luma cost, and the
 * type with the lowest y_full_cost is kept.
 *
 * Result:
 *   candidate_ptr->transform_type[0] receives the best type; when the
 *   candidate is inter (type == INTER_MODE or use_intrabc) the same type is
 *   copied to candidate_ptr->transform_type_uv.
 *
 * Side effects:
 *   candidate_ptr->transform_type[txb_itr] and eob[0][txb_itr], the candidate's
 *   quantized/recon coefficient buffers and (spatial path) recon samples are
 *   rewritten for every evaluated type, and context_ptr->three_quad_energy,
 *   luma_txb_skip_context and luma_dc_sign_context are overwritten.
 *
 * Parameters:
 *   candidate_buffer        - candidate whose luma residual is searched.
 *   context_ptr             - mode-decision context (block geometry, scratch
 *                             buffers, lambda, bit-depth flag).
 *   picture_control_set_ptr - picture-level state (tx_search_reduced_set,
 *                             reduced_tx_set, segmentation, input pictures).
 */
1995 1760920 : void product_full_loop_tx_search(
1996 : ModeDecisionCandidateBuffer *candidate_buffer,
1997 : ModeDecisionContext *context_ptr,
1998 : PictureControlSet *picture_control_set_ptr)
1999 : {
2000 : uint32_t tu_origin_index;
2001 1760920 : SequenceControlSet *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
2002 : uint64_t y_tu_coeff_bits;
// Only row [0] (luma) of tuFullDistortion is used in this function.
2003 : EB_ALIGN(16) uint64_t tuFullDistortion[3][DIST_CALC_TOTAL];
// plane is fixed to 0 (luma) and never changed, so the plane-dependent
// branches below always take the luma side.
2004 1760920 : int32_t plane = 0;
2005 1760920 : const int32_t is_inter = (candidate_buffer->candidate_ptr->type == INTER_MODE || candidate_buffer->candidate_ptr->use_intrabc) ? EB_TRUE : EB_FALSE;
2006 1760920 : uint64_t best_full_cost = UINT64_MAX;
2007 1760920 : uint64_t y_full_cost = MAX_CU_COST;
2008 : uint32_t yCountNonZeroCoeffsTemp;
2009 1760920 : TxType txk_start = DCT_DCT;
2010 1760920 : TxType txk_end = TX_TYPES;
2011 : TxType tx_type;
// This function-scope txb_itr (always 0) is only used to look up txSize; the
// search loop below declares its own txb_itr that shadows it.
2012 1760920 : int32_t txb_itr = 0;
2013 1760920 : uint8_t tx_depth = candidate_buffer->candidate_ptr->tx_depth;
2014 1760920 : TxSize txSize = context_ptr->blk_geom->txsize[tx_depth][txb_itr];
2015 1760920 : assert(txSize < TX_SIZES_ALL);
2016 : const TxSetType tx_set_type =
2017 1760920 : get_ext_tx_set_type(txSize, is_inter, picture_control_set_ptr->parent_pcs_ptr->frm_hdr.reduced_tx_set);
2018 :
2019 1760950 : int32_t allowed_tx_mask[TX_TYPES] = { 0 }; // 1: allow; 0: skip.
2020 1760950 : int32_t allowed_tx_num = 0;
2021 1760950 : TxType uv_tx_type = DCT_DCT;
// tx_search_reduced_set == 2 shortens the loops to two iterations; inside each
// loop the second iteration is redirected to IDTX.
2022 1760950 : if (picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set == 2)
2023 0 : txk_end = 2;
2024 :
// Pass 1: build allowed_tx_mask. A type stays allowed only if it equals its
// reference type; the reference type is DCT_DCT whenever the type is not in
// av1_ext_tx_used[tx_set_type] or txsize_sqr_up_map[txSize] > TX_32X32.
2025 29933000 : for (int32_t tx_type_index = txk_start; tx_type_index < txk_end; ++tx_type_index) {
// The loop index itself is overwritten with IDTX, so the loop increment then
// continues from IDTX (which ends the loop provided IDTX + 1 >= txk_end).
2026 28172000 : if (picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set == 2)
2027 0 : tx_type_index = (tx_type_index == 1) ? IDTX : tx_type_index;
2028 28172000 : tx_type = (TxType)tx_type_index;
2029 28172000 : allowed_tx_mask[tx_type] = 1;
2030 28172000 : if (plane == 0) {
2031 28172000 : if (allowed_tx_mask[tx_type]) {
2032 28172000 : const TxType ref_tx_type = ((!av1_ext_tx_used[tx_set_type][tx_type]) || txsize_sqr_up_map[txSize] > TX_32X32) ? DCT_DCT : tx_type;
2033 28172000 : if (tx_type != ref_tx_type)
2034 11619500 : allowed_tx_mask[tx_type] = 0;
2035 : }
2036 : }
2037 :
2038 28172000 : allowed_tx_num += allowed_tx_mask[tx_type];
2039 : }
2040 : // Need to have at least one transform type allowed.
2041 1760950 : if (allowed_tx_num == 0)
2042 0 : allowed_tx_mask[plane ? uv_tx_type : DCT_DCT] = 1;
2043 1760950 : TxType best_tx_type = DCT_DCT;
// Pass 2: evaluate every allowed type and remember the cheapest one.
2044 29931500 : for (int32_t tx_type_index = txk_start; tx_type_index < txk_end; ++tx_type_index) {
2045 28169700 : if (picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set == 2)
2046 0 : tx_type_index = (tx_type_index == 1) ? IDTX : tx_type_index;
2047 28169700 : tx_type = (TxType)tx_type_index;
2048 28169700 : if (!allowed_tx_mask[tx_type]) continue;
// Any non-zero tx_search_reduced_set additionally filters by allowed_tx_set_a.
2049 16550100 : if (picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set)
2050 0 : if (!allowed_tx_set_a[txSize][tx_type]) continue;
2051 :
2052 16550100 : context_ptr->three_quad_energy = 0;
// Shadows the function-scope txb_itr declared above.
2053 16550100 : uint32_t txb_itr = 0;
2054 16550100 : uint16_t txb_count = context_ptr->blk_geom->txb_count[tx_depth];
2055 33100600 : for (txb_itr = 0; txb_itr < txb_count; txb_itr++)
2056 : {
// The transform block origin is narrowed to uint8_t here.
2057 16549600 : uint8_t txb_origin_x = (uint8_t)context_ptr->blk_geom->tx_org_x[tx_depth][txb_itr];
2058 16549600 : uint8_t txb_origin_y = (uint8_t)context_ptr->blk_geom->tx_org_y[tx_depth][txb_itr];
// tu_origin_index (a 2D offset based on residual_ptr->stride_y) is used in
// this function to index the sample buffers and also the coefficient buffers.
2059 16549600 : tu_origin_index = txb_origin_x + (txb_origin_y * candidate_buffer->residual_ptr->stride_y);
2060 16549600 : y_tu_coeff_bits = 0;
2061 :
// Record the type under test; av1_tu_estimate_coeff_bits below reads it back
// from the candidate.
2062 16549600 : candidate_buffer->candidate_ptr->transform_type[txb_itr] = tx_type;
2063 :
// Reset both luma entropy contexts, then hand their addresses to get_txb_ctx.
2064 16549600 : context_ptr->luma_txb_skip_context = 0;
2065 16549600 : context_ptr->luma_dc_sign_context = 0;
2066 16549600 : get_txb_ctx(
2067 : sequence_control_set_ptr,
2068 : COMPONENT_LUMA,
2069 : #if ENHANCE_ATB
2070 : context_ptr->luma_dc_sign_level_coeff_neighbor_array,
2071 : #else
2072 : picture_control_set_ptr->ep_luma_dc_sign_level_coeff_neighbor_array,
2073 : #endif
2074 16549600 : context_ptr->sb_origin_x + txb_origin_x,
2075 16549600 : context_ptr->sb_origin_y + txb_origin_y,
2076 : //txb_origin_x,// context_ptr->cu_origin_x,
2077 : //txb_origin_y,// context_ptr->cu_origin_y,
2078 16549600 : context_ptr->blk_geom->bsize,
2079 16549600 : context_ptr->blk_geom->txsize[tx_depth][txb_itr], //[0][0],
2080 : &context_ptr->luma_txb_skip_context,
2081 : &context_ptr->luma_dc_sign_context);
2082 :
2083 : // Y: T Q iQ
// Forward transform with the type under test; the last argument is the
// context's pf_md_mode.
2084 16548200 : av1_estimate_transform(
2085 16548200 : &(((int16_t*)candidate_buffer->residual_ptr->buffer_y)[tu_origin_index]),
2086 16548200 : candidate_buffer->residual_ptr->stride_y,
2087 16548200 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[tu_origin_index]),
2088 : NOT_USED_VALUE,
2089 16548200 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
2090 : &context_ptr->three_quad_energy,
2091 : context_ptr->transform_inner_array_ptr,
2092 16548200 : context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
2093 : tx_type,
2094 : PLANE_TYPE_Y,
2095 16548200 : context_ptr->pf_md_mode);
2096 :
// Per-segment SEG_LVL_ALT_Q feature value when segmentation is enabled,
// otherwise 0.
2097 33100900 : int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
2098 16550500 : picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
2099 :
// Quantize / inverse-quantize with the coding unit's qp. The return value is
// ignored here; the non-zero count lands in yCountNonZeroCoeffsTemp.
2100 16550500 : av1_quantize_inv_quantize(
2101 : picture_control_set_ptr,
2102 : context_ptr,
2103 16550500 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_y)[tu_origin_index]),
2104 : NOT_USED_VALUE,
2105 16550500 : &(((int32_t*)candidate_buffer->residual_quant_coeff_ptr->buffer_y)[tu_origin_index]),
2106 16550500 : &(((int32_t*)candidate_buffer->recon_coeff_ptr->buffer_y)[tu_origin_index]),
2107 16550500 : context_ptr->cu_ptr->qp,
2108 : seg_qp,
2109 16550500 : context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
2110 16550500 : context_ptr->blk_geom->tx_height[tx_depth][txb_itr],
2111 16550500 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
2112 16550500 : &candidate_buffer->candidate_ptr->eob[0][txb_itr],
2113 : &yCountNonZeroCoeffsTemp,
2114 : COMPONENT_LUMA,
2115 16550500 : context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
2116 : tx_type,
2117 : candidate_buffer,
2118 16550500 : context_ptr->luma_txb_skip_context,
2119 16550500 : context_ptr->luma_dc_sign_context,
2120 16550500 : candidate_buffer->candidate_ptr->pred_mode,
2121 16550500 : candidate_buffer->candidate_ptr->use_intrabc,
2122 : EB_FALSE);
2123 :
2124 : //tx_type not equal to DCT_DCT and no coeff is not an acceptable option in AV1.
// This continue skips to the next transform block, not the next tx type, and
// y_full_cost is left at the value computed for an earlier type. The
// comparison after the loop uses a strict '<', so a stale cost equal to or
// above the current best does not replace it.
2125 16551400 : if (yCountNonZeroCoeffsTemp == 0 && tx_type != DCT_DCT)
2126 12273500 : continue;
2127 :
2128 :
// Distortion, spatial (sample-domain) variant.
2129 4277980 : if (context_ptr->spatial_sse_full_loop) {
// With non-zero coefficients: inverse transform into recon_ptr. Without:
// recon is a copy of the prediction.
2130 4280230 : if (yCountNonZeroCoeffsTemp)
2131 2917180 : inv_transform_recon_wrapper(
2132 2917180 : candidate_buffer->prediction_ptr->buffer_y,
2133 : tu_origin_index,
2134 2917180 : candidate_buffer->prediction_ptr->stride_y,
2135 2917180 : candidate_buffer->recon_ptr->buffer_y,
2136 : tu_origin_index,
2137 2917180 : candidate_buffer->recon_ptr->stride_y,
2138 2917180 : (int32_t*) candidate_buffer->recon_coeff_ptr->buffer_y,
2139 : tu_origin_index,
2140 2917180 : context_ptr->hbd_mode_decision,
2141 2917180 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
2142 : tx_type,
2143 : PLANE_TYPE_Y,
2144 2917180 : (uint16_t)candidate_buffer->candidate_ptr->eob[0][txb_itr]);
2145 : else
2146 1363050 : picture_copy(
2147 : candidate_buffer->prediction_ptr,
2148 : tu_origin_index,
2149 : 0,
2150 : candidate_buffer->recon_ptr,
2151 : tu_origin_index,
2152 : 0,
2153 1363050 : context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
2154 1363050 : context_ptr->blk_geom->tx_height[tx_depth][txb_itr],
2155 : 0,
2156 : 0,
2157 : PICTURE_BUFFER_DESC_Y_FLAG,
2158 1363050 : context_ptr->hbd_mode_decision);
2159 :
// The source picture is chosen by bit depth: the 16-bit input frame for
// high-bit-depth mode decision, otherwise the parent's enhanced picture.
2160 8560300 : EbPictureBufferDesc *input_picture_ptr = context_ptr->hbd_mode_decision ?
2161 4280150 : picture_control_set_ptr->input_frame16bit : picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
2162 4280150 : uint32_t input_tu_origin_index = (context_ptr->sb_origin_x + txb_origin_x + input_picture_ptr->origin_x) + ((context_ptr->sb_origin_y + txb_origin_y + input_picture_ptr->origin_y) * input_picture_ptr->stride_y);
2163 :
2164 8560300 : EbSpatialFullDistType spatial_full_dist_type_fun = context_ptr->hbd_mode_decision ?
2165 4280150 : full_distortion_kernel16_bits : spatial_full_distortion_kernel;
2166 :
// NOTE(review): both distortion calls use the full tx_width/tx_height, whereas
// product_full_loop crops them against the frame dimensions - confirm the
// difference is intended.
2167 8560210 : tuFullDistortion[0][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun(
2168 : input_picture_ptr->buffer_y,
2169 : input_tu_origin_index,
2170 4280150 : input_picture_ptr->stride_y,
2171 4280150 : candidate_buffer->prediction_ptr->buffer_y,
2172 : tu_origin_index,
2173 4280150 : candidate_buffer->prediction_ptr->stride_y,
2174 4280150 : context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
2175 4280150 : context_ptr->blk_geom->tx_height[tx_depth][txb_itr]);
2176 :
2177 8560040 : tuFullDistortion[0][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun(
2178 : input_picture_ptr->buffer_y,
2179 : input_tu_origin_index,
2180 4280060 : input_picture_ptr->stride_y,
2181 4280060 : candidate_buffer->recon_ptr->buffer_y,
2182 : tu_origin_index,
2183 4280060 : candidate_buffer->recon_ptr->stride_y,
2184 4280060 : context_ptr->blk_geom->tx_width[tx_depth][txb_itr],
2185 4280060 : context_ptr->blk_geom->tx_height[tx_depth][txb_itr]);
2186 :
// Both spatial distortions are multiplied by 16.
2187 4279980 : tuFullDistortion[0][DIST_CALC_PREDICTION] <<= 4;
2188 4279980 : tuFullDistortion[0][DIST_CALC_RESIDUAL] <<= 4;
2189 : }
2190 : else {
2191 : // LUMA DISTORTION
// Coefficient-domain variant. It is given the whole block's
// bwidth/bheight rather than the transform block's size, and the same
// tuFullDistortion[0] array for all three distortion outputs.
2192 0 : picture_full_distortion32_bits(
2193 0 : context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr,
2194 : tu_origin_index,
2195 : 0,
2196 : candidate_buffer->recon_coeff_ptr,
2197 : tu_origin_index,
2198 : 0,
2199 0 : context_ptr->blk_geom->bwidth,
2200 0 : context_ptr->blk_geom->bheight,
2201 0 : context_ptr->blk_geom->bwidth_uv,
2202 0 : context_ptr->blk_geom->bheight_uv,
2203 : tuFullDistortion[0],
2204 : tuFullDistortion[0],
2205 : tuFullDistortion[0],
2206 : yCountNonZeroCoeffsTemp,
2207 : 0,
2208 : 0,
2209 : COMPONENT_LUMA);
2210 :
// Add the energy value reported by av1_estimate_transform, then scale both
// distortions down by a shift derived from the transform size.
2211 0 : tuFullDistortion[0][DIST_CALC_RESIDUAL] += context_ptr->three_quad_energy;
2212 0 : tuFullDistortion[0][DIST_CALC_PREDICTION] += context_ptr->three_quad_energy;
2213 :
2214 0 : int32_t shift = (MAX_TX_SCALE - av1_get_tx_scale(txSize)) * 2;
2215 0 : tuFullDistortion[0][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(tuFullDistortion[0][DIST_CALC_RESIDUAL], shift);
2216 0 : tuFullDistortion[0][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(tuFullDistortion[0][DIST_CALC_PREDICTION], shift);
2217 : }
2218 : //LUMA-ONLY
// Rate estimation for the quantized luma coefficients. The same
// y_tu_coeff_bits variable is passed for all three bit-count outputs.
2219 4279980 : av1_tu_estimate_coeff_bits(
2220 : context_ptr,
2221 : 0,//allow_update_cdf,
2222 : NULL,//FRAME_CONTEXT *ec_ctx,
2223 : picture_control_set_ptr,
2224 : candidate_buffer,
2225 : tu_origin_index,
2226 : 0,
2227 : context_ptr->coeff_est_entropy_coder_ptr,
2228 : candidate_buffer->residual_quant_coeff_ptr,
2229 : yCountNonZeroCoeffsTemp,
2230 : 0,
2231 : 0,
2232 : &y_tu_coeff_bits,
2233 : &y_tu_coeff_bits,
2234 : &y_tu_coeff_bits,
2235 4279980 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
2236 4279980 : context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
2237 4279980 : candidate_buffer->candidate_ptr->transform_type[txb_itr],
2238 4279980 : candidate_buffer->candidate_ptr->transform_type_uv,
2239 : COMPONENT_LUMA);
2240 :
// Luma cost for this transform block; y_full_cost is overwritten on each
// call, it is not summed across transform blocks.
2241 4279860 : av1_tu_calc_cost_luma(
2242 4279860 : context_ptr->luma_txb_skip_context,
2243 : candidate_buffer->candidate_ptr,
2244 : txb_itr,
2245 4279860 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
2246 : yCountNonZeroCoeffsTemp,
2247 : tuFullDistortion[0],
2248 : &y_tu_coeff_bits,
2249 : &y_full_cost,
2250 4279860 : context_ptr->full_lambda);
2251 : }
2252 :
// Keep the type whose cost is strictly lower than the best seen so far.
2253 16551000 : if (y_full_cost < best_full_cost) {
2254 2354670 : best_full_cost = y_full_cost;
2255 2354670 : best_tx_type = tx_type;
2256 : }
2257 :
2258 : //if (cpi->sf.adaptive_txb_search_level) {
2259 : // if ((best_rd - (best_rd >> cpi->sf.adaptive_txb_search_level)) >
2260 : // ref_best_rd) {
2261 : // break;
2262 : // }
2263 : //}
2264 : //// Skip transform type search when we found the block has been quantized to
2265 : //// all zero and at the same time, it has better rdcost than doing transform.
2266 : //if (cpi->sf.tx_type_search.skip_tx_search && !best_eob) break;
2267 : }
2268 : // this kernel assumes no atb
// Only entry [0] receives the winner; entries written during the search for
// other transform blocks keep the last evaluated type.
2269 1761850 : candidate_buffer->candidate_ptr->transform_type[0] = best_tx_type;
2270 : // For Inter blocks, transform type of chroma follows luma transform type
2271 1761850 : if (is_inter)
2272 1421380 : candidate_buffer->candidate_ptr->transform_type_uv = candidate_buffer->candidate_ptr->transform_type[0];
2273 1761850 : }
2274 :
2275 16776 : void encode_pass_tx_search(
2276 : PictureControlSet *picture_control_set_ptr,
2277 : EncDecContext *context_ptr,
2278 : LargestCodingUnit *sb_ptr,
2279 : uint32_t cb_qp,
2280 : EbPictureBufferDesc *coeffSamplesTB,
2281 : EbPictureBufferDesc *residual16bit,
2282 : EbPictureBufferDesc *transform16bit,
2283 : EbPictureBufferDesc *inverse_quant_buffer,
2284 : int16_t *transformScratchBuffer,
2285 : uint32_t *count_non_zero_coeffs,
2286 : uint32_t component_mask,
2287 : uint32_t dZoffset,
2288 : uint16_t *eob,
2289 : MacroblockPlane *candidate_plane){
2290 : (void)dZoffset;
2291 : (void)cb_qp;
2292 : (void)candidate_plane;
2293 : UNUSED(count_non_zero_coeffs);
2294 : UNUSED(component_mask);
2295 :
2296 16776 : CodingUnit *cu_ptr = context_ptr->cu_ptr;
2297 16776 : TransformUnit *txb_ptr = &cu_ptr->transform_unit_array[context_ptr->txb_itr];
2298 16776 : uint32_t qp = cu_ptr->qp;
2299 16776 : const uint32_t coeff1dOffset = context_ptr->coded_area_sb;
2300 :
2301 : uint64_t y_tu_coeff_bits;
2302 : EB_ALIGN(16) uint64_t tuFullDistortion[3][DIST_CALC_TOTAL];
2303 16776 : const int32_t is_inter = context_ptr->is_inter;
2304 16776 : uint64_t best_full_cost = UINT64_MAX;
2305 : uint64_t y_full_cost;
2306 : uint32_t yCountNonZeroCoeffsTemp;
2307 16776 : TxType txk_start = DCT_DCT;
2308 16776 : TxType txk_end = TX_TYPES;
2309 : TxType tx_type;
2310 16776 : TxSize txSize = context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr];
2311 16776 : const uint32_t scratch_luma_offset = context_ptr->blk_geom->tx_org_x[cu_ptr->tx_depth][context_ptr->txb_itr] + context_ptr->blk_geom->tx_org_y[cu_ptr->tx_depth][context_ptr->txb_itr] * SB_STRIDE_Y;
2312 16776 : assert(txSize < TX_SIZES_ALL);
2313 : const TxSetType tx_set_type =
2314 16776 : get_ext_tx_set_type(txSize, is_inter, picture_control_set_ptr->parent_pcs_ptr->frm_hdr.reduced_tx_set);
2315 :
2316 16770 : TxType best_tx_type = DCT_DCT;
2317 16770 : if (picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set == 2)
2318 0 : txk_end = 2;
2319 284846 : for (int32_t tx_type_index = txk_start; tx_type_index < txk_end; ++tx_type_index) {
2320 268063 : if (picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set == 2)
2321 0 : tx_type_index = (tx_type_index == 1) ? IDTX : tx_type_index;
2322 268063 : tx_type = (TxType)tx_type_index;
2323 :
2324 268063 : if(picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set)
2325 0 : if (!allowed_tx_set_a[txSize][tx_type]) continue;
2326 :
2327 268063 : const int32_t eset = get_ext_tx_set(txSize, is_inter, picture_control_set_ptr->parent_pcs_ptr->frm_hdr.reduced_tx_set);
2328 : // eset == 0 should correspond to a set with only DCT_DCT and there
2329 : // is no need to send the tx_type
2330 268042 : if (eset <= 0) continue;
2331 233563 : if (av1_ext_tx_used[tx_set_type][tx_type] == 0) continue;
2332 :
2333 162348 : context_ptr->three_quad_energy = 0;
2334 :
2335 162348 : y_tu_coeff_bits = 0;
2336 :
2337 162348 : av1_estimate_transform(
2338 162348 : ((int16_t*)residual16bit->buffer_y) + scratch_luma_offset,
2339 162348 : residual16bit->stride_y,
2340 162348 : ((TranLow*)transform16bit->buffer_y) + coeff1dOffset,
2341 : NOT_USED_VALUE,
2342 162348 : context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
2343 : &context_ptr->three_quad_energy,
2344 : transformScratchBuffer,
2345 : BIT_INCREMENT_8BIT,
2346 : tx_type,
2347 : PLANE_TYPE_Y,
2348 : DEFAULT_SHAPE);
2349 325024 : int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
2350 162512 : picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
2351 :
2352 :
2353 162512 : av1_quantize_inv_quantize(
2354 162512 : sb_ptr->picture_control_set_ptr,
2355 : context_ptr->md_context,
2356 162512 : ((TranLow*)transform16bit->buffer_y) + coeff1dOffset,
2357 : NOT_USED_VALUE,
2358 162512 : ((int32_t*)coeffSamplesTB->buffer_y) + coeff1dOffset,
2359 162512 : ((int32_t*)inverse_quant_buffer->buffer_y) + coeff1dOffset,
2360 : qp,
2361 : seg_qp,
2362 162512 : context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr],
2363 162512 : context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr],
2364 162512 : context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
2365 : &eob[0],
2366 : &yCountNonZeroCoeffsTemp,
2367 : COMPONENT_LUMA,
2368 : BIT_INCREMENT_8BIT,
2369 : tx_type,
2370 162512 : &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
2371 : 0,
2372 : 0,
2373 : 0,
2374 162512 : cu_ptr->av1xd->use_intrabc,
2375 : EB_FALSE);
2376 :
2377 :
2378 : //tx_type not equal to DCT_DCT and no coeff is not an acceptable option in AV1.
2379 162565 : if (yCountNonZeroCoeffsTemp == 0 && tx_type != DCT_DCT)
2380 90763 : continue;
2381 : // LUMA DISTORTION
2382 71802 : picture_full_distortion32_bits(
2383 : transform16bit,
2384 : coeff1dOffset,
2385 : 0,
2386 : inverse_quant_buffer,
2387 : coeff1dOffset,
2388 : 0,
2389 71802 : context_ptr->blk_geom->bwidth,
2390 71802 : context_ptr->blk_geom->bheight,
2391 71802 : context_ptr->blk_geom->bwidth_uv,
2392 71802 : context_ptr->blk_geom->bheight_uv,
2393 : tuFullDistortion[0],
2394 : tuFullDistortion[0],
2395 : tuFullDistortion[0],
2396 : yCountNonZeroCoeffsTemp,
2397 : 0,
2398 : 0,
2399 : COMPONENT_LUMA);
2400 :
2401 71902 : tuFullDistortion[0][DIST_CALC_RESIDUAL] += context_ptr->three_quad_energy;
2402 71902 : tuFullDistortion[0][DIST_CALC_PREDICTION] += context_ptr->three_quad_energy;
2403 :
2404 71902 : int32_t shift = (MAX_TX_SCALE - av1_get_tx_scale(txSize)) * 2;
2405 71897 : tuFullDistortion[0][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(tuFullDistortion[0][DIST_CALC_RESIDUAL], shift);
2406 71897 : tuFullDistortion[0][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(tuFullDistortion[0][DIST_CALC_PREDICTION], shift);
2407 71897 : txb_ptr->transform_type[PLANE_TYPE_Y] = tx_type;
2408 :
2409 : //LUMA-ONLY
2410 :
2411 71897 : ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base = context_ptr->md_context->candidate_buffer_ptr_array;
2412 71897 : ModeDecisionCandidateBuffer **candidate_buffer_ptr_array = &(candidate_buffer_ptr_array_base[0]);
2413 : ModeDecisionCandidateBuffer *candidate_buffer;
2414 :
2415 : // Set the Candidate Buffer
2416 71897 : candidate_buffer = candidate_buffer_ptr_array[0];
2417 : // Rate estimation function uses the values from CandidatePtr. The right values are copied from cu_ptr to CandidatePtr
2418 71897 : EntropyCoder *coeff_est_entropy_coder_ptr = picture_control_set_ptr->coeff_est_entropy_coder_ptr;
2419 71897 : candidate_buffer->candidate_ptr->type = cu_ptr->prediction_mode_flag;
2420 71897 : candidate_buffer->candidate_ptr->pred_mode = cu_ptr->pred_mode;
2421 : #if FILTER_INTRA_FLAG
2422 71897 : candidate_buffer->candidate_ptr->filter_intra_mode = cu_ptr->filter_intra_mode;
2423 : #endif
2424 71897 : const uint32_t coeff1dOffset = context_ptr->coded_area_sb;
2425 :
2426 71897 : av1_tu_estimate_coeff_bits(
2427 71897 : context_ptr->md_context,
2428 : 0,//allow_update_cdf,
2429 : NULL,//FRAME_CONTEXT *ec_ctx,
2430 : picture_control_set_ptr,
2431 : candidate_buffer,
2432 : coeff1dOffset,
2433 : 0,
2434 : coeff_est_entropy_coder_ptr,
2435 : coeffSamplesTB,
2436 : yCountNonZeroCoeffsTemp,
2437 : 0,
2438 : 0,
2439 : &y_tu_coeff_bits,
2440 : &y_tu_coeff_bits,
2441 : &y_tu_coeff_bits,
2442 71897 : context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
2443 71897 : context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
2444 71897 : cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_Y],
2445 71897 : cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_UV],
2446 : COMPONENT_LUMA);
2447 :
2448 71802 : av1_tu_calc_cost_luma(
2449 71802 : context_ptr->md_context->luma_txb_skip_context,
2450 : candidate_buffer->candidate_ptr,
2451 71802 : context_ptr->txb_itr,
2452 71802 : context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
2453 : yCountNonZeroCoeffsTemp,
2454 : tuFullDistortion[0],
2455 : &y_tu_coeff_bits,
2456 : &y_full_cost,
2457 71802 : context_ptr->full_lambda);
2458 :
2459 71619 : if (y_full_cost < best_full_cost) {
2460 24088 : best_full_cost = y_full_cost;
2461 24088 : best_tx_type = tx_type;
2462 : }
2463 : }
2464 :
2465 16783 : txb_ptr->transform_type[PLANE_TYPE_Y] = best_tx_type;
2466 :
2467 : // For Inter blocks, transform type of chroma follows luma transfrom type
2468 16783 : if (is_inter)
2469 14050 : txb_ptr->transform_type[PLANE_TYPE_UV] = txb_ptr->transform_type[PLANE_TYPE_Y];
2470 16783 : }
2471 :
2472 0 : void encode_pass_tx_search_hbd(
2473 : PictureControlSet *picture_control_set_ptr,
2474 : EncDecContext *context_ptr,
2475 : LargestCodingUnit *sb_ptr,
2476 : uint32_t cb_qp,
2477 : EbPictureBufferDesc *coeffSamplesTB,
2478 : EbPictureBufferDesc *residual16bit,
2479 : EbPictureBufferDesc *transform16bit,
2480 : EbPictureBufferDesc *inverse_quant_buffer,
2481 : int16_t *transformScratchBuffer,
2482 : uint32_t *count_non_zero_coeffs,
2483 : uint32_t component_mask,
2484 : uint32_t dZoffset,
2485 : uint16_t *eob,
2486 : MacroblockPlane *candidate_plane){
2487 : (void)dZoffset;
2488 : (void)cb_qp;
2489 : (void)candidate_plane;
2490 : UNUSED(component_mask);
2491 : UNUSED(count_non_zero_coeffs);
2492 :
2493 0 : CodingUnit *cu_ptr = context_ptr->cu_ptr;
2494 0 : TransformUnit *txb_ptr = &cu_ptr->transform_unit_array[context_ptr->txb_itr];
2495 0 : uint32_t qp = cu_ptr->qp;
2496 0 : const uint32_t scratch_luma_offset = context_ptr->blk_geom->origin_x + context_ptr->blk_geom->origin_y * SB_STRIDE_Y;
2497 0 : const uint32_t coeff1dOffset = context_ptr->coded_area_sb;
2498 :
2499 : uint64_t y_tu_coeff_bits;
2500 : uint64_t tuFullDistortion[3][DIST_CALC_TOTAL];
2501 0 : const int32_t is_inter = context_ptr->is_inter;
2502 0 : uint64_t best_full_cost = UINT64_MAX;
2503 : uint64_t y_full_cost;
2504 : uint32_t yCountNonZeroCoeffsTemp;
2505 0 : TxType txk_start = DCT_DCT;
2506 0 : TxType txk_end = TX_TYPES;
2507 : TxType tx_type;
2508 0 : TxSize txSize = context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr];
2509 0 : assert(txSize < TX_SIZES_ALL);
2510 : const TxSetType tx_set_type =
2511 0 : get_ext_tx_set_type(txSize, is_inter, picture_control_set_ptr->parent_pcs_ptr->frm_hdr.reduced_tx_set);
2512 :
2513 0 : TxType best_tx_type = DCT_DCT;
2514 :
2515 0 : for (int32_t tx_type_index = txk_start; tx_type_index < txk_end; ++tx_type_index) {
2516 0 : tx_type = (TxType)tx_type_index;
2517 : ////if (!allowed_tx_mask[tx_type]) continue;
2518 0 : if (picture_control_set_ptr->parent_pcs_ptr->tx_search_reduced_set)
2519 0 : if (!allowed_tx_set_a[txSize][tx_type]) continue;
2520 :
2521 0 : const int32_t eset = get_ext_tx_set(txSize, is_inter, picture_control_set_ptr->parent_pcs_ptr->frm_hdr.reduced_tx_set);
2522 : // eset == 0 should correspond to a set with only DCT_DCT and there
2523 : // is no need to send the tx_type
2524 0 : if (eset <= 0) continue;
2525 0 : if (av1_ext_tx_used[tx_set_type][tx_type] == 0) continue;
2526 :
2527 0 : context_ptr->three_quad_energy = 0;
2528 :
2529 0 : y_tu_coeff_bits = 0;
2530 :
2531 0 : av1_estimate_transform(
2532 0 : ((int16_t*)residual16bit->buffer_y) + scratch_luma_offset,
2533 0 : residual16bit->stride_y,
2534 0 : ((TranLow*)transform16bit->buffer_y) + coeff1dOffset,
2535 : NOT_USED_VALUE,
2536 0 : context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
2537 : &context_ptr->three_quad_energy,
2538 : transformScratchBuffer,
2539 : BIT_INCREMENT_10BIT,
2540 : tx_type,
2541 : PLANE_TYPE_Y,
2542 : DEFAULT_SHAPE);
2543 0 : int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
2544 0 : picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
2545 :
2546 0 : av1_quantize_inv_quantize(
2547 0 : sb_ptr->picture_control_set_ptr,
2548 : context_ptr->md_context,
2549 0 : ((int32_t*)transform16bit->buffer_y) + coeff1dOffset,
2550 : NOT_USED_VALUE,
2551 0 : ((int32_t*)coeffSamplesTB->buffer_y) + coeff1dOffset,
2552 0 : ((int32_t*)inverse_quant_buffer->buffer_y) + coeff1dOffset,
2553 : qp,
2554 : seg_qp,
2555 0 : context_ptr->blk_geom->tx_width[cu_ptr->tx_depth][context_ptr->txb_itr],
2556 0 : context_ptr->blk_geom->tx_height[cu_ptr->tx_depth][context_ptr->txb_itr],
2557 0 : context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
2558 : &eob[0],
2559 : &yCountNonZeroCoeffsTemp,
2560 : COMPONENT_LUMA,
2561 : BIT_INCREMENT_10BIT,
2562 : tx_type,
2563 0 : &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
2564 : 0,
2565 : 0,
2566 : 0,
2567 0 : cu_ptr->av1xd->use_intrabc,
2568 : EB_FALSE);
2569 :
2570 :
2571 : //tx_type not equal to DCT_DCT and no coeff is not an acceptable option in AV1.
2572 0 : if (yCountNonZeroCoeffsTemp == 0 && tx_type != DCT_DCT)
2573 0 : continue;
2574 : // LUMA DISTORTION
2575 0 : picture_full_distortion32_bits(
2576 : transform16bit,
2577 : coeff1dOffset,
2578 : 0,
2579 : inverse_quant_buffer,
2580 : coeff1dOffset,
2581 : 0,
2582 0 : context_ptr->blk_geom->bwidth,
2583 0 : context_ptr->blk_geom->bheight,
2584 0 : context_ptr->blk_geom->bwidth_uv,
2585 0 : context_ptr->blk_geom->bheight_uv,
2586 : tuFullDistortion[0],
2587 : tuFullDistortion[0],
2588 : tuFullDistortion[0],
2589 : yCountNonZeroCoeffsTemp,
2590 : 0,
2591 : 0,
2592 : COMPONENT_LUMA);
2593 :
2594 0 : tuFullDistortion[0][DIST_CALC_RESIDUAL] += context_ptr->three_quad_energy;
2595 0 : tuFullDistortion[0][DIST_CALC_PREDICTION] += context_ptr->three_quad_energy;
2596 :
2597 0 : int32_t shift = (MAX_TX_SCALE - av1_get_tx_scale(txSize)) * 2;
2598 0 : tuFullDistortion[0][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(tuFullDistortion[0][DIST_CALC_RESIDUAL], shift);
2599 0 : tuFullDistortion[0][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(tuFullDistortion[0][DIST_CALC_PREDICTION], shift);
2600 0 : txb_ptr->transform_type[PLANE_TYPE_Y] = tx_type;
2601 :
2602 : //LUMA-ONLY
2603 :
2604 0 : ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base = context_ptr->md_context->candidate_buffer_ptr_array;
2605 0 : ModeDecisionCandidateBuffer **candidate_buffer_ptr_array = &(candidate_buffer_ptr_array_base[0]);
2606 : ModeDecisionCandidateBuffer *candidate_buffer;
2607 :
2608 : // Set the Candidate Buffer
2609 0 : candidate_buffer = candidate_buffer_ptr_array[0];
2610 : // Rate estimation function uses the values from CandidatePtr. The right values are copied from cu_ptr to CandidatePtr
2611 0 : EntropyCoder *coeff_est_entropy_coder_ptr = picture_control_set_ptr->coeff_est_entropy_coder_ptr;
2612 0 : candidate_buffer->candidate_ptr->type = cu_ptr->prediction_mode_flag;
2613 0 : candidate_buffer->candidate_ptr->pred_mode = cu_ptr->pred_mode;
2614 : #if FILTER_INTRA_FLAG
2615 0 : candidate_buffer->candidate_ptr->filter_intra_mode = cu_ptr->filter_intra_mode;
2616 : #endif
2617 0 : const uint32_t coeff1dOffset = context_ptr->coded_area_sb;
2618 :
2619 0 : av1_tu_estimate_coeff_bits(
2620 0 : context_ptr->md_context,
2621 : 0,//allow_update_cdf,
2622 : NULL,//FRAME_CONTEXT *ec_ctx,
2623 : picture_control_set_ptr,
2624 : candidate_buffer,
2625 : coeff1dOffset,
2626 : 0,
2627 : coeff_est_entropy_coder_ptr,
2628 : coeffSamplesTB,
2629 : yCountNonZeroCoeffsTemp,
2630 : 0,
2631 : 0,
2632 : &y_tu_coeff_bits,
2633 : &y_tu_coeff_bits,
2634 : &y_tu_coeff_bits,
2635 0 : context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
2636 0 : context_ptr->blk_geom->txsize_uv[cu_ptr->tx_depth][context_ptr->txb_itr],
2637 0 : cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_Y],
2638 0 : cu_ptr->transform_unit_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_UV],
2639 : COMPONENT_LUMA);
2640 :
2641 0 : av1_tu_calc_cost_luma(
2642 0 : context_ptr->md_context->luma_txb_skip_context,
2643 : candidate_buffer->candidate_ptr,
2644 0 : context_ptr->txb_itr,
2645 0 : context_ptr->blk_geom->txsize[cu_ptr->tx_depth][context_ptr->txb_itr],
2646 : yCountNonZeroCoeffsTemp,
2647 : tuFullDistortion[0],
2648 : &y_tu_coeff_bits,
2649 : &y_full_cost,
2650 0 : context_ptr->full_lambda);
2651 :
2652 0 : if (y_full_cost < best_full_cost) {
2653 0 : best_full_cost = y_full_cost;
2654 0 : best_tx_type = tx_type;
2655 : }
2656 : }
2657 :
2658 0 : txb_ptr->transform_type[PLANE_TYPE_Y] = best_tx_type;
2659 :
2660 : // For Inter blocks, transform type of chroma follows luma transfrom type
2661 0 : if (is_inter)
2662 0 : txb_ptr->transform_type[PLANE_TYPE_UV] = txb_ptr->transform_type[PLANE_TYPE_Y];
2663 0 : }
2664 :
2665 44058200 : void inv_transform_recon_wrapper(
2666 : uint8_t *pred_buffer,
2667 : uint32_t pred_offset,
2668 : uint32_t pred_stride,
2669 : uint8_t *rec_buffer,
2670 : uint32_t rec_offset,
2671 : uint32_t rec_stride,
2672 : int32_t *rec_coeff_buffer,
2673 : uint32_t coeff_offset,
2674 : EbBool hbd,
2675 : TxSize txsize,
2676 : TxType transform_type,
2677 : PlaneType component_type,
2678 : uint32_t eob)
2679 : {
2680 44058200 : if (hbd) {
2681 0 : av1_inv_transform_recon(
2682 0 : rec_coeff_buffer + coeff_offset,
2683 0 : CONVERT_TO_BYTEPTR(((uint16_t*)pred_buffer) + pred_offset), pred_stride,
2684 0 : CONVERT_TO_BYTEPTR(((uint16_t*)rec_buffer) + rec_offset), rec_stride,
2685 : txsize,
2686 : BIT_INCREMENT_10BIT,
2687 : transform_type,
2688 : component_type,
2689 : eob, 0 /*lossless*/);
2690 : } else {
2691 44058200 : av1_inv_transform_recon8bit(
2692 44058200 : rec_coeff_buffer + coeff_offset,
2693 : pred_buffer + pred_offset, pred_stride,
2694 : rec_buffer + rec_offset, rec_stride,
2695 : txsize,
2696 : transform_type,
2697 : component_type,
2698 : eob, 0 /*lossless*/);
2699 : }
2700 44052400 : }
2701 :
2702 : /****************************************
2703 : ************ Full loop ****************
2704 : ****************************************/
2705 17201000 : void full_loop_r(
2706 : LargestCodingUnit *sb_ptr,
2707 : ModeDecisionCandidateBuffer *candidate_buffer,
2708 : ModeDecisionContext *context_ptr,
2709 : EbPictureBufferDesc *input_picture_ptr,
2710 : PictureControlSet *picture_control_set_ptr,
2711 : uint32_t component_mask,
2712 : uint32_t cb_qp,
2713 : uint32_t cr_qp,
2714 : uint32_t *cb_count_non_zero_coeffs,
2715 : uint32_t *cr_count_non_zero_coeffs)
2716 : {
2717 : (void)sb_ptr;
2718 : (void)cr_qp;
2719 : (void)input_picture_ptr;
2720 : int16_t *chromaResidualPtr;
2721 : uint32_t tu_origin_index;
2722 : UNUSED(tu_origin_index);
2723 : uint32_t tuCbOriginIndex;
2724 : uint32_t tuCrOriginIndex;
2725 : uint32_t tuCount;
2726 : uint32_t txb_itr;
2727 : uint32_t txb_origin_x;
2728 : uint32_t txb_origin_y;
2729 :
2730 17201000 : SequenceControlSet *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
2731 :
2732 17201000 : context_ptr->three_quad_energy = 0;
2733 :
2734 17201000 : uint8_t tx_depth = candidate_buffer->candidate_ptr->tx_depth;
2735 17201000 : tuCount = context_ptr->blk_geom->txb_count[candidate_buffer->candidate_ptr->tx_depth];
2736 17201000 : uint32_t txb_1d_offset = 0;
2737 17201000 : tuCount = tx_depth ? 1 : tuCount; //NM: 128x128 exeption
2738 :
2739 17201000 : txb_itr = 0;
2740 : do {
2741 17198000 : txb_origin_x = context_ptr->blk_geom->tx_org_x[tx_depth][txb_itr];
2742 17198000 : txb_origin_y = context_ptr->blk_geom->tx_org_y[tx_depth][txb_itr];
2743 :
2744 17198000 : context_ptr->cb_txb_skip_context = 0;
2745 17198000 : context_ptr->cb_dc_sign_context = 0;
2746 17198000 : get_txb_ctx(
2747 : sequence_control_set_ptr,
2748 : COMPONENT_CHROMA,
2749 : context_ptr->cb_dc_sign_level_coeff_neighbor_array,
2750 17198000 : ROUND_UV(context_ptr->sb_origin_x + txb_origin_x) >> 1,
2751 17198000 : ROUND_UV(context_ptr->sb_origin_y + txb_origin_y) >> 1,
2752 17198000 : context_ptr->blk_geom->bsize_uv,
2753 17198000 : context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
2754 : &context_ptr->cb_txb_skip_context,
2755 : &context_ptr->cb_dc_sign_context);
2756 :
2757 :
2758 17202500 : context_ptr->cr_txb_skip_context = 0;
2759 17202500 : context_ptr->cr_dc_sign_context = 0;
2760 17202500 : get_txb_ctx(
2761 : sequence_control_set_ptr,
2762 : COMPONENT_CHROMA,
2763 : context_ptr->cr_dc_sign_level_coeff_neighbor_array,
2764 17202500 : ROUND_UV(context_ptr->sb_origin_x + txb_origin_x) >> 1,
2765 17202500 : ROUND_UV(context_ptr->sb_origin_y + txb_origin_y) >> 1,
2766 17202500 : context_ptr->blk_geom->bsize_uv,
2767 17202500 : context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
2768 : &context_ptr->cr_txb_skip_context,
2769 : &context_ptr->cr_dc_sign_context);
2770 :
2771 : // NADER - TU
2772 17199200 : tu_origin_index = txb_origin_x + txb_origin_y * candidate_buffer->residual_quant_coeff_ptr->stride_y;
2773 17199200 : tuCbOriginIndex = (((txb_origin_x >> 3) << 3) + (((txb_origin_y >> 3) << 3) * candidate_buffer->residual_quant_coeff_ptr->stride_cb)) >> 1;
2774 17199200 : tuCrOriginIndex = (((txb_origin_x >> 3) << 3) + (((txb_origin_y >> 3) << 3) * candidate_buffer->residual_quant_coeff_ptr->stride_cr)) >> 1;
2775 :
2776 : // This function replaces the previous Intra Chroma mode if the LM fast
2777 : // cost is better.
2778 : // *Note - this might require that we have inv transform in the loop
2779 17199200 : if (component_mask & PICTURE_BUFFER_DESC_Cb_FLAG) {
2780 : // Configure the Chroma Residual Ptr
2781 :
2782 12495500 : chromaResidualPtr = //(candidate_buffer->candidate_ptr->type == INTRA_MODE )?
2783 : //&(((int16_t*) candidate_buffer->intraChromaResidualPtr->buffer_cb)[tu_chroma_origin_index]):
2784 12495500 : &(((int16_t*)candidate_buffer->residual_ptr->buffer_cb)[tuCbOriginIndex]);
2785 :
2786 : // Cb Transform
2787 12495500 : av1_estimate_transform(
2788 : chromaResidualPtr,
2789 12495500 : candidate_buffer->residual_ptr->stride_cb,
2790 12495500 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_cb)[txb_1d_offset]),
2791 : NOT_USED_VALUE,
2792 12495500 : context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
2793 : &context_ptr->three_quad_energy,
2794 : context_ptr->transform_inner_array_ptr,
2795 12495500 : context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
2796 12495500 : candidate_buffer->candidate_ptr->transform_type_uv,
2797 : PLANE_TYPE_UV,
2798 : DEFAULT_SHAPE);
2799 :
2800 24994400 : int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
2801 12497200 : picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
2802 12497200 : candidate_buffer->candidate_ptr->quantized_dc[1][0] = av1_quantize_inv_quantize(
2803 : picture_control_set_ptr,
2804 : context_ptr,
2805 12497200 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_cb)[txb_1d_offset]),
2806 : NOT_USED_VALUE,
2807 12497200 : &(((int32_t*)candidate_buffer->residual_quant_coeff_ptr->buffer_cb)[txb_1d_offset]),
2808 12497200 : &(((int32_t*)candidate_buffer->recon_coeff_ptr->buffer_cb)[txb_1d_offset]),
2809 : cb_qp,
2810 : seg_qp,
2811 12497200 : context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr],
2812 12497200 : context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr],
2813 12497200 : context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
2814 12497200 : &candidate_buffer->candidate_ptr->eob[1][txb_itr],
2815 12497200 : &(cb_count_non_zero_coeffs[txb_itr]),
2816 : COMPONENT_CHROMA_CB,
2817 12497200 : context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
2818 12497200 : candidate_buffer->candidate_ptr->transform_type_uv,
2819 : candidate_buffer,
2820 : #if RDOQ_CHROMA
2821 12497200 : context_ptr->cb_txb_skip_context,
2822 12497200 : context_ptr->cb_dc_sign_context,
2823 12497200 : candidate_buffer->candidate_ptr->pred_mode >= NEARESTMV,
2824 : #else
2825 : 0,
2826 : 0,
2827 : 0,
2828 : #endif
2829 12497200 : candidate_buffer->candidate_ptr->use_intrabc,
2830 : EB_FALSE);
2831 :
2832 12496500 : if (context_ptr->spatial_sse_full_loop) {
2833 12474500 : uint32_t cb_has_coeff = cb_count_non_zero_coeffs[txb_itr] > 0;
2834 :
2835 12474500 : if (cb_has_coeff)
2836 5606660 : inv_transform_recon_wrapper(
2837 5606660 : candidate_buffer->prediction_ptr->buffer_cb,
2838 : tuCbOriginIndex,
2839 5606660 : candidate_buffer->prediction_ptr->stride_cb,
2840 5606660 : candidate_buffer->recon_ptr->buffer_cb,
2841 : tuCbOriginIndex,
2842 5606660 : candidate_buffer->recon_ptr->stride_cb,
2843 5606660 : (int32_t*) candidate_buffer->recon_coeff_ptr->buffer_cb,
2844 : txb_1d_offset,
2845 5606660 : context_ptr->hbd_mode_decision,
2846 5606660 : context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
2847 5606660 : candidate_buffer->candidate_ptr->transform_type_uv,
2848 : PLANE_TYPE_UV,
2849 5606660 : (uint32_t)candidate_buffer->candidate_ptr->eob[1][txb_itr]);
2850 : else
2851 6867860 : picture_copy(
2852 : candidate_buffer->prediction_ptr,
2853 : 0,
2854 : tuCbOriginIndex,
2855 : candidate_buffer->recon_ptr,
2856 : 0,
2857 : tuCbOriginIndex,
2858 : 0,
2859 : 0,
2860 6867860 : context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr],
2861 6867860 : context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr],
2862 : PICTURE_BUFFER_DESC_Cb_FLAG,
2863 6867860 : context_ptr->hbd_mode_decision);
2864 : }
2865 : }
2866 :
2867 17199600 : if (component_mask & PICTURE_BUFFER_DESC_Cr_FLAG) {
2868 : // Configure the Chroma Residual Ptr
2869 :
2870 12132200 : chromaResidualPtr = //(candidate_buffer->candidate_ptr->type == INTRA_MODE )?
2871 : //&(((int16_t*) candidate_buffer->intraChromaResidualPtr->buffer_cr)[tu_chroma_origin_index]):
2872 12132200 : &(((int16_t*)candidate_buffer->residual_ptr->buffer_cr)[tuCrOriginIndex]);
2873 :
2874 : // Cr Transform
2875 12132200 : av1_estimate_transform(
2876 : chromaResidualPtr,
2877 12132200 : candidate_buffer->residual_ptr->stride_cr,
2878 12132200 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_cr)[txb_1d_offset]),
2879 : NOT_USED_VALUE,
2880 12132200 : context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
2881 : &context_ptr->three_quad_energy,
2882 : context_ptr->transform_inner_array_ptr,
2883 12132200 : context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
2884 12132200 : candidate_buffer->candidate_ptr->transform_type_uv,
2885 : PLANE_TYPE_UV,
2886 : DEFAULT_SHAPE);
2887 24266900 : int32_t seg_qp = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled ?
2888 12133500 : picture_control_set_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.feature_data[context_ptr->cu_ptr->segment_id][SEG_LVL_ALT_Q] : 0;
2889 :
2890 12133500 : candidate_buffer->candidate_ptr->quantized_dc[2][0] = av1_quantize_inv_quantize(
2891 : picture_control_set_ptr,
2892 : context_ptr,
2893 12133500 : &(((int32_t*)context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr->buffer_cr)[txb_1d_offset]),
2894 : NOT_USED_VALUE,
2895 12133500 : &(((int32_t*)candidate_buffer->residual_quant_coeff_ptr->buffer_cr)[txb_1d_offset]),
2896 12133500 : &(((int32_t*)candidate_buffer->recon_coeff_ptr->buffer_cr)[txb_1d_offset]),
2897 : cb_qp,
2898 : seg_qp,
2899 12133500 : context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr],
2900 12133500 : context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr],
2901 12133500 : context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
2902 12133500 : &candidate_buffer->candidate_ptr->eob[2][txb_itr],
2903 12133500 : &(cr_count_non_zero_coeffs[txb_itr]),
2904 : COMPONENT_CHROMA_CR,
2905 12133500 : context_ptr->hbd_mode_decision ? BIT_INCREMENT_10BIT : BIT_INCREMENT_8BIT,
2906 12133500 : candidate_buffer->candidate_ptr->transform_type_uv,
2907 : candidate_buffer,
2908 : #if RDOQ_CHROMA
2909 12133500 : context_ptr->cr_txb_skip_context,
2910 12133500 : context_ptr->cr_dc_sign_context,
2911 12133500 : candidate_buffer->candidate_ptr->pred_mode >= NEARESTMV,
2912 : #else
2913 : 0,
2914 : 0,
2915 : 0,
2916 : #endif
2917 12133500 : candidate_buffer->candidate_ptr->use_intrabc,
2918 : EB_FALSE);
2919 :
2920 12132500 : if (context_ptr->spatial_sse_full_loop) {
2921 12111000 : uint32_t cr_has_coeff = cr_count_non_zero_coeffs[txb_itr] > 0;
2922 :
2923 12111000 : if (cr_has_coeff)
2924 3716780 : inv_transform_recon_wrapper(
2925 3716780 : candidate_buffer->prediction_ptr->buffer_cr,
2926 : tuCrOriginIndex,
2927 3716780 : candidate_buffer->prediction_ptr->stride_cr,
2928 3716780 : candidate_buffer->recon_ptr->buffer_cr,
2929 : tuCrOriginIndex,
2930 3716780 : candidate_buffer->recon_ptr->stride_cr,
2931 3716780 : (int32_t*) candidate_buffer->recon_coeff_ptr->buffer_cr,
2932 : txb_1d_offset,
2933 3716780 : context_ptr->hbd_mode_decision,
2934 3716780 : context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
2935 3716780 : candidate_buffer->candidate_ptr->transform_type_uv,
2936 : PLANE_TYPE_UV,
2937 3716780 : (uint32_t)candidate_buffer->candidate_ptr->eob[2][txb_itr]);
2938 : else
2939 8394220 : picture_copy(
2940 : candidate_buffer->prediction_ptr,
2941 : 0,
2942 : tuCbOriginIndex,
2943 : candidate_buffer->recon_ptr,
2944 : 0,
2945 : tuCbOriginIndex,
2946 : 0,
2947 : 0,
2948 8394220 : context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr],
2949 8394220 : context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr],
2950 : PICTURE_BUFFER_DESC_Cr_FLAG,
2951 8394220 : context_ptr->hbd_mode_decision);
2952 : }
2953 : }
2954 :
2955 17199000 : txb_1d_offset += context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr] * context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr];
2956 :
2957 17199000 : ++txb_itr;
2958 17199000 : } while (txb_itr < tuCount);
2959 17202000 : }
2960 :
2961 : //****************************************
2962 : // ************ CuFullDistortionFastTuMode ****************
2963 : //****************************************/
2964 17204900 : void cu_full_distortion_fast_tu_mode_r(
2965 : LargestCodingUnit *sb_ptr,
2966 : ModeDecisionCandidateBuffer *candidate_buffer,
2967 : ModeDecisionContext *context_ptr,
2968 : ModeDecisionCandidate *candidate_ptr,
2969 : PictureControlSet *picture_control_set_ptr,
2970 : EbPictureBufferDesc *input_picture_ptr,
2971 : uint64_t cbFullDistortion[DIST_CALC_TOTAL],
2972 : uint64_t crFullDistortion[DIST_CALC_TOTAL],
2973 : uint32_t count_non_zero_coeffs[3][MAX_NUM_OF_TU_PER_CU],
2974 : COMPONENT_TYPE component_type,
2975 : uint64_t *cb_coeff_bits,
2976 : uint64_t *cr_coeff_bits,
2977 : EbBool is_full_loop)
2978 : {
2979 : (void)sb_ptr;
2980 :
2981 : uint64_t y_tu_coeff_bits;
2982 : uint64_t cb_tu_coeff_bits;
2983 : uint64_t cr_tu_coeff_bits;
2984 : uint32_t tu_origin_index;
2985 : uint32_t txb_origin_x;
2986 : uint32_t txb_origin_y;
2987 : uint32_t currentTuIndex;
2988 : int32_t chromaShift;
2989 : uint32_t tu_chroma_origin_index;
2990 : EB_ALIGN(16) uint64_t tuFullDistortion[3][DIST_CALC_TOTAL];
2991 : EbPictureBufferDesc *transform_buffer;
2992 : uint32_t tuTotalCount;
2993 17204900 : uint32_t txb_itr = 0;
2994 17204900 : uint8_t tx_depth = candidate_buffer->candidate_ptr->tx_depth;
2995 17204900 : tuTotalCount = context_ptr->blk_geom->txb_count[tx_depth];
2996 17204900 : currentTuIndex = 0;
2997 17204900 : transform_buffer = context_ptr->trans_quant_buffers_ptr->tu_trans_coeff2_nx2_n_ptr;
2998 :
2999 17204900 : uint32_t txb_1d_offset = 0;
3000 17204900 : candidate_ptr->u_has_coeff = 0;
3001 17204900 : candidate_ptr->v_has_coeff = 0;
3002 17204900 : tuTotalCount = tx_depth ? 1 : tuTotalCount; //NM: 128x128 exeption
3003 : do {
3004 17213400 : txb_origin_x = context_ptr->blk_geom->tx_org_x[tx_depth][txb_itr];
3005 17213400 : txb_origin_y = context_ptr->blk_geom->tx_org_y[tx_depth][txb_itr];
3006 17213400 : int32_t cropped_tx_width_uv = MIN(context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr], picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.max_frame_width / 2 - ((context_ptr->sb_origin_x + ((txb_origin_x >> 3) << 3)) >> 1));
3007 17213400 : int32_t cropped_tx_height_uv = MIN(context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr], picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.max_frame_height / 2 - ((context_ptr->sb_origin_y + ((txb_origin_y >> 3) << 3)) >> 1));
3008 17213400 : tu_origin_index = txb_origin_x + txb_origin_y * candidate_buffer->residual_quant_coeff_ptr->stride_y;
3009 17213400 : tu_chroma_origin_index = txb_1d_offset;
3010 : // Reset the Bit Costs
3011 17213400 : y_tu_coeff_bits = 0;
3012 17213400 : cb_tu_coeff_bits = 0;
3013 17213400 : cr_tu_coeff_bits = 0;
3014 :
3015 17213400 : if (component_type == COMPONENT_CHROMA_CB || component_type == COMPONENT_CHROMA_CR || component_type == COMPONENT_CHROMA || component_type == COMPONENT_ALL) {
3016 : uint32_t countNonZeroCoeffsAll[3];
3017 17204500 : countNonZeroCoeffsAll[0] = count_non_zero_coeffs[0][currentTuIndex];
3018 17204500 : countNonZeroCoeffsAll[1] = count_non_zero_coeffs[1][currentTuIndex];
3019 17204500 : countNonZeroCoeffsAll[2] = count_non_zero_coeffs[2][currentTuIndex];
3020 :
3021 17204500 : if (is_full_loop && context_ptr->spatial_sse_full_loop) {
3022 7422890 : uint32_t input_chroma_tu_origin_index = (((context_ptr->sb_origin_y + ((txb_origin_y >> 3) << 3)) >> 1) + (input_picture_ptr->origin_y >> 1)) * input_picture_ptr->stride_cb + (((context_ptr->sb_origin_x + ((txb_origin_x >> 3) << 3)) >> 1) + (input_picture_ptr->origin_x >> 1));
3023 7422890 : uint32_t tu_uv_origin_index = (((txb_origin_x >> 3) << 3) + (((txb_origin_y >> 3) << 3) * candidate_buffer->residual_quant_coeff_ptr->stride_cb)) >> 1;
3024 :
3025 14845800 : EbSpatialFullDistType spatial_full_dist_type_fun = context_ptr->hbd_mode_decision ?
3026 7422890 : full_distortion_kernel16_bits : spatial_full_distortion_kernel;
3027 :
3028 14844900 : tuFullDistortion[1][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun(
3029 : input_picture_ptr->buffer_cb,
3030 : input_chroma_tu_origin_index,
3031 7422890 : input_picture_ptr->stride_cb,
3032 7422890 : candidate_buffer->prediction_ptr->buffer_cb,
3033 : tu_uv_origin_index,
3034 7422890 : candidate_buffer->prediction_ptr->stride_cb,
3035 : cropped_tx_width_uv,
3036 : cropped_tx_height_uv);
3037 :
3038 14844000 : tuFullDistortion[1][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun(
3039 : input_picture_ptr->buffer_cb,
3040 : input_chroma_tu_origin_index,
3041 7422010 : input_picture_ptr->stride_cb,
3042 7422010 : candidate_buffer->recon_ptr->buffer_cb,
3043 : tu_uv_origin_index,
3044 7422010 : candidate_buffer->recon_ptr->stride_cb,
3045 : cropped_tx_width_uv,
3046 : cropped_tx_height_uv);
3047 :
3048 14843900 : tuFullDistortion[2][DIST_CALC_PREDICTION] = spatial_full_dist_type_fun(
3049 : input_picture_ptr->buffer_cr,
3050 : input_chroma_tu_origin_index,
3051 7421970 : input_picture_ptr->stride_cr,
3052 7421970 : candidate_buffer->prediction_ptr->buffer_cr,
3053 : tu_uv_origin_index,
3054 7421970 : candidate_buffer->prediction_ptr->stride_cr,
3055 : cropped_tx_width_uv,
3056 : cropped_tx_height_uv);
3057 :
3058 14843600 : tuFullDistortion[2][DIST_CALC_RESIDUAL] = spatial_full_dist_type_fun(
3059 : input_picture_ptr->buffer_cr,
3060 : input_chroma_tu_origin_index,
3061 7421880 : input_picture_ptr->stride_cr,
3062 7421880 : candidate_buffer->recon_ptr->buffer_cr,
3063 : tu_uv_origin_index,
3064 7421880 : candidate_buffer->recon_ptr->stride_cr,
3065 : cropped_tx_width_uv,
3066 : cropped_tx_height_uv);
3067 7421760 : tuFullDistortion[1][DIST_CALC_PREDICTION] <<= 4;
3068 7421760 : tuFullDistortion[1][DIST_CALC_RESIDUAL] <<= 4;
3069 7421760 : tuFullDistortion[2][DIST_CALC_PREDICTION] <<= 4;
3070 7421760 : tuFullDistortion[2][DIST_CALC_RESIDUAL] <<= 4;
3071 : }
3072 : else {
3073 : // *Full Distortion (SSE)
3074 : // *Note - there are known issues with how this distortion metric is currently
3075 : // calculated. The amount of scaling between the two arrays is not
3076 : // equivalent.
3077 :
3078 9781610 : picture_full_distortion32_bits(
3079 : transform_buffer,
3080 : NOT_USED_VALUE,
3081 : tu_chroma_origin_index,
3082 : candidate_buffer->recon_coeff_ptr,
3083 : NOT_USED_VALUE,
3084 : tu_chroma_origin_index,
3085 : NOT_USED_VALUE,
3086 : NOT_USED_VALUE,
3087 9781610 : context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr],
3088 9781610 : context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr],
3089 : tuFullDistortion[0],
3090 : tuFullDistortion[1],
3091 : tuFullDistortion[2],
3092 : countNonZeroCoeffsAll[0],
3093 : countNonZeroCoeffsAll[1],
3094 : countNonZeroCoeffsAll[2],
3095 : component_type);
3096 9791380 : TxSize txSize = context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr];
3097 9791380 : chromaShift = (MAX_TX_SCALE - av1_get_tx_scale(txSize)) * 2;
3098 9790880 : tuFullDistortion[1][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(tuFullDistortion[1][DIST_CALC_RESIDUAL], chromaShift);
3099 9790880 : tuFullDistortion[1][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(tuFullDistortion[1][DIST_CALC_PREDICTION], chromaShift);
3100 9790880 : tuFullDistortion[2][DIST_CALC_RESIDUAL] = RIGHT_SIGNED_SHIFT(tuFullDistortion[2][DIST_CALC_RESIDUAL], chromaShift);
3101 9790880 : tuFullDistortion[2][DIST_CALC_PREDICTION] = RIGHT_SIGNED_SHIFT(tuFullDistortion[2][DIST_CALC_PREDICTION], chromaShift);
3102 : }
3103 : //CHROMA-ONLY
3104 17212600 : av1_tu_estimate_coeff_bits(
3105 : context_ptr,
3106 : 0,//allow_update_cdf,
3107 : NULL,//FRAME_CONTEXT *ec_ctx,
3108 : picture_control_set_ptr,
3109 : candidate_buffer,
3110 : tu_origin_index,
3111 : tu_chroma_origin_index,
3112 : context_ptr->coeff_est_entropy_coder_ptr,
3113 : candidate_buffer->residual_quant_coeff_ptr,
3114 : count_non_zero_coeffs[0][currentTuIndex],
3115 17212600 : count_non_zero_coeffs[1][currentTuIndex],
3116 17212600 : count_non_zero_coeffs[2][currentTuIndex],
3117 : &y_tu_coeff_bits,
3118 : &cb_tu_coeff_bits,
3119 : &cr_tu_coeff_bits,
3120 17212600 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
3121 17212600 : context_ptr->blk_geom->txsize_uv[tx_depth][txb_itr],
3122 17212600 : candidate_buffer->candidate_ptr->transform_type[txb_itr],
3123 17212600 : candidate_buffer->candidate_ptr->transform_type_uv,
3124 : component_type);
3125 :
3126 : // OMK Useless ? We don't calculate Chroma CBF here
3127 17194000 : av1_tu_calc_cost(
3128 : candidate_ptr,
3129 17194000 : context_ptr->luma_txb_skip_context,
3130 : currentTuIndex,
3131 : count_non_zero_coeffs[0][currentTuIndex],
3132 17194000 : count_non_zero_coeffs[1][currentTuIndex],
3133 17194000 : count_non_zero_coeffs[2][currentTuIndex],
3134 : tuFullDistortion[0],
3135 : tuFullDistortion[1],
3136 : tuFullDistortion[2],
3137 : component_type,
3138 : &y_tu_coeff_bits,
3139 : &cb_tu_coeff_bits,
3140 : &cr_tu_coeff_bits,
3141 17194000 : context_ptr->blk_geom->txsize[tx_depth][txb_itr],
3142 17194000 : context_ptr->full_lambda);
3143 :
3144 17199900 : *cb_coeff_bits += cb_tu_coeff_bits;
3145 17199900 : *cr_coeff_bits += cr_tu_coeff_bits;
3146 17199900 : cbFullDistortion[DIST_CALC_RESIDUAL] += tuFullDistortion[1][DIST_CALC_RESIDUAL];
3147 17199900 : crFullDistortion[DIST_CALC_RESIDUAL] += tuFullDistortion[2][DIST_CALC_RESIDUAL];
3148 17199900 : cbFullDistortion[DIST_CALC_PREDICTION] += tuFullDistortion[1][DIST_CALC_PREDICTION];
3149 17199900 : crFullDistortion[DIST_CALC_PREDICTION] += tuFullDistortion[2][DIST_CALC_PREDICTION];
3150 : }
3151 :
3152 17208800 : txb_1d_offset += context_ptr->blk_geom->tx_width_uv[tx_depth][txb_itr] * context_ptr->blk_geom->tx_height_uv[tx_depth][txb_itr];
3153 17208800 : currentTuIndex++;
3154 :
3155 17208800 : ++txb_itr;
3156 17208800 : } while (txb_itr < tuTotalCount);
3157 17200300 : }
3158 :
3159 : /***************************************
3160 : * Check merge_block algorithm
3161 : ***************************************/
/*
 * Compares the inter prediction of two entries of context_ptr->md_cu_arr_nsq
 * and reports whether the entry at nsq_idx ("child") repeats the motion of the
 * entry at sq_idx ("parent").
 *
 * Only prediction_unit_array[0] of each entry is inspected.
 *
 * Returns EB_TRUE when all of the following hold, EB_FALSE otherwise:
 *   - both entries have the same inter_pred_direction_index,
 *   - the child's block_has_coeff field is 0,
 *   - the mv_union value(s) of the list(s) selected by the direction are equal
 *     (list 0 for UNI_PRED_LIST_0, list 1 for UNI_PRED_LIST_1, both for BI_PRED).
 */
3162 1039640 : EbBool merge_1D_inter_block(
3163 : ModeDecisionContext *context_ptr,
3164 : uint32_t sq_idx,
3165 : uint32_t nsq_idx) {
3166 1039640 : EbBool merge_blocks = EB_FALSE;
3167 1039640 : CodingUnit *parent_cu_ptr = &context_ptr->md_cu_arr_nsq[sq_idx];
3168 1039640 : CodingUnit *child_cu_ptr = &context_ptr->md_cu_arr_nsq[nsq_idx];
// Snapshot the direction and the packed motion vectors (mv_union) of the first PU of each block.
3169 1039640 : int parent_diriction = parent_cu_ptr->prediction_unit_array[0].inter_pred_direction_index;
3170 1039640 : int parent_mv_l0 = parent_cu_ptr->prediction_unit_array[0].mv[REF_LIST_0].mv_union;
3171 1039640 : int parent_mv_l1 = parent_cu_ptr->prediction_unit_array[0].mv[REF_LIST_1].mv_union;
3172 1039640 : int child_0_diriction = child_cu_ptr->prediction_unit_array[0].inter_pred_direction_index;
3173 1039640 : int child_0_mv_l0 = child_cu_ptr->prediction_unit_array[0].mv[REF_LIST_0].mv_union;
3174 1039640 : int child_0_mv_l1 = child_cu_ptr->prediction_unit_array[0].mv[REF_LIST_1].mv_union;
// Despite its name, child_eob holds the child's block_has_coeff field.
3175 1039640 : int child_eob = child_cu_ptr->block_has_coeff;
// Motion vectors are compared only when the directions agree and the child has no coefficients.
3176 1039640 : if (parent_diriction == child_0_diriction && child_eob == 0) {
3177 607059 : switch (parent_diriction) {
3178 96317 : case UNI_PRED_LIST_0:
3179 96317 : if (parent_mv_l0 == child_0_mv_l0)
3180 31917 : merge_blocks = EB_TRUE;
3181 96317 : break;
3182 25603 : case UNI_PRED_LIST_1:
3183 25603 : if (parent_mv_l1 == child_0_mv_l1)
3184 8147 : merge_blocks = EB_TRUE;
3185 25603 : break;
3186 469675 : case BI_PRED:
3187 469675 : if (parent_mv_l0 == child_0_mv_l0 &&
3188 : parent_mv_l1 == child_0_mv_l1) {
3189 133319 : merge_blocks = EB_TRUE;
3190 : }
3191 469675 : break;
// Any other direction value is never reported as a merge.
3192 15464 : default:
3193 15464 : merge_blocks = EB_FALSE;
3194 15464 : break;
3195 : }
3196 432581 : }
3197 1039640 : return merge_blocks;
3198 : }
/*
 * Evaluates the partition shape that ends at the current block
 * (context_ptr->cu_ptr->mds_idx) and, if it wins, records it in the entry of
 * the enclosing square block (context_ptr->blk_geom->sqi_mds).
 *
 * Steps, as performed below:
 *   1. Sum md_local_cu_unit[].cost over the totns blocks of this shape.
 *   2. For every block of the shape other than the square itself, and only
 *      when the square's avail_blk_flag is set, count how many are reported
 *      as merges by merge_1D_inter_block().
 *   3. For block sizes above BLOCK_4X4, add the cost returned by
 *      av1_split_flag_rate() for from_shape_to_part[shape].
 *   4. Overwrite the square's cost / part / best_d1_blk when this is the first
 *      shape evaluated (d1_block_itr == 0, or shape == PART_N when
 *      ADD_SUPPORT_TO_SKIP_PART_N is off), or when the total is strictly
 *      cheaper than the stored cost and not every block was counted as a merge.
 *
 * d1_block_itr exists only when ADD_SUPPORT_TO_SKIP_PART_N is enabled.
 */
3199 924655 : void d1_non_square_block_decision(
3200 : ModeDecisionContext *context_ptr
3201 : #if ADD_SUPPORT_TO_SKIP_PART_N
3202 : , uint32_t d1_block_itr
3203 : #endif
3204 : )
3205 : {
3206 : //compute total cost for the whole block partition
3207 924655 : uint64_t tot_cost = 0;
3208 924655 : uint32_t first_blk_idx = context_ptr->cu_ptr->mds_idx - (context_ptr->blk_geom->totns - 1);//index of first block in this partition
3209 : uint32_t blk_it;
3210 924655 : uint32_t merge_block_cnt = 0;
3211 924655 : EbBool merge_block_flag = EB_FALSE;
3212 2746620 : for (blk_it = 0; blk_it < context_ptr->blk_geom->totns; blk_it++)
3213 : {
3214 1821970 : tot_cost += context_ptr->md_local_cu_unit[first_blk_idx + blk_it].cost;
// The square block is never compared against itself; the merge test also needs the square to be available.
3215 1821970 : if (context_ptr->blk_geom->sqi_mds != first_blk_idx + blk_it)
3216 1412660 : if (context_ptr->md_local_cu_unit[context_ptr->blk_geom->sqi_mds].avail_blk_flag)
3217 1039640 : merge_block_cnt += merge_1D_inter_block(context_ptr, context_ptr->blk_geom->sqi_mds, first_blk_idx + blk_it);
3218 : }
// Blocks of size BLOCK_4X4 (or lower in the enum order) get no partition-flag cost added.
3219 924654 : if (context_ptr->blk_geom->bsize > BLOCK_4X4) {
3220 738786 : uint64_t split_cost = 0;
3221 738786 : uint32_t parent_depth_idx_mds = context_ptr->blk_geom->sqi_mds;
3222 738786 : av1_split_flag_rate(
3223 738786 : context_ptr->sb_ptr->picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr,
3224 : context_ptr,
3225 738786 : &context_ptr->md_cu_arr_nsq[parent_depth_idx_mds],
3226 : 0,
3227 738786 : from_shape_to_part[context_ptr->blk_geom->shape],
3228 : &split_cost,
3229 738786 : context_ptr->full_lambda,
3230 : context_ptr->md_rate_estimation_ptr,
3231 738786 : context_ptr->sb_ptr->picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->max_sb_depth);
3232 :
3233 738850 : tot_cost += split_cost;
3234 : }
// The flag is raised only when the merge count equals the number of blocks in the shape.
3235 924718 : if (merge_block_cnt == context_ptr->blk_geom->totns) merge_block_flag = EB_TRUE;
3236 : #if ADD_SUPPORT_TO_SKIP_PART_N
3237 924718 : if (d1_block_itr == 0 || (tot_cost < context_ptr->md_local_cu_unit[context_ptr->blk_geom->sqi_mds].cost && merge_block_flag == EB_FALSE))
3238 : #else
3239 : if (context_ptr->blk_geom->shape == PART_N || (tot_cost < context_ptr->md_local_cu_unit[context_ptr->blk_geom->sqi_mds].cost && merge_block_flag == EB_FALSE))
3240 : #endif
3241 : {
3242 : //store best partition cost in parent square
3243 421493 : context_ptr->md_local_cu_unit[context_ptr->blk_geom->sqi_mds].cost = tot_cost;
3244 421493 : context_ptr->md_cu_arr_nsq[context_ptr->blk_geom->sqi_mds].part = from_shape_to_part[context_ptr->blk_geom->shape];
3245 421493 : context_ptr->md_cu_arr_nsq[context_ptr->blk_geom->sqi_mds].best_d1_blk = first_blk_idx;
3246 : }
3247 924718 : }
3248 :
3249 : /// compute the cost of curr depth, and the depth above
/*
 * Produces the two costs that are compared when choosing between a block at
 * the depth above and its four blocks at the current depth.
 *
 *   curr_depth_mds   index of the fourth (last) current-depth block; the other
 *                    three are addressed as curr_depth_mds - 1/2/3 * step.
 *   above_depth_mds  index of the block at the depth above.
 *   step             index distance between consecutive current-depth blocks.
 *   above_depth_cost out: stored cost of the above-depth block, or
 *                    MAX_MODE_COST when its tested_cu_flag is not EB_TRUE.
 *   curr_depth_cost  out: sum of the four current-depth costs, their
 *                    PARTITION_NONE rates and the above-depth PARTITION_SPLIT rate.
 *
 * Side effect: the neighbor mode/depth/partition fields of the above-depth
 * entry in md_local_cu_unit are overwritten with those of blk0.
 */
3250 100178 : void compute_depth_costs(
3251 : ModeDecisionContext *context_ptr,
3252 : SequenceControlSet *sequence_control_set_ptr,
3253 : uint32_t curr_depth_mds,
3254 : uint32_t above_depth_mds,
3255 : uint32_t step,
3256 : uint64_t *above_depth_cost,
3257 : uint64_t *curr_depth_cost)
3258 : {
// above_non_split_rate is never modified in this function, so it contributes 0.
3259 100178 : uint64_t above_non_split_rate = 0;
3260 100178 : uint64_t above_split_rate = 0;
3261 :
3262 : /*
3263 : ___________
3264 : | | |
3265 : |blk0 |blk1 |
3266 : |-----|-----|
3267 : |blk2 |blk3 |
3268 : |_____|_____|
3269 : */
3270 : // current depth blocks
3271 100178 : uint32_t curr_depth_blk0_mds = curr_depth_mds - 3 * step;
3272 100178 : uint32_t curr_depth_blk1_mds = curr_depth_mds - 2 * step;
3273 100178 : uint32_t curr_depth_blk2_mds = curr_depth_mds - 1 * step;
3274 100178 : uint32_t curr_depth_blk3_mds = curr_depth_mds;
3275 :
3276 : // Rate of not splitting the current depth (Depth != 4) in case the children were omitted by MDC
3277 100178 : uint64_t curr_non_split_rate_blk0 = 0;
3278 100178 : uint64_t curr_non_split_rate_blk1 = 0;
3279 100178 : uint64_t curr_non_split_rate_blk2 = 0;
3280 100178 : uint64_t curr_non_split_rate_blk3 = 0;
3281 :
// Copy blk0's neighbor information into the above-depth entry.
3282 100178 : context_ptr->md_local_cu_unit[above_depth_mds].left_neighbor_mode = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].left_neighbor_mode;
3283 100178 : context_ptr->md_local_cu_unit[above_depth_mds].left_neighbor_depth = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].left_neighbor_depth;
3284 100178 : context_ptr->md_local_cu_unit[above_depth_mds].top_neighbor_mode = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].top_neighbor_mode;
3285 100178 : context_ptr->md_local_cu_unit[above_depth_mds].top_neighbor_depth = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].top_neighbor_depth;
3286 100178 : context_ptr->md_local_cu_unit[above_depth_mds].left_neighbor_partition = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].left_neighbor_partition;
3287 100178 : context_ptr->md_local_cu_unit[above_depth_mds].above_neighbor_partition = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].above_neighbor_partition;
3288 :
3289 : // Compute above depth cost
3290 100178 : if (context_ptr->md_local_cu_unit[above_depth_mds].tested_cu_flag == EB_TRUE)
3291 : {
3292 96405 : *above_depth_cost = context_ptr->md_local_cu_unit[above_depth_mds].cost + above_non_split_rate;
3293 : // Query the PARTITION_SPLIT rate for the above-depth block; it is added to *curr_depth_cost below
3294 96405 : av1_split_flag_rate(
3295 : sequence_control_set_ptr,
3296 : context_ptr,
3297 96405 : &context_ptr->md_cu_arr_nsq[above_depth_mds],
3298 : 0,
3299 : PARTITION_SPLIT,
3300 : &above_split_rate,
3301 96405 : context_ptr->full_lambda,
3302 : context_ptr->md_rate_estimation_ptr,
3303 96405 : sequence_control_set_ptr->max_sb_depth);
3304 : }
3305 : else
3306 3773 : *above_depth_cost = MAX_MODE_COST;
// Per-block PARTITION_NONE rates are queried only when the context's current block size
// is above BLOCK_4X4, and then only for blocks that were tested and have mdc_split_flag == 0.
// Every other block keeps a rate of 0.
3307 100178 : if (context_ptr->blk_geom->bsize > BLOCK_4X4) {
3308 38491 : if (context_ptr->md_local_cu_unit[curr_depth_blk0_mds].tested_cu_flag)
3309 38491 : if (context_ptr->md_cu_arr_nsq[curr_depth_blk0_mds].mdc_split_flag == 0)
3310 30293 : av1_split_flag_rate(
3311 : sequence_control_set_ptr,
3312 : context_ptr,
3313 30293 : &context_ptr->md_cu_arr_nsq[curr_depth_blk0_mds],
3314 : 0,
3315 : PARTITION_NONE,
3316 : &curr_non_split_rate_blk0,
3317 30293 : context_ptr->full_lambda,
3318 : context_ptr->md_rate_estimation_ptr,
3319 30293 : sequence_control_set_ptr->max_sb_depth);
3320 :
3321 38490 : if (context_ptr->md_local_cu_unit[curr_depth_blk1_mds].tested_cu_flag)
3322 38486 : if (context_ptr->md_cu_arr_nsq[curr_depth_blk1_mds].mdc_split_flag == 0)
3323 30293 : av1_split_flag_rate(
3324 : sequence_control_set_ptr,
3325 : context_ptr,
3326 30293 : &context_ptr->md_cu_arr_nsq[curr_depth_blk1_mds],
3327 : 0,
3328 : PARTITION_NONE,
3329 : &curr_non_split_rate_blk1,
3330 30293 : context_ptr->full_lambda,
3331 : context_ptr->md_rate_estimation_ptr,
3332 30293 : sequence_control_set_ptr->max_sb_depth);
3333 :
3334 38490 : if (context_ptr->md_local_cu_unit[curr_depth_blk2_mds].tested_cu_flag)
3335 38491 : if (context_ptr->md_cu_arr_nsq[curr_depth_blk2_mds].mdc_split_flag == 0)
3336 30293 : av1_split_flag_rate(
3337 : sequence_control_set_ptr,
3338 : context_ptr,
3339 30293 : &context_ptr->md_cu_arr_nsq[curr_depth_blk2_mds],
3340 : 0,
3341 : PARTITION_NONE,
3342 : &curr_non_split_rate_blk2,
3343 30293 : context_ptr->full_lambda,
3344 : context_ptr->md_rate_estimation_ptr,
3345 30293 : sequence_control_set_ptr->max_sb_depth);
3346 :
3347 38490 : if (context_ptr->md_local_cu_unit[curr_depth_blk3_mds].tested_cu_flag)
3348 38491 : if (context_ptr->md_cu_arr_nsq[curr_depth_blk3_mds].mdc_split_flag == 0)
3349 30293 : av1_split_flag_rate(
3350 : sequence_control_set_ptr,
3351 : context_ptr,
3352 30293 : &context_ptr->md_cu_arr_nsq[curr_depth_blk3_mds],
3353 : 0,
3354 : PARTITION_NONE,
3355 : &curr_non_split_rate_blk3,
3356 30293 : context_ptr->full_lambda,
3357 : context_ptr->md_rate_estimation_ptr,
3358 30293 : sequence_control_set_ptr->max_sb_depth);
3359 : }
3360 : //curr_non_split_rate_344 = splitflag_mdc_344 || 4x4 ? 0 : compute;
3361 :
// Total for the current depth: four block costs, their non-split rates, and the above-depth split rate.
3362 100177 : *curr_depth_cost =
3363 100177 : context_ptr->md_local_cu_unit[curr_depth_mds].cost + curr_non_split_rate_blk3 +
3364 100177 : context_ptr->md_local_cu_unit[curr_depth_mds - 1 * step].cost + curr_non_split_rate_blk2 +
3365 100177 : context_ptr->md_local_cu_unit[curr_depth_mds - 2 * step].cost + curr_non_split_rate_blk1 +
3366 100177 : context_ptr->md_local_cu_unit[curr_depth_mds - 3 * step].cost + curr_non_split_rate_blk0 +
3367 : above_split_rate;
3368 100177 : }
3369 :
/*
 * Walks up the depth hierarchy from blk_mds, deciding at each level whether
 * the parent block or its four children are kept.
 *
 * The walk happens only when md_cu_arr_nsq[blk_mds].split_flag is EB_FALSE and
 * continues while the block under consideration has is_last_quadrant set.
 * At each level:
 *   - the parent index is derived from parent_depth_offset[][] (row chosen by
 *     whether the SB size is BLOCK_128X128),
 *   - parent and current-depth costs come from compute_depth_costs(), except
 *     for an I_SLICE with parent index 0 and a 128x128 SB, where the parent
 *     cost is forced to MAX_MODE_COST and compute_depth_costs() is not called
 *     (current_depth_cost then keeps its previous value),
 *   - the parent cost is also forced to MAX_MODE_COST when
 *     sb_geom[lcuAddr].block_is_allowed[] is false for the parent,
 *   - if parent cost <= current cost the parent is kept (split_flag cleared,
 *     cost stored, returned index updated); otherwise the parent entry takes
 *     the current-depth cost and is marked PARTITION_SPLIT.
 *
 * Returns blk_mds, or the index of the last parent that was kept.
 *
 * NOTE: lcuAddr is passed to UNUSED() below but is in fact read when indexing
 * sb_geom[]. tb_ptr, tbOriginX, tbOriginY, full_lambda and
 * md_rate_estimation_ptr are not referenced otherwise.
 */
3370 409400 : uint32_t d2_inter_depth_block_decision(
3371 : ModeDecisionContext *context_ptr,
3372 : uint32_t blk_mds,
3373 : LargestCodingUnit *tb_ptr,
3374 : uint32_t lcuAddr,
3375 : uint32_t tbOriginX,
3376 : uint32_t tbOriginY,
3377 : uint64_t full_lambda,
3378 : MdRateEstimationContext *md_rate_estimation_ptr,
3379 : PictureControlSet *picture_control_set_ptr)
3380 : {
3381 : UNUSED(tb_ptr);
3382 : UNUSED(lcuAddr);
3383 : UNUSED(tbOriginX);
3384 : UNUSED(tbOriginY);
3385 : UNUSED(full_lambda);
3386 : UNUSED(md_rate_estimation_ptr);
3387 :
3388 : uint32_t lastCuIndex, d0_idx_mds, d1_idx_mds, d2_idx_mds, top_left_idx_mds;
3389 : UNUSED(top_left_idx_mds);
3390 : UNUSED(d2_idx_mds);
3391 : UNUSED(d1_idx_mds);
3392 : UNUSED(d0_idx_mds);
3393 409400 : uint64_t parent_depth_cost = 0, current_depth_cost = 0;
3394 409400 : SequenceControlSet *sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
3395 : EbBool lastDepthFlag;
3396 : const BlockGeom * blk_geom;
3397 :
// lastDepthFlag is true when the starting block is not marked as split.
3398 409400 : lastDepthFlag = context_ptr->md_cu_arr_nsq[blk_mds].split_flag == EB_FALSE ? EB_TRUE : EB_FALSE;
3399 409400 : d1_idx_mds = blk_mds;
3400 409400 : d2_idx_mds = blk_mds;
3401 409400 : lastCuIndex = blk_mds;
3402 409400 : blk_geom = get_blk_geom_mds(blk_mds);
3403 409366 : uint32_t parent_depth_idx_mds = blk_mds;
3404 409366 : uint32_t current_depth_idx_mds = blk_mds;
3405 :
3406 409366 : if (lastDepthFlag) {
3407 413224 : while (blk_geom->is_last_quadrant) {
3408 : //get parent idx
3409 100178 : parent_depth_idx_mds = current_depth_idx_mds - parent_depth_offset[sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128][blk_geom->depth];
3410 100178 : if (picture_control_set_ptr->slice_type == I_SLICE && parent_depth_idx_mds == 0 && sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128)
3411 0 : parent_depth_cost = MAX_MODE_COST;
3412 : else
// The last argument was restored to &current_depth_cost (it had been corrupted to a stray
// currency sign by HTML-entity decoding of "&curren").
3413 100178 : compute_depth_costs(context_ptr, sequence_control_set_ptr, current_depth_idx_mds, parent_depth_idx_mds, ns_depth_offset[sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128][blk_geom->depth], &parent_depth_cost, &current_depth_cost);
// A parent that is not allowed for this SB can never win the comparison below.
3414 100185 : if (!sequence_control_set_ptr->sb_geom[lcuAddr].block_is_allowed[parent_depth_idx_mds])
3415 20400 : parent_depth_cost = MAX_MODE_COST;
// Ties go to the parent (no split).
3416 100185 : if (parent_depth_cost <= current_depth_cost) {
3417 73988 : context_ptr->md_cu_arr_nsq[parent_depth_idx_mds].split_flag = EB_FALSE;
3418 73988 : context_ptr->md_local_cu_unit[parent_depth_idx_mds].cost = parent_depth_cost;
3419 73988 : lastCuIndex = parent_depth_idx_mds;
3420 : }
3421 : else {
3422 26197 : context_ptr->md_local_cu_unit[parent_depth_idx_mds].cost = current_depth_cost;
3423 26197 : context_ptr->md_cu_arr_nsq[parent_depth_idx_mds].part = PARTITION_SPLIT;
3424 : }
3425 :
3426 : //setup next parent inter depth
3427 100185 : blk_geom = get_blk_geom_mds(parent_depth_idx_mds);
3428 100181 : current_depth_idx_mds = parent_depth_idx_mds;
3429 : }
3430 : }
3431 :
3432 409369 : return lastCuIndex;
3433 : }
/*
 * Variant of the depth-cost computation that works from the context's current
 * block geometry (context_ptr->blk_geom) instead of an explicit block index.
 *
 *   above_depth_mds  index of the block at the depth above.
 *   step             index distance between consecutive current-depth blocks.
 *   above_depth_cost out: stored cost of the above-depth block, or
 *                    MAX_MODE_COST when its tested_cu_flag is not EB_TRUE.
 *   curr_depth_cost  out: reset to 0, then accumulates cost + PARTITION_NONE
 *                    rate for the blocks at sqi_mds - i * step, i = 1..quadi,
 *                    plus the above-depth PARTITION_SPLIT rate.
 *
 * The loop starts at i = 1, so the block at sqi_mds itself is not part of the sum.
 *
 * Side effect: the neighbor mode/depth/partition fields of the above-depth
 * entry in md_local_cu_unit are overwritten with those of the block at
 * sqi_mds - quadi * step.
 */
3434 299913 : void compute_depth_costs_md_skip(
3435 : ModeDecisionContext *context_ptr,
3436 : SequenceControlSet *sequence_control_set_ptr,
3437 : uint32_t above_depth_mds,
3438 : uint32_t step,
3439 : uint64_t *above_depth_cost,
3440 : uint64_t *curr_depth_cost)
3441 : {
// above_non_split_rate is never modified in this function, so it contributes 0.
3442 299913 : uint64_t above_non_split_rate = 0;
3443 299913 : uint64_t above_split_rate = 0;
3444 299913 : *curr_depth_cost = 0;
3445 : // sum the previous ones
3446 895123 : for (int i = 1; i < context_ptr->blk_geom->quadi + 1; i++) {
3447 595150 : uint32_t curr_depth_cur_blk_mds = context_ptr->blk_geom->sqi_mds - i * step;
3448 595150 : uint64_t curr_non_split_rate_blk = 0;
// The PARTITION_NONE rate is queried only above BLOCK_4X4, for tested blocks with mdc_split_flag == 0.
3449 595150 : if (context_ptr->blk_geom->bsize > BLOCK_4X4) {
3450 316368 : if (context_ptr->md_local_cu_unit[curr_depth_cur_blk_mds].tested_cu_flag)
3451 316340 : if (context_ptr->md_cu_arr_nsq[curr_depth_cur_blk_mds].mdc_split_flag == 0)
3452 184382 : av1_split_flag_rate(
3453 : sequence_control_set_ptr,
3454 : context_ptr,
3455 184382 : &context_ptr->md_cu_arr_nsq[curr_depth_cur_blk_mds],
3456 : 0,
3457 : PARTITION_NONE,
3458 : &curr_non_split_rate_blk,
3459 184382 : context_ptr->full_lambda,
3460 : context_ptr->md_rate_estimation_ptr,
3461 184382 : sequence_control_set_ptr->max_sb_depth);
3462 : }
3463 595210 : *curr_depth_cost +=
3464 595210 : context_ptr->md_local_cu_unit[curr_depth_cur_blk_mds].cost + curr_non_split_rate_blk;
3465 : }
3466 : /*
3467 : ___________
3468 : | | |
3469 : |blk0 |blk1 |
3470 : |-----|-----|
3471 : |blk2 |blk3 |
3472 : |_____|_____|
3473 : */
3474 : // current depth blocks
3475 299973 : uint32_t curr_depth_blk0_mds = context_ptr->blk_geom->sqi_mds - context_ptr->blk_geom->quadi * step;
3476 :
// Copy blk0's neighbor information into the above-depth entry.
3477 299973 : context_ptr->md_local_cu_unit[above_depth_mds].left_neighbor_mode = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].left_neighbor_mode;
3478 299973 : context_ptr->md_local_cu_unit[above_depth_mds].left_neighbor_depth = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].left_neighbor_depth;
3479 299973 : context_ptr->md_local_cu_unit[above_depth_mds].top_neighbor_mode = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].top_neighbor_mode;
3480 299973 : context_ptr->md_local_cu_unit[above_depth_mds].top_neighbor_depth = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].top_neighbor_depth;
3481 299973 : context_ptr->md_local_cu_unit[above_depth_mds].left_neighbor_partition = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].left_neighbor_partition;
3482 299973 : context_ptr->md_local_cu_unit[above_depth_mds].above_neighbor_partition = context_ptr->md_local_cu_unit[curr_depth_blk0_mds].above_neighbor_partition;
3483 :
3484 : // Compute above depth cost
3485 299973 : if (context_ptr->md_local_cu_unit[above_depth_mds].tested_cu_flag == EB_TRUE)
3486 : {
3487 288051 : *above_depth_cost = context_ptr->md_local_cu_unit[above_depth_mds].cost + above_non_split_rate;
3488 : // Query the PARTITION_SPLIT rate for the above-depth block; it is added to *curr_depth_cost below
3489 288051 : av1_split_flag_rate(
3490 : sequence_control_set_ptr,
3491 : context_ptr,
3492 288051 : &context_ptr->md_cu_arr_nsq[above_depth_mds],
3493 : 0,
3494 : PARTITION_SPLIT,
3495 : &above_split_rate,
3496 288051 : context_ptr->full_lambda,
3497 : context_ptr->md_rate_estimation_ptr,
3498 288051 : sequence_control_set_ptr->max_sb_depth);
3499 : }
3500 : else
3501 11922 : *above_depth_cost = MAX_MODE_COST;
3502 :
3503 :
// above_split_rate is still 0 here when the above-depth block was not tested.
3504 299953 : *curr_depth_cost +=
3505 : above_split_rate;
3506 299953 : }
|