Line data Source code
1 : /*
2 : * Copyright(c) 2019 Intel Corporation
3 : * SPDX - License - Identifier: BSD - 2 - Clause - Patent
4 : */
5 :
6 : /*
7 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
8 : *
9 : * This source code is subject to the terms of the BSD 2 Clause License and
10 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
11 : * was not distributed with this source code in the LICENSE file, you can
12 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
13 : * Media Patent License 1.0 was not distributed with this source code in the
14 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
15 : */
16 :
17 : #include <stdlib.h>
18 :
19 : #include "EbPictureControlSet.h"
20 : #include "EbReferenceObject.h"
21 :
22 : #include "EbInterPrediction.h"
23 : #include "EbSvtAv1.h"
24 : #include "EbDefinitions.h"
25 : #include "EbAdaptiveMotionVectorPrediction.h"
26 :
27 : #include "EbModeDecisionProcess.h"
28 :
29 : #include "convolve.h"
30 : #include "aom_dsp_rtcd.h"
31 : #include "EbRateDistortionCost.h"
32 :
// Motion-vector bound constants. The arithmetic in each trailing comment reproduces the literal value.
33 : #define MVBOUNDLOW 36 // (80-71)<<2; 80 = ReferencePadding, and minus 71 is derived from the expression -64 + 1 - 8
34 : #define MVBOUNDHIGH 348 // (80+7)<<2; plus 7 is derived from the expression -1 + 8
35 : #define REFPADD_QPEL 320 // (16+64)<<2
36 :
// Extra pixels added to the block width/height by clamp_mv_to_umv_border_sb() when it builds its clamp limits.
37 : #define AOM_INTERP_EXTEND 4
38 :
39 : #define SCALE_NUMERATOR 8
40 :
// Scaled positions and steps carry SCALE_SUBPEL_BITS fractional bits (1024 steps per
// unit, mask 0x3FF). SCALE_EXTRA_BITS is the surplus over SUBPEL_BITS, and
// SCALE_EXTRA_OFF is half of one SUBPEL_BITS step expressed in that finer unit.
41 : #define SCALE_SUBPEL_BITS 10
42 : #define SCALE_SUBPEL_SHIFTS (1 << SCALE_SUBPEL_BITS)
43 : #define SCALE_SUBPEL_MASK (SCALE_SUBPEL_SHIFTS - 1)
44 : #define SCALE_EXTRA_BITS (SCALE_SUBPEL_BITS - SUBPEL_BITS)
45 : #define SCALE_EXTRA_OFF ((1 << SCALE_EXTRA_BITS) / 2)
46 :
// 3 fractional bits -> 8 positions.
47 : #define BIL_SUBPEL_BITS 3
48 : #define BIL_SUBPEL_SHIFTS (1 << BIL_SUBPEL_BITS)
49 :
// NOTE(review): presumably the default values for ConvolveParams round_0 / round_1;
// neither macro is referenced in the visible part of this file - confirm.
50 : #define ROUND0_BITS 3
51 : #define COMPOUND_ROUND1_BITS 7
52 :
// Two-entry dispatch table of inter-prediction entry points:
// index 0 -> av1_inter_prediction, index 1 -> av1_inter_prediction_hbd.
// NOTE(review): presumably indexed by a "high bit depth" flag - confirm at the call sites.
53 : static EB_AV1_INTER_PREDICTION_FUNC_PTR av1_inter_prediction_function_table[2] =
54 : {
55 : av1_inter_prediction,
56 : av1_inter_prediction_hbd
57 : };
58 :
59 : /* TODO: Add scaling of reference frame support later */
60 : // Note: Expect val to be in q4 precision
// Scales a horizontal position by the reference frame's x scale factor.
//   val : position to scale
//   sf  : scale factors; only x_scale_fp is read
// Returns (val * x_scale_fp + off), rounded (signed) and shifted right by
// REF_SCALE_SHIFT - SCALE_EXTRA_BITS.
61 0 : static INLINE int32_t scaled_x(int32_t val, const ScaleFactors *sf) {
// off is the scale factor's deviation from unity (1 << REF_SCALE_SHIFT)
// multiplied by half of (1 << SUBPEL_BITS).
62 0 : const int off =
63 0 : (sf->x_scale_fp - (1 << REF_SCALE_SHIFT)) * (1 << (SUBPEL_BITS - 1));
// The product is formed in 64 bits so val * x_scale_fp cannot overflow 32 bits.
64 0 : const int64_t tval = (int64_t)val * sf->x_scale_fp + off;
65 0 : return (int)ROUND_POWER_OF_TWO_SIGNED_64(tval,
66 : REF_SCALE_SHIFT - SCALE_EXTRA_BITS);
67 : }
68 :
69 : // Note: Expect val to be in q4 precision
// Scales a vertical position by the reference frame's y scale factor.
//   val : position to scale
//   sf  : scale factors; only y_scale_fp is read
// Returns (val * y_scale_fp + off), rounded (signed) and shifted right by
// REF_SCALE_SHIFT - SCALE_EXTRA_BITS. Mirrors scaled_x() for the vertical axis.
70 0 : static INLINE int32_t scaled_y(int32_t val, const ScaleFactors *sf) {
// off is the scale factor's deviation from unity (1 << REF_SCALE_SHIFT)
// multiplied by half of (1 << SUBPEL_BITS).
71 0 : const int32_t off =
72 0 : (sf->y_scale_fp - (1 << REF_SCALE_SHIFT)) * (1 << (SUBPEL_BITS - 1));
// The product is formed in 64 bits so val * y_scale_fp cannot overflow 32 bits.
73 0 : const int64_t tval = (int64_t)val * sf->y_scale_fp + off;
74 0 : return (int32_t)ROUND_POWER_OF_TWO_SIGNED_64(tval,
75 : REF_SCALE_SHIFT - SCALE_EXTRA_BITS);
76 : }
77 :
78 : // Note: Expect val to be in q4 precision
// Identity-scale counterpart of scaled_x()/scaled_y(): it is the function
// av1_setup_scale_factors_for_frame() installs when av1_is_scaled() is false.
//   val : position to convert (may be negative)
//   sf  : unused; present so the signature matches scaled_x()/scaled_y()
// Returns val multiplied by 2^SCALE_EXTRA_BITS.
// The multiply form is used instead of `val << SCALE_EXTRA_BITS` because
// left-shifting a negative signed value is undefined behaviour in C; the
// result is identical for every non-negative val.
79 0 : static int32_t unscaled_value(int32_t val, const ScaleFactors *sf) {
80 : (void)sf;
81 0 : return val * (1 << SCALE_EXTRA_BITS);
82 : }
83 :
// Returns other_size / this_size as a fixed-point number with REF_SCALE_SHIFT
// fractional bits, rounded to nearest (this_size / 2 is added before dividing).
//   other_size : dimension of the other (reference) frame
//   this_size  : dimension of the current frame; must be non-zero (it is the divisor)
84 812 : static int32_t get_fixed_point_scale_factor(int32_t other_size, int32_t this_size) {
85 : // Calculate scaling factor once for each reference frame
86 : // and use fixed point scaling factors in decoding and encoding routines.
87 : // Hardware implementations can calculate scale factor in device driver
88 : // and use multiplication and shifting on hardware instead of division.
89 812 : return ((other_size << REF_SCALE_SHIFT) + this_size / 2) / this_size;
90 : }
91 :
92 : // Given the fixed point scale, calculate coarse point scale.
// Reduces a REF_SCALE_SHIFT-bit fixed-point scale to SCALE_SUBPEL_BITS of
// fractional precision by a rounding right shift of
// (REF_SCALE_SHIFT - SCALE_SUBPEL_BITS) bits.
//   scale_fp : value produced by get_fixed_point_scale_factor()
93 812 : static int32_t fixed_point_scale_to_coarse_point_scale(int32_t scale_fp) {
94 812 : return ROUND_POWER_OF_TWO(scale_fp, REF_SCALE_SHIFT - SCALE_SUBPEL_BITS);
95 : }
96 :
97 : // Note: x and y are integer precision, mvq4 is q4 precision.
// Scales a motion vector for a reference frame with scale factors sf.
//   mvq4 : motion vector to scale (row/col are added to the shifted y/x)
//   x, y : integer block position; shifted left by SUBPEL_BITS before scaling
//   sf   : scale factors handed to scaled_x()/scaled_y()
// Returns the scaled position of (x, y) + mv minus the scaled position of
// (x, y) alone. The first member is the vertical (row) component and the
// second the horizontal (col) component.
98 0 : MV32 av1_scale_mv(const MV *mvq4, int x, int y,
99 : const ScaleFactors *sf) {
// Scaled block origin, subtracted below so only the displacement remains.
100 0 : const int x_off_q4 = scaled_x(x << SUBPEL_BITS, sf);
101 0 : const int y_off_q4 = scaled_y(y << SUBPEL_BITS, sf);
102 0 : const MV32 res = { scaled_y((y << SUBPEL_BITS) + mvq4->row, sf) - y_off_q4,
103 0 : scaled_x((x << SUBPEL_BITS) + mvq4->col, sf) - x_off_q4 };
104 0 : return res;
105 : }
106 :
// Fills *sf with the scale factors that map the current frame onto a reference frame.
//   sf               : output structure
//   other_w, other_h : reference frame dimensions
//   this_w, this_h   : current frame dimensions
// If valid_ref_frame_size() rejects the size pair, only x_scale_fp and
// y_scale_fp are written (both REF_INVALID_SCALE) and the remaining fields
// are left untouched.
107 406 : void av1_setup_scale_factors_for_frame(ScaleFactors *sf, int other_w,
108 : int other_h, int this_w, int this_h) {
109 406 : if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) {
110 0 : sf->x_scale_fp = REF_INVALID_SCALE;
111 0 : sf->y_scale_fp = REF_INVALID_SCALE;
112 0 : return;
113 : }
114 :
115 406 : sf->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w);
116 406 : sf->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h);
117 :
// Coarser per-pixel steps derived from the fixed-point factors.
118 406 : sf->x_step_q4 = fixed_point_scale_to_coarse_point_scale(sf->x_scale_fp);
119 406 : sf->y_step_q4 = fixed_point_scale_to_coarse_point_scale(sf->y_scale_fp);
120 :
// Install the position-scaling callbacks: real scaling only when av1_is_scaled() says so.
121 406 : if (av1_is_scaled(sf)) {
122 0 : sf->scale_value_x = scaled_x;
123 0 : sf->scale_value_y = scaled_y;
124 : }
125 : else {
126 406 : sf->scale_value_x = unscaled_value;
127 406 : sf->scale_value_y = unscaled_value;
128 : }
129 : }
130 :
// Returns non-zero when either step differs from SCALE_SUBPEL_SHIFTS, and 0
// when both xs and ys equal it.
//   xs, ys : horizontal / vertical step values
131 39670 : static INLINE int32_t has_scale(int32_t xs, int32_t ys) {
132 39670 : return xs != SCALE_SUBPEL_SHIFTS || ys != SCALE_SUBPEL_SHIFTS;
133 : }
134 :
// Drops the SCALE_EXTRA_BITS surplus precision from all four fields of *sp
// (subpel_x, subpel_y, xs, ys) in place, using right shifts.
// The asserts check that afterwards the sub-pel positions are below
// SUBPEL_SHIFTS and the steps are at most SUBPEL_SHIFTS.
135 39670 : static INLINE void revert_scale_extra_bits(SubpelParams *sp) {
136 39670 : sp->subpel_x >>= SCALE_EXTRA_BITS;
137 39670 : sp->subpel_y >>= SCALE_EXTRA_BITS;
138 39670 : sp->xs >>= SCALE_EXTRA_BITS;
139 39670 : sp->ys >>= SCALE_EXTRA_BITS;
140 39670 : assert(sp->subpel_x < SUBPEL_SHIFTS);
141 39670 : assert(sp->subpel_y < SUBPEL_SHIFTS);
142 39670 : assert(sp->xs <= SUBPEL_SHIFTS);
143 39670 : assert(sp->ys <= SUBPEL_SHIFTS);
144 39670 : }
145 :
// Declaration only; the definition is not in the visible part of this file.
// NOTE(review): presumably expands ref_frame_type into the rf[] pair - confirm against the definition.
146 : extern void av1_set_ref_frame(MvReferenceFrame *rf,
147 : int8_t ref_frame_type);
148 :
// Returns a copy of *src_mv, rescaled for the plane's subsampling and clamped
// against the block's distances to the frame edges.
//   xd         : supplies mb_to_left/right/top/bottom_edge
//   src_mv     : motion vector to clamp (not modified)
//   bw, bh     : block width / height
//   ss_x, ss_y : subsampling shifts; each must be <= 1 (asserted)
149 443962000 : static INLINE MV clamp_mv_to_umv_border_sb(const MacroBlockD *xd,
150 : const MV *src_mv, int32_t bw, int32_t bh,
151 : int32_t ss_x, int32_t ss_y) {
152 : // If the MV points so far into the UMV border that no visible pixels
153 : // are used for reconstruction, the subpel part of the MV can be
154 : // discarded and the MV limited to 16 pixels with equivalent results.
// Margins allowed beyond each edge: block size plus AOM_INTERP_EXTEND pixels,
// in units of 1 / (1 << SUBPEL_BITS); right/bottom are SUBPEL_SHIFTS smaller.
155 443962000 : const int32_t spel_left = (AOM_INTERP_EXTEND + bw) << SUBPEL_BITS;
156 443962000 : const int32_t spel_right = spel_left - SUBPEL_SHIFTS;
157 443962000 : const int32_t spel_top = (AOM_INTERP_EXTEND + bh) << SUBPEL_BITS;
158 443962000 : const int32_t spel_bottom = spel_top - SUBPEL_SHIFTS;
// The MV is doubled when the axis is not subsampled (ss == 0) and kept as-is
// when it is (ss == 1); multiplication keeps negative components well-defined.
159 443962000 : MV clamped_mv = { (int16_t)(src_mv->row * (1 << (1 - ss_y))),
160 443962000 : (int16_t)(src_mv->col * (1 << (1 - ss_x))) };
161 443962000 : assert(ss_x <= 1);
162 443962000 : assert(ss_y <= 1);
163 :
// The edge distances get the same (1 << (1 - ss)) factor before the margins are applied.
164 443962000 : clamp_mv(&clamped_mv,
165 443962000 : xd->mb_to_left_edge * (1 << (1 - ss_x)) - spel_left,
166 443962000 : xd->mb_to_right_edge * (1 << (1 - ss_x)) + spel_right,
167 443962000 : xd->mb_to_top_edge * (1 << (1 - ss_y)) - spel_top,
168 443962000 : xd->mb_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom);
169 :
170 444154000 : return clamped_mv;
171 : }
172 :
// Interpolation kernels, one row of 8 coefficients per sub-pel position
// (16 rows listed). Every row sums to 128, and row 0 is the pass-through
// kernel (a single 128 at index 3). The array is declared with 256-byte alignment.
173 : DECLARE_ALIGNED(256, const InterpKernel,
174 : sub_pel_filters_8[SUBPEL_SHIFTS]) = {
175 : { 0, 0, 0, 128, 0, 0, 0, 0 },{ 0, 2, -6, 126, 8, -2, 0, 0 },
176 : { 0, 2, -10, 122, 18, -4, 0, 0 },{ 0, 2, -12, 116, 28, -8, 2, 0 },
177 : { 0, 2, -14, 110, 38, -10, 2, 0 },{ 0, 2, -14, 102, 48, -12, 2, 0 },
178 : { 0, 2, -16, 94, 58, -12, 2, 0 },{ 0, 2, -14, 84, 66, -12, 2, 0 },
179 : { 0, 2, -14, 76, 76, -14, 2, 0 },{ 0, 2, -12, 66, 84, -14, 2, 0 },
180 : { 0, 2, -12, 58, 94, -16, 2, 0 },{ 0, 2, -12, 48, 102, -14, 2, 0 },
181 : { 0, 2, -10, 38, 110, -14, 2, 0 },{ 0, 2, -8, 28, 116, -12, 2, 0 },
182 : { 0, 0, -4, 18, 122, -10, 2, 0 },{ 0, 0, -2, 8, 126, -6, 2, 0 }
183 : };
// Interpolation kernels stored in the same 8-wide row layout as
// sub_pel_filters_8, but with only the middle four entries (indices 2..5)
// non-zero. Every row sums to 128, and row 0 is the pass-through kernel.
// The array is declared with 256-byte alignment.
184 : DECLARE_ALIGNED(256, const InterpKernel,
185 : sub_pel_filters_4[SUBPEL_SHIFTS]) = {
186 : { 0, 0, 0, 128, 0, 0, 0, 0 },{ 0, 0, -4, 126, 8, -2, 0, 0 },
187 : { 0, 0, -8, 122, 18, -4, 0, 0 },{ 0, 0, -10, 116, 28, -6, 0, 0 },
188 : { 0, 0, -12, 110, 38, -8, 0, 0 },{ 0, 0, -12, 102, 48, -10, 0, 0 },
189 : { 0, 0, -14, 94, 58, -10, 0, 0 },{ 0, 0, -12, 84, 66, -10, 0, 0 },
190 : { 0, 0, -12, 76, 76, -12, 0, 0 },{ 0, 0, -10, 66, 84, -12, 0, 0 },
191 : { 0, 0, -10, 58, 94, -14, 0, 0 },{ 0, 0, -10, 48, 102, -12, 0, 0 },
192 : { 0, 0, -8, 38, 110, -12, 0, 0 },{ 0, 0, -6, 28, 116, -10, 0, 0 },
193 : { 0, 0, -4, 18, 122, -8, 0, 0 },{ 0, 0, -2, 8, 126, -4, 0, 0 }
194 : };
195 :
// Largest kernel length assumed when sizing the im_block scratch buffers of the 2D convolve functions below.
196 : #define MAX_FILTER_TAP 8
// Signed distance between two order hints, wrapped to order_hint_bits bits.
//   seq_header : supplies order_hint_info.enable_order_hint and .order_hint_bits
//   ref_hint   : minuend
//   order_hint : subtrahend
// Returns 0 when order hints are disabled; otherwise (ref_hint - order_hint)
// sign-extended from order_hint_bits bits.
197 149484000 : int get_relative_dist_enc(SeqHeader *seq_header, int ref_hint, int order_hint)
198 : {
199 : int diff, m;
200 149484000 : if (!seq_header->order_hint_info.enable_order_hint)
201 0 : return 0;
202 149484000 : diff = ref_hint - order_hint;
// m is the sign bit of an order_hint_bits-wide value.
203 149484000 : m = 1 << (seq_header->order_hint_info.order_hint_bits - 1);
// Keep the low bits and subtract the sign bit: two's-complement sign extension.
204 149484000 : diff = (diff & (m - 1)) - (diff & m);
205 149484000 : return diff;
206 : }
207 :
// Tables used by av1_dist_wtd_comp_weight_assign().
// quant_dist_weight[i][*]: multiplier pairs applied to the two frame distances
// when searching for the weight row i (only rows 0..2 are read by that search).
208 : static const int quant_dist_weight[4][2] = {
209 : { 2, 3 }, { 2, 5 }, { 2, 7 }, { 1, MAX_FRAME_DISTANCE }
210 : };
// quant_dist_lookup_table[order_idx][i][*]: the offset pair for row i.
// Each pair sums to 16, and the [1] plane holds the [0] plane's pairs swapped.
211 : static const int quant_dist_lookup_table[2][4][2] = {
212 : { { 9, 7 }, { 11, 5 }, { 12, 4 }, { 13, 3 } },
213 : { { 7, 9 }, { 5, 11 }, { 4, 12 }, { 3, 13 } },
214 : };
215 :
// Chooses the forward/backward weights for distance-weighted compound prediction.
//   seq_header            : passed to get_relative_dist_enc()
//   cur_frame_index       : order hint of the current frame
//   bck_frame_index       : order hint of the backward reference
//   fwd_frame_index       : order hint of the forward reference
//   compound_idx          : non-zero disables distance weighting
//   order_idx             : first index into quant_dist_lookup_table (0 or 1)
//   fwd_offset/bck_offset : outputs; must be non-NULL (asserted)
//   use_dist_wtd_comp_avg : output flag; always written
//   is_compound           : zero disables distance weighting
// When weighting is disabled, *use_dist_wtd_comp_avg is set to 0 and
// *fwd_offset / *bck_offset are NOT written.
216 208721000 : void av1_dist_wtd_comp_weight_assign(
217 : SeqHeader *seq_header,
218 : int cur_frame_index,
219 : int bck_frame_index,
220 : int fwd_frame_index,
221 : int compound_idx,
222 : int order_idx,
223 : int *fwd_offset, int *bck_offset,
224 : int *use_dist_wtd_comp_avg,
225 : int is_compound) {
226 :
227 208721000 : assert(fwd_offset != NULL && bck_offset != NULL);
228 208867000 : if (!is_compound || compound_idx) {
229 174338000 : *use_dist_wtd_comp_avg = 0;
230 174338000 : return;
231 : }
232 :
233 34529200 : *use_dist_wtd_comp_avg = 1;
234 :
// Absolute distances from the current frame to each reference, clamped to [0, MAX_FRAME_DISTANCE].
235 34529200 : int d0 = clamp(abs(get_relative_dist_enc(seq_header,
236 : fwd_frame_index, cur_frame_index)),
237 : 0, MAX_FRAME_DISTANCE);
238 34522200 : int d1 = clamp(abs(get_relative_dist_enc(seq_header,
239 : cur_frame_index, bck_frame_index)),
240 : 0, MAX_FRAME_DISTANCE);
241 :
// order is 1 when d0 <= d1; it selects which column of each table pair goes with which reference.
242 34512100 : const int order = d0 <= d1;
243 :
// A zero distance bypasses the search and uses the last table row directly.
244 34512100 : if (d0 == 0 || d1 == 0) {
245 0 : *fwd_offset = quant_dist_lookup_table[order_idx][3][order];
246 0 : *bck_offset = quant_dist_lookup_table[order_idx][3][1 - order];
247 0 : return;
248 : }
249 :
// Find the first row i in 0..2 whose weighted distances cross over.
// If none does, the loop ends with i == 3 and the last row is used.
250 : int i;
251 74198800 : for (i = 0; i < 3; ++i) {
252 66973600 : int c0 = quant_dist_weight[i][order];
253 66973600 : int c1 = quant_dist_weight[i][!order];
254 66973600 : int d0_c0 = d0 * c0;
255 66973600 : int d1_c1 = d1 * c1;
256 66973600 : if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
257 : }
258 :
259 34512100 : *fwd_offset = quant_dist_lookup_table[order_idx][i][order];
260 34512100 : *bck_offset = quant_dist_lookup_table[order_idx][i][1 - order];
261 : }
262 :
// 8-bit separable 2D convolution writing pixels directly to dst (plain C implementation).
// A horizontal pass filters h + taps_y - 1 source rows into the 16-bit scratch
// buffer im_block; a vertical pass then filters im_block into dst.
//   src, src_stride  : input samples; rows above/below and columns left/right
//                      of the w x h block are read (fo_vert / fo_horiz)
//   dst, dst_stride  : output block of clipped 8-bit pixels
//   w, h             : block size; im_block holds at most
//                      (MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE entries
//   filter_params_x/y: kernel sets; taps gives the kernel length
//   subpel_x_q4/y_q4 : sub-pel positions; only the SUBPEL_MASK bits are used
//   conv_params      : supplies round_0 (after the horizontal pass) and
//                      round_1 (after the vertical pass)
263 0 : void eb_av1_convolve_2d_sr_c(const uint8_t *src, int32_t src_stride, uint8_t *dst,
264 : int32_t dst_stride, int32_t w, int32_t h,
265 : InterpFilterParams *filter_params_x,
266 : InterpFilterParams *filter_params_y,
267 : const int32_t subpel_x_q4, const int32_t subpel_y_q4,
268 : ConvolveParams *conv_params)
269 : {
270 : int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
271 0 : int32_t im_h = h + filter_params_y->taps - 1;
272 0 : int32_t im_stride = w;
273 0 : const int32_t fo_vert = filter_params_y->taps / 2 - 1;
274 0 : const int32_t fo_horiz = filter_params_x->taps / 2 - 1;
275 0 : const int32_t bd = 8;
// Rounding still owed after round_0 and round_1 have been applied.
276 0 : const int32_t bits =
277 0 : FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
278 :
279 : // horizontal filter
// Start fo_vert rows above the block so the vertical pass has its leading taps.
280 0 : const uint8_t *src_horiz = src - fo_vert * src_stride;
281 0 : const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
282 : *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
283 0 : for (int32_t y = 0; y < im_h; ++y) {
284 0 : for (int32_t x = 0; x < w; ++x) {
// The bias keeps the intermediate sum non-negative (checked by the assert below).
285 0 : int32_t sum = (1 << (bd + FILTER_BITS - 1));
286 0 : for (int32_t k = 0; k < filter_params_x->taps; ++k)
287 0 : sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
288 0 : assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
289 0 : im_block[y * im_stride + x] =
290 0 : (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
291 : }
292 : }
293 :
294 : // vertical filter
295 0 : int16_t *src_vert = im_block + fo_vert * im_stride;
296 0 : const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
297 : *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
298 0 : const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
299 0 : for (int32_t y = 0; y < h; ++y) {
300 0 : for (int32_t x = 0; x < w; ++x) {
301 0 : int32_t sum = 1 << offset_bits;
302 0 : for (int32_t k = 0; k < filter_params_y->taps; ++k)
303 0 : sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
304 0 : assert(0 <= sum && sum < (1 << (offset_bits + 2)));
// Round by round_1, then subtract the offsets (expressed at the post-round_1 scale).
305 0 : int16_t res = (ConvBufType)(ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
306 0 : ((1 << (offset_bits - conv_params->round_1)) +
307 0 : (1 << (offset_bits - conv_params->round_1 - 1))));
308 0 : dst[y * dst_stride + x] = (uint8_t)clip_pixel_highbd(ROUND_POWER_OF_TWO(res, bits), 8);
309 : }
310 : }
311 0 : }
312 :
// 8-bit vertical-only convolution writing pixels directly to dst (plain C implementation).
//   src, src_stride : input samples; rows above and below the block are read (fo_vert)
//   dst, dst_stride : output block of clipped 8-bit pixels
//   w, h            : block size
//   filter_params_y : vertical kernel set; must be non-NULL (asserted)
//   subpel_y_q4     : vertical sub-pel position; only the SUBPEL_MASK bits are used
//   filter_params_x, subpel_x_q4 : unused
//   conv_params     : only inspected by the asserts; the result is always
//                     rounded by FILTER_BITS
313 0 : void eb_av1_convolve_y_sr_c(const uint8_t *src, int32_t src_stride, uint8_t *dst,
314 : int32_t dst_stride, int32_t w, int32_t h,
315 : InterpFilterParams *filter_params_x,
316 : InterpFilterParams *filter_params_y,
317 : const int32_t subpel_x_q4, const int32_t subpel_y_q4,
318 : ConvolveParams *conv_params)
319 : {
320 0 : assert(filter_params_y != NULL);
321 0 : const int32_t fo_vert = filter_params_y->taps / 2 - 1;
322 : (void)filter_params_x;
323 : (void)subpel_x_q4;
// conv_params is referenced only inside assert(); presumably this cast silences
// the unused-parameter warning when asserts are compiled out.
324 : (void)conv_params;
325 :
326 0 : assert(conv_params->round_0 <= FILTER_BITS);
327 0 : assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) ||
328 : ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
329 :
330 : // vertical filter
331 0 : const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
332 : *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
333 :
334 0 : for (int32_t y = 0; y < h; ++y) {
335 0 : for (int32_t x = 0; x < w; ++x) {
336 0 : int32_t res = 0;
337 0 : for (int32_t k = 0; k < filter_params_y->taps; ++k)
338 0 : res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
339 0 : dst[y * dst_stride + x] =
340 0 : (uint8_t)clip_pixel_highbd(ROUND_POWER_OF_TWO(res, FILTER_BITS), 8);
341 : }
342 : }
343 0 : }
344 :
// 8-bit horizontal-only convolution writing pixels directly to dst (plain C implementation).
//   src, src_stride : input samples; columns left and right of the block are read (fo_horiz)
//   dst, dst_stride : output block of clipped 8-bit pixels
//   w, h            : block size
//   filter_params_x : horizontal kernel set
//   subpel_x_q4     : horizontal sub-pel position; only the SUBPEL_MASK bits are used
//   filter_params_y, subpel_y_q4 : unused
//   conv_params     : round_0 is applied first, then the remaining
//                     FILTER_BITS - round_0 bits; round_1 is only checked by an assert
345 0 : void eb_av1_convolve_x_sr_c(const uint8_t *src, int32_t src_stride, uint8_t *dst,
346 : int32_t dst_stride, int32_t w, int32_t h,
347 : InterpFilterParams *filter_params_x,
348 : InterpFilterParams *filter_params_y,
349 : const int32_t subpel_x_q4, const int32_t subpel_y_q4,
350 : ConvolveParams *conv_params)
351 : {
352 0 : const int32_t fo_horiz = filter_params_x->taps / 2 - 1;
353 0 : const int32_t bits = FILTER_BITS - conv_params->round_0;
354 : (void)filter_params_y;
355 : (void)subpel_y_q4;
356 : (void)conv_params;
357 :
358 0 : assert(bits >= 0);
359 0 : assert((FILTER_BITS - conv_params->round_1) >= 0 ||
360 : ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
361 :
362 : // horizontal filter
363 0 : const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
364 : *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
365 :
366 0 : for (int32_t y = 0; y < h; ++y) {
367 0 : for (int32_t x = 0; x < w; ++x) {
368 0 : int32_t res = 0;
369 0 : for (int32_t k = 0; k < filter_params_x->taps; ++k)
370 0 : res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
// Two-stage rounding: round_0 first, then the remaining bits.
371 0 : res = ROUND_POWER_OF_TWO(res, conv_params->round_0);
372 0 : dst[y * dst_stride + x] = (uint8_t)clip_pixel_highbd(ROUND_POWER_OF_TWO(res, bits), 8);
373 : }
374 : }
375 0 : }
376 :
// 8-bit "no filtering" case: copies the w x h block from src to dst sample by sample.
//   src, src_stride : input block
//   dst, dst_stride : output block
//   w, h            : block size
// All filter, sub-pel and conv_params arguments are unused; they are present
// so the signature matches the other eb_av1_convolve_*_sr_c functions.
377 0 : void eb_av1_convolve_2d_copy_sr_c(const uint8_t *src, int32_t src_stride, uint8_t *dst,
378 : int32_t dst_stride, int32_t w, int32_t h,
379 : InterpFilterParams *filter_params_x,
380 : InterpFilterParams *filter_params_y,
381 : const int32_t subpel_x_q4, const int32_t subpel_y_q4,
382 : ConvolveParams *conv_params) {
383 : (void)filter_params_x;
384 : (void)filter_params_y;
385 : (void)subpel_x_q4;
386 : (void)subpel_y_q4;
387 : (void)conv_params;
388 :
389 0 : for (int32_t y = 0; y < h; ++y) {
390 0 : for (int32_t x = 0; x < w; ++x)
391 0 : dst[y * dst_stride + x] = src[y * src_stride + x];
392 : }
393 0 : }
394 :
// 8-bit separable 2D convolution with a per-sample position step (plain C implementation).
// Unlike the unscaled variants, the source position advances by x_step_qn /
// y_step_qn per output sample, and the kernel is re-selected for every sample
// from the fractional part of that position.
//   src, src_stride      : input samples
//   dst8, dst8_stride    : 8-bit output; written unless this is the first
//                          (non-averaging) pass of a compound prediction
//   w, h                 : output block size
//   filter_params_x/y    : kernel sets; taps gives the kernel length
//   subpel_x_qn, x_step_qn, subpel_y_qn, y_step_qn :
//                          start position and step, with SCALE_SUBPEL_BITS
//                          fractional bits
//   conv_params          : round_0/round_1, compound flags and weights, and
//                          the 16-bit buffer dst / dst_stride
395 0 : void eb_av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
396 : uint8_t *dst8,
397 : int dst8_stride, int w, int h,
398 : const InterpFilterParams *filter_params_x,
399 : const InterpFilterParams *filter_params_y,
400 : const int subpel_x_qn, const int x_step_qn,
401 : const int subpel_y_qn, const int y_step_qn,
402 : ConvolveParams *conv_params)
403 : {
404 : int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
// Intermediate rows needed: integer position of the last output row plus the kernel length.
405 0 : int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
406 0 : filter_params_y->taps;
407 0 : CONV_BUF_TYPE *dst16 = conv_params->dst;
408 0 : const int dst16_stride = conv_params->dst_stride;
409 0 : const int bits =
410 0 : FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
411 0 : assert(bits >= 0);
412 0 : int im_stride = w;
413 0 : const int fo_vert = filter_params_y->taps / 2 - 1;
414 0 : const int fo_horiz = filter_params_x->taps / 2 - 1;
415 0 : const int bd = 8;
416 :
417 : // horizontal filter
418 0 : const uint8_t *src_horiz = src - fo_vert * src_stride;
419 0 : for (int y = 0; y < im_h; ++y) {
420 0 : int x_qn = subpel_x_qn;
421 0 : for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
// The integer part of x_qn selects the source column; the fractional part,
// reduced by SCALE_EXTRA_BITS, selects the kernel.
422 0 : const uint8_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
423 0 : const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
424 0 : assert(x_filter_idx < SUBPEL_SHIFTS);
425 : const int16_t *x_filter =
426 0 : av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
427 0 : int32_t sum = (1 << (bd + FILTER_BITS - 1));
428 0 : for (int k = 0; k < filter_params_x->taps; ++k) {
429 0 : sum += x_filter[k] * src_x[k - fo_horiz];
430 : }
431 0 : assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
432 0 : im_block[y * im_stride + x] =
433 0 : (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
434 : }
435 0 : src_horiz += src_stride;
436 : }
437 :
438 : // vertical filter
// This pass walks column by column (x outer, y inner); src_vert advances one column per x.
439 0 : int16_t *src_vert = im_block + fo_vert * im_stride;
440 0 : const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
441 0 : for (int x = 0; x < w; ++x) {
442 0 : int y_qn = subpel_y_qn;
443 0 : for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
444 0 : const int16_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
445 0 : const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
446 0 : assert(y_filter_idx < SUBPEL_SHIFTS);
447 : const int16_t *y_filter =
448 0 : av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
449 0 : int32_t sum = 1 << offset_bits;
450 0 : for (int k = 0; k < filter_params_y->taps; ++k) {
451 0 : sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
452 : }
453 0 : assert(0 <= sum && sum < (1 << (offset_bits + 2)));
454 0 : CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
// Compound: the first pass (do_average == 0) only stores res in dst16;
// the second pass blends res with the stored value and writes the pixel.
455 0 : if (conv_params->is_compound) {
456 0 : if (conv_params->do_average) {
457 0 : int32_t tmp = dst16[y * dst16_stride + x];
458 0 : if (conv_params->use_dist_wtd_comp_avg) {
459 0 : tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
460 0 : tmp = tmp >> DIST_PRECISION_BITS;
461 : }
462 : else {
463 0 : tmp += res;
464 0 : tmp = tmp >> 1;
465 : }
466 : /* Subtract round offset and convolve round */
467 0 : tmp = tmp - ((1 << (offset_bits - conv_params->round_1)) +
468 0 : (1 << (offset_bits - conv_params->round_1 - 1)));
469 0 : dst8[y * dst8_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits));
470 : }
471 : else {
472 0 : dst16[y * dst16_stride + x] = res;
473 : }
474 : }
475 : else {
476 : /* Subtract round offset and convolve round */
477 0 : int32_t tmp = res - ((1 << (offset_bits - conv_params->round_1)) +
478 0 : (1 << (offset_bits - conv_params->round_1 - 1)));
479 0 : dst8[y * dst8_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits));
480 : }
481 : }
482 0 : src_vert++;
483 : }
484 0 : }
485 :
// 8-bit compound separable 2D convolution (plain C implementation).
// With conv_params->do_average == 0 the offset-biased, round_1-rounded result
// is stored in the 16-bit buffer conv_params->dst and dst8 is not written.
// With do_average != 0 the result is blended with the value already in that
// buffer and the final clipped pixel is written to dst8.
//   src, src_stride   : input samples; surrounding rows/columns are read
//   dst8, dst8_stride : 8-bit output (averaging pass only)
//   w, h              : block size; im_block holds at most
//                       (MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE entries
//   filter_params_x/y : kernel sets
//   subpel_x_q4/y_q4  : sub-pel positions; only the SUBPEL_MASK bits are used
//   conv_params       : dst, dst_stride, round_0, round_1, do_average,
//                       use_jnt_comp_avg, fwd_offset, bck_offset
486 0 : void eb_av1_jnt_convolve_2d_c(const uint8_t *src, int32_t src_stride, uint8_t *dst8,
487 : int32_t dst8_stride, int32_t w, int32_t h,
488 : InterpFilterParams *filter_params_x,
489 : InterpFilterParams *filter_params_y,
490 : const int32_t subpel_x_q4, const int32_t subpel_y_q4,
491 : ConvolveParams *conv_params)
492 : {
493 0 : ConvBufType *dst = conv_params->dst;
494 0 : int32_t dst_stride = conv_params->dst_stride;
495 : int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
496 0 : int32_t im_h = h + filter_params_y->taps - 1;
497 0 : int32_t im_stride = w;
498 0 : const int32_t fo_vert = filter_params_y->taps / 2 - 1;
499 0 : const int32_t fo_horiz = filter_params_x->taps / 2 - 1;
500 0 : const int32_t bd = 8;
501 0 : const int32_t round_bits =
502 0 : 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
503 :
504 : // horizontal filter
505 0 : const uint8_t *src_horiz = src - fo_vert * src_stride;
506 0 : const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
507 : *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
508 0 : for (int32_t y = 0; y < im_h; ++y) {
509 0 : for (int32_t x = 0; x < w; ++x) {
510 0 : int32_t sum = (1 << (bd + FILTER_BITS - 1));
511 0 : for (int32_t k = 0; k < filter_params_x->taps; ++k)
512 0 : sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
513 0 : assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
514 0 : im_block[y * im_stride + x] =
515 0 : (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
516 : }
517 : }
518 :
519 : // vertical filter
520 0 : int16_t *src_vert = im_block + fo_vert * im_stride;
521 0 : const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
522 : *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
523 0 : const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
524 0 : for (int32_t y = 0; y < h; ++y) {
525 0 : for (int32_t x = 0; x < w; ++x) {
526 0 : int32_t sum = 1 << offset_bits;
527 0 : for (int32_t k = 0; k < filter_params_y->taps; ++k)
528 0 : sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
529 0 : assert(0 <= sum && sum < (1 << (offset_bits + 2)));
// res still carries the offset bias; it is removed only in the averaging pass below.
530 0 : ConvBufType res = (ConvBufType)ROUND_POWER_OF_TWO(sum, conv_params->round_1);
531 0 : if (conv_params->do_average) {
532 0 : int32_t tmp = dst[y * dst_stride + x];
// Weighted blend (fwd_offset / bck_offset, normalised by DIST_PRECISION_BITS) or plain mean.
533 0 : if (conv_params->use_jnt_comp_avg) {
534 0 : tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
535 0 : tmp = tmp >> DIST_PRECISION_BITS;
536 : }
537 : else {
538 0 : tmp += res;
539 0 : tmp = tmp >> 1;
540 : }
541 0 : tmp -= (1 << (offset_bits - conv_params->round_1)) +
542 0 : (1 << (offset_bits - conv_params->round_1 - 1));
543 0 : dst8[y * dst8_stride + x] =
544 0 : (uint8_t)clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), 8);
545 : }
546 : else
547 0 : dst[y * dst_stride + x] = res;
548 : }
549 : }
550 0 : }
551 :
// 8-bit compound vertical-only convolution (plain C implementation).
// With do_average == 0 the biased intermediate is stored in conv_params->dst;
// with do_average != 0 it is blended with the stored value and the clipped
// pixel is written to dst8.
//   src, src_stride   : input samples; rows above and below the block are read
//   dst8, dst8_stride : 8-bit output (averaging pass only)
//   w, h              : block size
//   filter_params_y, subpel_y_q4 : vertical kernel selection (SUBPEL_MASK bits only)
//   filter_params_x, subpel_x_q4 : unused
//   conv_params       : dst, dst_stride, round_0, round_1, do_average,
//                       use_jnt_comp_avg, fwd_offset, bck_offset
552 0 : void eb_av1_jnt_convolve_y_c(const uint8_t *src, int32_t src_stride, uint8_t *dst8,
553 : int32_t dst8_stride, int32_t w, int32_t h,
554 : InterpFilterParams *filter_params_x,
555 : InterpFilterParams *filter_params_y,
556 : const int32_t subpel_x_q4, const int32_t subpel_y_q4,
557 : ConvolveParams *conv_params)
558 : {
559 0 : ConvBufType *dst = conv_params->dst;
560 0 : int32_t dst_stride = conv_params->dst_stride;
561 0 : const int32_t fo_vert = filter_params_y->taps / 2 - 1;
562 0 : const int32_t bits = FILTER_BITS - conv_params->round_0;
563 0 : const int32_t bd = 8;
564 0 : const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
// The same bias expression the 2D variant subtracts, precomputed once.
565 0 : const int32_t round_offset = (1 << (offset_bits - conv_params->round_1)) +
566 0 : (1 << (offset_bits - conv_params->round_1 - 1));
567 0 : const int32_t round_bits =
568 0 : 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
569 : (void)filter_params_x;
570 : (void)subpel_x_q4;
571 :
572 : // vertical filter
573 0 : const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
574 : *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
575 0 : for (int32_t y = 0; y < h; ++y) {
576 0 : for (int32_t x = 0; x < w; ++x) {
577 0 : int32_t res = 0;
578 0 : for (int32_t k = 0; k < filter_params_y->taps; ++k)
579 0 : res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
// Scale up by 2^bits (bits = FILTER_BITS - round_0), then round by round_1 and add the bias.
580 0 : res *= (1 << bits);
581 0 : res = ROUND_POWER_OF_TWO(res, conv_params->round_1) + round_offset;
582 :
583 0 : if (conv_params->do_average) {
584 0 : int32_t tmp = dst[y * dst_stride + x];
585 0 : if (conv_params->use_jnt_comp_avg) {
586 0 : tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
587 0 : tmp = tmp >> DIST_PRECISION_BITS;
588 : }
589 : else {
590 0 : tmp += res;
591 0 : tmp = tmp >> 1;
592 : }
593 0 : tmp -= round_offset;
594 0 : dst8[y * dst8_stride + x] =
595 0 : (uint8_t)clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), 8);
596 : }
597 : else
598 0 : dst[y * dst_stride + x] = (ConvBufType)res;
599 : }
600 : }
601 0 : }
602 :
// 8-bit compound horizontal-only convolution (plain C implementation).
// With do_average == 0 the biased intermediate is stored in conv_params->dst;
// with do_average != 0 it is blended with the stored value and the clipped
// pixel is written to dst8.
//   src, src_stride   : input samples; columns left and right of the block are read
//   dst8, dst8_stride : 8-bit output (averaging pass only)
//   w, h              : block size
//   filter_params_x, subpel_x_q4 : horizontal kernel selection (SUBPEL_MASK bits only)
//   filter_params_y, subpel_y_q4 : unused
//   conv_params       : dst, dst_stride, round_0, round_1, do_average,
//                       use_jnt_comp_avg, fwd_offset, bck_offset
603 0 : void eb_av1_jnt_convolve_x_c(const uint8_t *src, int32_t src_stride, uint8_t *dst8,
604 : int32_t dst8_stride, int32_t w, int32_t h,
605 : InterpFilterParams *filter_params_x,
606 : InterpFilterParams *filter_params_y,
607 : const int32_t subpel_x_q4, const int32_t subpel_y_q4,
608 : ConvolveParams *conv_params)
609 : {
610 0 : ConvBufType *dst = conv_params->dst;
611 0 : int32_t dst_stride = conv_params->dst_stride;
612 0 : const int32_t fo_horiz = filter_params_x->taps / 2 - 1;
// Note: here bits is derived from round_1, whereas eb_av1_jnt_convolve_y_c derives it from round_0.
613 0 : const int32_t bits = FILTER_BITS - conv_params->round_1;
614 0 : const int32_t bd = 8;
615 0 : const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
616 0 : const int32_t round_offset = (1 << (offset_bits - conv_params->round_1)) +
617 0 : (1 << (offset_bits - conv_params->round_1 - 1));
618 0 : const int32_t round_bits =
619 0 : 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
620 : (void)filter_params_y;
621 : (void)subpel_y_q4;
622 :
623 : // horizontal filter
624 0 : const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
625 : *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
626 0 : for (int32_t y = 0; y < h; ++y) {
627 0 : for (int32_t x = 0; x < w; ++x) {
628 0 : int32_t res = 0;
629 0 : for (int32_t k = 0; k < filter_params_x->taps; ++k)
630 0 : res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
// Round by round_0, scale up by 2^bits, then add the bias.
631 0 : res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0);
632 0 : res += round_offset;
633 :
634 0 : if (conv_params->do_average) {
635 0 : int32_t tmp = dst[y * dst_stride + x];
636 0 : if (conv_params->use_jnt_comp_avg) {
637 0 : tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
638 0 : tmp = tmp >> DIST_PRECISION_BITS;
639 : }
640 : else {
641 0 : tmp += res;
642 0 : tmp = tmp >> 1;
643 : }
644 0 : tmp -= round_offset;
645 0 : dst8[y * dst8_stride + x] =
646 0 : (uint8_t)clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), 8);
647 : }
648 : else
649 0 : dst[y * dst_stride + x] = (ConvBufType)res;
650 : }
651 : }
652 0 : }
653 :
// 8-bit compound "no filtering" case (plain C implementation).
// Each source sample is shifted left by bits and biased by round_offset so it
// matches the intermediate format of the filtering eb_av1_jnt_convolve_* functions.
// With do_average == 0 it is stored in conv_params->dst; with
// do_average != 0 it is blended with the stored value and the clipped pixel
// is written to dst8.
//   src, src_stride   : input block
//   dst8, dst8_stride : 8-bit output (averaging pass only)
//   w, h              : block size
//   filter_params_x/y, subpel_x_q4/y_q4 : unused
//   conv_params       : dst, dst_stride, round_0, round_1, do_average,
//                       use_jnt_comp_avg, fwd_offset, bck_offset
654 0 : void eb_av1_jnt_convolve_2d_copy_c(const uint8_t *src, int32_t src_stride,
655 : uint8_t *dst8, int32_t dst8_stride, int32_t w, int32_t h,
656 : InterpFilterParams *filter_params_x,
657 : InterpFilterParams *filter_params_y,
658 : const int32_t subpel_x_q4, const int32_t subpel_y_q4,
659 : ConvolveParams *conv_params)
660 : {
661 0 : ConvBufType *dst = conv_params->dst;
662 0 : int32_t dst_stride = conv_params->dst_stride;
663 0 : const int32_t bits =
664 0 : FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
665 0 : const int32_t bd = 8;
666 0 : const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
667 0 : const int32_t round_offset = (1 << (offset_bits - conv_params->round_1)) +
668 0 : (1 << (offset_bits - conv_params->round_1 - 1));
669 : (void)filter_params_x;
670 : (void)filter_params_y;
671 : (void)subpel_x_q4;
672 : (void)subpel_y_q4;
673 :
674 0 : for (int32_t y = 0; y < h; ++y) {
675 0 : for (int32_t x = 0; x < w; ++x) {
// Both the shifted sample and the sum are narrowed to ConvBufType.
676 0 : ConvBufType res = src[y * src_stride + x] << bits;
677 0 : res += (ConvBufType)round_offset;
678 :
679 0 : if (conv_params->do_average) {
680 0 : int32_t tmp = dst[y * dst_stride + x];
681 0 : if (conv_params->use_jnt_comp_avg) {
682 0 : tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
683 0 : tmp = tmp >> DIST_PRECISION_BITS;
684 : }
685 : else {
686 0 : tmp += res;
687 0 : tmp = tmp >> 1;
688 : }
689 0 : tmp -= round_offset;
690 0 : dst8[y * dst8_stride + x] = (uint8_t)clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), 8);
691 : }
692 : else
693 0 : dst[y * dst_stride + x] = res;
694 : }
695 : }
696 0 : }
697 :
// 16-bit-sample "no filtering" case: copies the w x h block from src to dst
// sample by sample.
//   src, src_stride : input block of uint16_t samples
//   dst, dst_stride : output block of uint16_t samples
//   w, h            : block size
// The filter, sub-pel, conv_params and bd arguments are unused; they are
// present so the signature matches the other eb_av1_highbd_convolve_*_sr_c functions.
698 0 : void eb_av1_highbd_convolve_2d_copy_sr_c(
699 : const uint16_t *src, int32_t src_stride, uint16_t *dst, int32_t dst_stride, int32_t w,
700 : int32_t h, const InterpFilterParams *filter_params_x,
701 : const InterpFilterParams *filter_params_y, const int32_t subpel_x_q4,
702 : const int32_t subpel_y_q4, ConvolveParams *conv_params, int32_t bd) {
703 : (void)filter_params_x;
704 : (void)filter_params_y;
705 : (void)subpel_x_q4;
706 : (void)subpel_y_q4;
707 : (void)conv_params;
708 : (void)bd;
709 :
710 0 : for (int32_t y = 0; y < h; ++y) {
711 0 : for (int32_t x = 0; x < w; ++x)
712 0 : dst[y * dst_stride + x] = src[y * src_stride + x];
713 : }
714 0 : }
715 :
// 16-bit-sample horizontal-only convolution writing pixels directly to dst
// (plain C implementation).
//   src, src_stride : input samples; columns left and right of the block are read
//   dst, dst_stride : output block, clipped to bd bits
//   w, h            : block size
//   filter_params_x, subpel_x_q4 : horizontal kernel selection (SUBPEL_MASK bits only)
//   filter_params_y, subpel_y_q4 : unused
//   conv_params     : round_0 is applied first, then the remaining
//                     FILTER_BITS - round_0 bits; round_1 is only checked by an assert
//   bd              : bit depth passed to clip_pixel_highbd()
716 0 : void eb_av1_highbd_convolve_x_sr_c(const uint16_t *src, int32_t src_stride,
717 : uint16_t *dst, int32_t dst_stride, int32_t w, int32_t h,
718 : const InterpFilterParams *filter_params_x,
719 : const InterpFilterParams *filter_params_y,
720 : const int32_t subpel_x_q4, const int32_t subpel_y_q4,
721 : ConvolveParams *conv_params, int32_t bd) {
722 0 : const int32_t fo_horiz = filter_params_x->taps / 2 - 1;
723 0 : const int32_t bits = FILTER_BITS - conv_params->round_0;
724 : (void)filter_params_y;
725 : (void)subpel_y_q4;
726 :
727 0 : assert(bits >= 0);
728 0 : assert((FILTER_BITS - conv_params->round_1) >= 0 ||
729 : ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
730 :
731 : // horizontal filter
732 0 : const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
733 : *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
734 0 : for (int32_t y = 0; y < h; ++y) {
735 0 : for (int32_t x = 0; x < w; ++x) {
736 0 : int32_t res = 0;
737 0 : for (int32_t k = 0; k < filter_params_x->taps; ++k)
738 0 : res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
// Two-stage rounding: round_0 first, then the remaining bits.
739 0 : res = ROUND_POWER_OF_TWO(res, conv_params->round_0);
740 0 : dst[y * dst_stride + x] =
741 0 : clip_pixel_highbd(ROUND_POWER_OF_TWO(res, bits), bd);
742 : }
743 : }
744 0 : }
745 :
// 16-bit-sample vertical-only convolution writing pixels directly to dst
// (plain C implementation).
//   src, src_stride : input samples; rows above and below the block are read
//   dst, dst_stride : output block, clipped to bd bits
//   w, h            : block size
//   filter_params_y : vertical kernel set; must be non-NULL (asserted)
//   subpel_y_q4     : vertical sub-pel position; only the SUBPEL_MASK bits are used
//   filter_params_x, subpel_x_q4 : unused
//   conv_params     : only inspected by the asserts; the result is always
//                     rounded by FILTER_BITS
//   bd              : bit depth passed to clip_pixel_highbd()
746 0 : void eb_av1_highbd_convolve_y_sr_c(const uint16_t *src, int32_t src_stride,
747 : uint16_t *dst, int32_t dst_stride, int32_t w, int32_t h,
748 : const InterpFilterParams *filter_params_x,
749 : const InterpFilterParams *filter_params_y,
750 : const int32_t subpel_x_q4, const int32_t subpel_y_q4,
751 : ConvolveParams *conv_params, int32_t bd) {
752 0 : assert(filter_params_y != NULL);
753 0 : const int32_t fo_vert = filter_params_y->taps / 2 - 1;
754 : (void)filter_params_x;
755 : (void)subpel_x_q4;
// conv_params is referenced only inside assert(); presumably this cast silences
// the unused-parameter warning when asserts are compiled out.
756 : (void)conv_params;
757 :
758 0 : assert(conv_params->round_0 <= FILTER_BITS);
759 0 : assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) ||
760 : ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
761 : // vertical filter
762 0 : const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
763 : *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
764 0 : for (int32_t y = 0; y < h; ++y) {
765 0 : for (int32_t x = 0; x < w; ++x) {
766 0 : int32_t res = 0;
767 0 : for (int32_t k = 0; k < filter_params_y->taps; ++k)
768 0 : res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
769 0 : dst[y * dst_stride + x] =
770 0 : clip_pixel_highbd(ROUND_POWER_OF_TWO(res, FILTER_BITS), bd);
771 : }
772 : }
773 0 : }
774 :
775 0 : void eb_av1_highbd_convolve_2d_sr_c(const uint16_t *src, int32_t src_stride,
776 : uint16_t *dst, int32_t dst_stride, int32_t w, int32_t h,
777 : const InterpFilterParams *filter_params_x,
778 : const InterpFilterParams *filter_params_y,
779 : const int32_t subpel_x_q4, const int32_t subpel_y_q4,
780 : ConvolveParams *conv_params, int32_t bd) {
781 : int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
782 0 : int32_t im_h = h + filter_params_y->taps - 1;
783 0 : int32_t im_stride = w;
784 0 : const int32_t fo_vert = filter_params_y->taps / 2 - 1;
785 0 : const int32_t fo_horiz = filter_params_x->taps / 2 - 1;
786 0 : const int32_t bits =
787 0 : FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
788 0 : assert(bits >= 0);
789 :
790 : // horizontal filter
791 0 : const uint16_t *src_horiz = src - fo_vert * src_stride;
792 0 : const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
793 : *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
794 0 : for (int32_t y = 0; y < im_h; ++y) {
795 0 : for (int32_t x = 0; x < w; ++x) {
796 0 : int32_t sum = (1 << (bd + FILTER_BITS - 1));
797 0 : for (int32_t k = 0; k < filter_params_x->taps; ++k)
798 0 : sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
799 0 : assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
800 0 : im_block[y * im_stride + x] = (ConvBufType)
801 0 : ROUND_POWER_OF_TWO(sum, conv_params->round_0);
802 : }
803 : }
804 :
805 : // vertical filter
806 0 : int16_t *src_vert = im_block + fo_vert * im_stride;
807 0 : const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
808 : *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
809 0 : const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
810 0 : for (int32_t y = 0; y < h; ++y) {
811 0 : for (int32_t x = 0; x < w; ++x) {
812 0 : int32_t sum = 1 << offset_bits;
813 0 : for (int32_t k = 0; k < filter_params_y->taps; ++k)
814 0 : sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
815 0 : assert(0 <= sum && sum < (1 << (offset_bits + 2)));
816 0 : int32_t res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
817 0 : ((1 << (offset_bits - conv_params->round_1)) +
818 0 : (1 << (offset_bits - conv_params->round_1 - 1)));
819 0 : dst[y * dst_stride + x] =
820 0 : clip_pixel_highbd(ROUND_POWER_OF_TWO(res, bits), bd);
821 : }
822 : }
823 0 : }
824 :
825 0 : void eb_av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
826 : uint16_t *dst, int dst_stride, int w, int h,
827 : const InterpFilterParams *filter_params_x,
828 : const InterpFilterParams *filter_params_y,
829 : const int subpel_x_qn, const int x_step_qn,
830 : const int subpel_y_qn, const int y_step_qn,
831 : ConvolveParams *conv_params, int bd)
832 : {
833 : int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
834 0 : int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
835 0 : filter_params_y->taps;
836 0 : int im_stride = w;
837 0 : const int fo_vert = filter_params_y->taps / 2 - 1;
838 0 : const int fo_horiz = filter_params_x->taps / 2 - 1;
839 0 : CONV_BUF_TYPE *dst16 = conv_params->dst;
840 0 : const int dst16_stride = conv_params->dst_stride;
841 0 : const int bits =
842 0 : FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
843 0 : assert(bits >= 0);
844 : // horizontal filter
845 0 : const uint16_t *src_horiz = src - fo_vert * src_stride;
846 0 : for (int y = 0; y < im_h; ++y) {
847 0 : int x_qn = subpel_x_qn;
848 0 : for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
849 0 : const uint16_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
850 0 : const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
851 0 : assert(x_filter_idx < SUBPEL_SHIFTS);
852 : const int16_t *x_filter =
853 0 : av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
854 0 : int32_t sum = (1 << (bd + FILTER_BITS - 1));
855 0 : for (int k = 0; k < filter_params_x->taps; ++k) {
856 0 : sum += x_filter[k] * src_x[k - fo_horiz];
857 : }
858 0 : assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
859 0 : im_block[y * im_stride + x] =
860 0 : (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
861 : }
862 0 : src_horiz += src_stride;
863 : }
864 :
865 : // vertical filter
866 0 : int16_t *src_vert = im_block + fo_vert * im_stride;
867 0 : const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
868 0 : for (int x = 0; x < w; ++x) {
869 0 : int y_qn = subpel_y_qn;
870 0 : for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
871 0 : const int16_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
872 0 : const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
873 0 : assert(y_filter_idx < SUBPEL_SHIFTS);
874 : const int16_t *y_filter =
875 0 : av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
876 0 : int32_t sum = 1 << offset_bits;
877 0 : for (int k = 0; k < filter_params_y->taps; ++k) {
878 0 : sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
879 : }
880 0 : assert(0 <= sum && sum < (1 << (offset_bits + 2)));
881 0 : CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
882 0 : if (conv_params->is_compound) {
883 0 : if (conv_params->do_average) {
884 0 : int32_t tmp = dst16[y * dst16_stride + x];
885 0 : if (conv_params->use_dist_wtd_comp_avg) {
886 0 : tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
887 0 : tmp = tmp >> DIST_PRECISION_BITS;
888 : }
889 : else {
890 0 : tmp += res;
891 0 : tmp = tmp >> 1;
892 : }
893 : /* Subtract round offset and convolve round */
894 0 : tmp = tmp - ((1 << (offset_bits - conv_params->round_1)) +
895 0 : (1 << (offset_bits - conv_params->round_1 - 1)));
896 0 : dst[y * dst_stride + x] =
897 0 : clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd);
898 : }
899 : else {
900 0 : dst16[y * dst16_stride + x] = res;
901 : }
902 : }
903 : else {
904 : /* Subtract round offset and convolve round */
905 0 : int32_t tmp = res - ((1 << (offset_bits - conv_params->round_1)) +
906 0 : (1 << (offset_bits - conv_params->round_1 - 1)));
907 0 : dst[y * dst_stride + x] =
908 0 : clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd);
909 : }
910 : }
911 0 : src_vert++;
912 : }
913 0 : }
914 :
915 :
916 0 : void eb_av1_highbd_jnt_convolve_x_c(const uint16_t *src, int32_t src_stride,
917 : uint16_t *dst16, int32_t dst16_stride, int32_t w,
918 : int32_t h, const InterpFilterParams *filter_params_x,
919 : const InterpFilterParams *filter_params_y,
920 : const int32_t subpel_x_q4, const int32_t subpel_y_q4,
921 : ConvolveParams *conv_params, int32_t bd) {
922 0 : ConvBufType *dst = conv_params->dst;
923 0 : int32_t dst_stride = conv_params->dst_stride;
924 0 : const int32_t fo_horiz = filter_params_x->taps / 2 - 1;
925 0 : const int32_t bits = FILTER_BITS - conv_params->round_1;
926 0 : const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
927 0 : const int32_t round_offset = (1 << (offset_bits - conv_params->round_1)) +
928 0 : (1 << (offset_bits - conv_params->round_1 - 1));
929 0 : const int32_t round_bits =
930 0 : 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
931 0 : assert(round_bits >= 0);
932 : (void)filter_params_y;
933 : (void)subpel_y_q4;
934 0 : assert(bits >= 0);
935 : // horizontal filter
936 0 : const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
937 : *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
938 0 : for (int32_t y = 0; y < h; ++y) {
939 0 : for (int32_t x = 0; x < w; ++x) {
940 0 : int32_t res = 0;
941 0 : for (int32_t k = 0; k < filter_params_x->taps; ++k)
942 0 : res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
943 0 : res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0);
944 0 : res += round_offset;
945 :
946 0 : if (conv_params->do_average) {
947 0 : int32_t tmp = dst[y * dst_stride + x];
948 0 : if (conv_params->use_jnt_comp_avg) {
949 0 : tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
950 0 : tmp = tmp >> DIST_PRECISION_BITS;
951 : }
952 : else {
953 0 : tmp += res;
954 0 : tmp = tmp >> 1;
955 : }
956 0 : tmp -= round_offset;
957 0 : dst16[y * dst16_stride + x] =
958 0 : clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), bd);
959 : }
960 : else
961 0 : dst[y * dst_stride + x] = (ConvBufType)res;
962 : }
963 : }
964 0 : }
965 :
966 0 : void eb_av1_highbd_jnt_convolve_y_c(const uint16_t *src, int32_t src_stride,
967 : uint16_t *dst16, int32_t dst16_stride, int32_t w,
968 : int32_t h, const InterpFilterParams *filter_params_x,
969 : const InterpFilterParams *filter_params_y,
970 : const int32_t subpel_x_q4, const int32_t subpel_y_q4,
971 : ConvolveParams *conv_params, int32_t bd) {
972 0 : ConvBufType *dst = conv_params->dst;
973 0 : int32_t dst_stride = conv_params->dst_stride;
974 0 : const int32_t fo_vert = filter_params_y->taps / 2 - 1;
975 0 : const int32_t bits = FILTER_BITS - conv_params->round_0;
976 0 : const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
977 0 : const int32_t round_offset = (1 << (offset_bits - conv_params->round_1)) +
978 0 : (1 << (offset_bits - conv_params->round_1 - 1));
979 0 : const int32_t round_bits =
980 0 : 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
981 0 : assert(round_bits >= 0);
982 : (void)filter_params_x;
983 : (void)subpel_x_q4;
984 0 : assert(bits >= 0);
985 : // vertical filter
986 0 : const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
987 : *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
988 0 : for (int32_t y = 0; y < h; ++y) {
989 0 : for (int32_t x = 0; x < w; ++x) {
990 0 : int32_t res = 0;
991 0 : for (int32_t k = 0; k < filter_params_y->taps; ++k)
992 0 : res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
993 0 : res *= (1 << bits);
994 0 : res = ROUND_POWER_OF_TWO(res, conv_params->round_1) + round_offset;
995 :
996 0 : if (conv_params->do_average) {
997 0 : int32_t tmp = dst[y * dst_stride + x];
998 0 : if (conv_params->use_jnt_comp_avg) {
999 0 : tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
1000 0 : tmp = tmp >> DIST_PRECISION_BITS;
1001 : }
1002 : else {
1003 0 : tmp += res;
1004 0 : tmp = tmp >> 1;
1005 : }
1006 0 : tmp -= round_offset;
1007 0 : dst16[y * dst16_stride + x] =
1008 0 : clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), bd);
1009 : }
1010 : else
1011 0 : dst[y * dst_stride + x] = (ConvBufType)res;
1012 : }
1013 : }
1014 0 : }
1015 :
1016 0 : void eb_av1_highbd_jnt_convolve_2d_copy_c(
1017 : const uint16_t *src, int32_t src_stride, uint16_t *dst16, int32_t dst16_stride,
1018 : int32_t w, int32_t h, const InterpFilterParams *filter_params_x,
1019 : const InterpFilterParams *filter_params_y, const int32_t subpel_x_q4,
1020 : const int32_t subpel_y_q4, ConvolveParams *conv_params, int32_t bd) {
1021 0 : ConvBufType *dst = conv_params->dst;
1022 0 : int32_t dst_stride = conv_params->dst_stride;
1023 0 : const int32_t bits =
1024 0 : FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
1025 0 : const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
1026 0 : const int32_t round_offset = (1 << (offset_bits - conv_params->round_1)) +
1027 0 : (1 << (offset_bits - conv_params->round_1 - 1));
1028 0 : assert(bits >= 0);
1029 : (void)filter_params_x;
1030 : (void)filter_params_y;
1031 : (void)subpel_x_q4;
1032 : (void)subpel_y_q4;
1033 :
1034 0 : for (int32_t y = 0; y < h; ++y) {
1035 0 : for (int32_t x = 0; x < w; ++x) {
1036 0 : ConvBufType res = src[y * src_stride + x] << bits;
1037 0 : res += (ConvBufType)round_offset;
1038 0 : if (conv_params->do_average) {
1039 0 : int32_t tmp = dst[y * dst_stride + x];
1040 0 : if (conv_params->use_jnt_comp_avg) {
1041 0 : tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
1042 0 : tmp = tmp >> DIST_PRECISION_BITS;
1043 : }
1044 : else {
1045 0 : tmp += res;
1046 0 : tmp = tmp >> 1;
1047 : }
1048 0 : tmp -= round_offset;
1049 0 : dst16[y * dst16_stride + x] =
1050 0 : clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd);
1051 : }
1052 : else
1053 0 : dst[y * dst_stride + x] = res;
1054 : }
1055 : }
1056 0 : }
1057 :
1058 0 : void eb_av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int32_t src_stride,
1059 : uint16_t *dst16, int32_t dst16_stride, int32_t w,
1060 : int32_t h, const InterpFilterParams *filter_params_x,
1061 : const InterpFilterParams *filter_params_y,
1062 : const int32_t subpel_x_q4, const int32_t subpel_y_q4,
1063 : ConvolveParams *conv_params, int32_t bd)
1064 :
1065 : {
1066 : int32_t x, y, k;
1067 : int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
1068 0 : ConvBufType *dst = conv_params->dst;
1069 0 : int32_t dst_stride = conv_params->dst_stride;
1070 0 : int32_t im_h = h + filter_params_y->taps - 1;
1071 0 : int32_t im_stride = w;
1072 0 : const int32_t fo_vert = filter_params_y->taps / 2 - 1;
1073 0 : const int32_t fo_horiz = filter_params_x->taps / 2 - 1;
1074 :
1075 0 : const int32_t round_bits =
1076 0 : 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
1077 0 : assert(round_bits >= 0);
1078 :
1079 : // horizontal filter
1080 0 : const uint16_t *src_horiz = src - fo_vert * src_stride;
1081 0 : const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
1082 : *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
1083 0 : for (y = 0; y < im_h; ++y) {
1084 0 : for (x = 0; x < w; ++x) {
1085 0 : int32_t sum = (1 << (bd + FILTER_BITS - 1));
1086 0 : for (k = 0; k < filter_params_x->taps; ++k)
1087 0 : sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
1088 0 : assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
1089 : (void)bd;
1090 0 : im_block[y * im_stride + x] =
1091 0 : (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
1092 : }
1093 : }
1094 :
1095 : // vertical filter
1096 0 : int16_t *src_vert = im_block + fo_vert * im_stride;
1097 0 : const int32_t offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
1098 0 : const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
1099 : *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
1100 0 : for (y = 0; y < h; ++y) {
1101 0 : for (x = 0; x < w; ++x) {
1102 0 : int32_t sum = 1 << offset_bits;
1103 0 : for (k = 0; k < filter_params_y->taps; ++k)
1104 0 : sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
1105 0 : assert(0 <= sum && sum < (1 << (offset_bits + 2)));
1106 0 : ConvBufType res = (ConvBufType)ROUND_POWER_OF_TWO(sum, conv_params->round_1);
1107 0 : if (conv_params->do_average) {
1108 0 : int32_t tmp = dst[y * dst_stride + x];
1109 0 : if (conv_params->use_jnt_comp_avg) {
1110 0 : tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
1111 0 : tmp = tmp >> DIST_PRECISION_BITS;
1112 : }
1113 : else {
1114 0 : tmp += res;
1115 0 : tmp = tmp >> 1;
1116 : }
1117 0 : tmp -= (1 << (offset_bits - conv_params->round_1)) +
1118 0 : (1 << (offset_bits - conv_params->round_1 - 1));
1119 0 : dst16[y * dst16_stride + x] =
1120 0 : clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), bd);
1121 : }
1122 : else
1123 0 : dst[y * dst_stride + x] = res;
1124 : }
1125 : }
1126 0 : }
1127 :
1128 : aom_highbd_convolve_fn_t convolveHbd[/*subX*/2][/*subY*/2][/*bi*/2];
1129 3 : void asmSetConvolveHbdAsmTable(void)
1130 : {
1131 3 : convolveHbd[0][0][0] = eb_av1_highbd_convolve_2d_copy_sr;
1132 3 : convolveHbd[0][0][1] = eb_av1_highbd_jnt_convolve_2d_copy;
1133 :
1134 3 : convolveHbd[0][1][0] = eb_av1_highbd_convolve_y_sr;
1135 3 : convolveHbd[0][1][1] = eb_av1_highbd_jnt_convolve_y;
1136 :
1137 3 : convolveHbd[1][0][0] = eb_av1_highbd_convolve_x_sr;
1138 3 : convolveHbd[1][0][1] = eb_av1_highbd_jnt_convolve_x;
1139 :
1140 3 : convolveHbd[1][1][0] = eb_av1_highbd_convolve_2d_sr;
1141 3 : convolveHbd[1][1][1] = eb_av1_highbd_jnt_convolve_2d;
1142 3 : }
1143 :
1144 : aom_convolve_fn_t convolve[/*subX*/2][/*subY*/2][/*bi*/2];
1145 3 : void asmSetConvolveAsmTable(void)
1146 : {
1147 3 : convolve[0][0][0] = eb_av1_convolve_2d_copy_sr;
1148 3 : convolve[0][0][1] = eb_av1_jnt_convolve_2d_copy;
1149 :
1150 3 : convolve[0][1][0] = eb_av1_convolve_y_sr;
1151 3 : convolve[0][1][1] = eb_av1_jnt_convolve_y;
1152 :
1153 3 : convolve[1][0][0] = eb_av1_convolve_x_sr;
1154 3 : convolve[1][0][1] = eb_av1_jnt_convolve_x;
1155 :
1156 3 : convolve[1][1][0] = eb_av1_convolve_2d_sr;
1157 3 : convolve[1][1][1] = eb_av1_jnt_convolve_2d;
1158 3 : }
1159 :
// Non-static filter parameter sets tagged EIGHTTAP_REGULAR. The first points
// at sub_pel_filters_8; the W4 variant points at sub_pel_filters_4, the same
// table av1_interp_4tap[0] uses for blocks with w <= 4. Both declare
// SUBPEL_TAPS taps.
InterpFilterParams av1RegularFilter = { (const int16_t *)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS, EIGHTTAP_REGULAR };
InterpFilterParams av1RegularFilterW4 = { (const int16_t *)sub_pel_filters_4, SUBPEL_TAPS, SUBPEL_SHIFTS, EIGHTTAP_REGULAR };
1162 :
// Sharp interpolation kernels: one 8-coefficient row per sub-pel position
// (16 rows listed). Every row sums to 128; row 0 is the pass-through kernel
// (single coefficient of 128). Referenced by av1_interp_filter_params_list
// under MULTITAP_SHARP.
DECLARE_ALIGNED(256, const InterpKernel,
sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
    { 0, 0, 0, 128, 0, 0, 0, 0 }, { -2, 2, -6, 126, 8, -2, 2, 0 },
    { -2, 6, -12, 124, 16, -6, 4, -2 }, { -2, 8, -18, 120, 26, -10, 6, -2 },
    { -4, 10, -22, 116, 38, -14, 6, -2 }, { -4, 10, -22, 108, 48, -18, 8, -2 },
    { -4, 10, -24, 100, 60, -20, 8, -2 }, { -4, 10, -24, 90, 70, -22, 10, -2 },
    { -4, 12, -24, 80, 80, -24, 12, -4 }, { -2, 10, -22, 70, 90, -24, 10, -4 },
    { -2, 8, -20, 60, 100, -24, 10, -4 }, { -2, 8, -18, 48, 108, -22, 10, -4 },
    { -2, 6, -14, 38, 116, -22, 10, -4 }, { -2, 6, -10, 26, 120, -18, 8, -2 },
    { -2, 4, -6, 16, 124, -12, 6, -2 }, { 0, 2, -2, 8, 126, -6, 2, -2 }
};
1174 :
// Smooth interpolation kernels: one 8-coefficient row per sub-pel position
// (16 rows listed). Every row sums to 128; the outermost coefficients
// (indices 0 and 7) are zero in all rows. Referenced by
// av1_interp_filter_params_list under EIGHTTAP_SMOOTH.
DECLARE_ALIGNED(256, const InterpKernel,
sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
    { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, 28, 62, 34, 2, 0, 0 },
    { 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
    { 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
    { 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, -2, 16, 54, 48, 12, 0, 0 },
    { 0, -2, 14, 52, 52, 14, -2, 0 }, { 0, 0, 12, 48, 54, 16, -2, 0 },
    { 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
    { 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
    { 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 28, 2, 0 }
};
// Bilinear kernels stored in the 8-coefficient layout: only indices 3 and 4
// are non-zero, stepping by 8 per sub-pel position, and every row sums to
// 128. Referenced by av1_interp_filter_params_list under BILINEAR.
DECLARE_ALIGNED(256, const InterpKernel,
bilinear_filters[SUBPEL_SHIFTS]) = {
    { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 },
    { 0, 0, 0, 112, 16, 0, 0, 0 }, { 0, 0, 0, 104, 24, 0, 0, 0 },
    { 0, 0, 0, 96, 32, 0, 0, 0 }, { 0, 0, 0, 88, 40, 0, 0, 0 },
    { 0, 0, 0, 80, 48, 0, 0, 0 }, { 0, 0, 0, 72, 56, 0, 0, 0 },
    { 0, 0, 0, 64, 64, 0, 0, 0 }, { 0, 0, 0, 56, 72, 0, 0, 0 },
    { 0, 0, 0, 48, 80, 0, 0, 0 }, { 0, 0, 0, 40, 88, 0, 0, 0 },
    { 0, 0, 0, 32, 96, 0, 0, 0 }, { 0, 0, 0, 24, 104, 0, 0, 0 },
    { 0, 0, 0, 16, 112, 0, 0, 0 }, { 0, 0, 0, 8, 120, 0, 0, 0 }
};
// Smooth kernels for small blocks, stored in the 8-coefficient layout: only
// the middle four coefficients (indices 2..5) are ever non-zero and every
// row sums to 128. Referenced by av1_interp_4tap[1].
DECLARE_ALIGNED(256, const InterpKernel,
sub_pel_filters_4smooth[SUBPEL_SHIFTS]) = {
    { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 30, 62, 34, 2, 0, 0 },
    { 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
    { 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
    { 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, 0, 14, 54, 48, 12, 0, 0 },
    { 0, 0, 12, 52, 52, 12, 0, 0 }, { 0, 0, 12, 48, 54, 14, 0, 0 },
    { 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
    { 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
    { 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 30, 0, 0 }
};
// Filter parameter sets for the general (w > 4) case.
// av1_get_interp_filter_params_with_block_size() indexes this array directly
// with the InterpFilter value, so the entry order presumably mirrors the
// enum order (EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH, MULTITAP_SHARP, BILINEAR)
// -- TODO confirm against the InterpFilter definition.
static const InterpFilterParams
av1_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
    { (const int16_t *)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS,
    EIGHTTAP_REGULAR },
    { (const int16_t *)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS,
    EIGHTTAP_SMOOTH },
    { (const int16_t *)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS,
    MULTITAP_SHARP },
    { (const int16_t *)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS,
    BILINEAR }
};
// Filter parameter sets substituted when the block dimension is <= 4:
//   [0] regular kernels (also returned for MULTITAP_SHARP),
//   [1] smooth kernels.
// The tap count is still declared as SUBPEL_TAPS.
static const InterpFilterParams av1_interp_4tap[2] = {
    { (const int16_t *)sub_pel_filters_4, SUBPEL_TAPS, SUBPEL_SHIFTS,
    EIGHTTAP_REGULAR },
    { (const int16_t *)sub_pel_filters_4smooth, SUBPEL_TAPS, SUBPEL_SHIFTS,
    EIGHTTAP_SMOOTH },
};
1225 879893000 : InterpFilterParams av1_get_interp_filter_params_with_block_size(
1226 : const InterpFilter interp_filter, const int32_t w) {
1227 879893000 : if (w <= 4 &&
1228 47716100 : (interp_filter == MULTITAP_SHARP || interp_filter == EIGHTTAP_REGULAR))
1229 42334000 : return av1_interp_4tap[0];
1230 837559000 : else if (w <= 4 && interp_filter == EIGHTTAP_SMOOTH)
1231 80053 : return av1_interp_4tap[1];
1232 :
1233 837479000 : return av1_interp_filter_params_list[interp_filter];
1234 : }
1235 :
1236 444419000 : void av1_get_convolve_filter_params( uint32_t interp_filters,
1237 : InterpFilterParams *params_x, InterpFilterParams *params_y,
1238 : int32_t w, int32_t h)
1239 : {
1240 444419000 : InterpFilter filter_x = av1_extract_interp_filter(interp_filters, 1);
1241 442871000 : InterpFilter filter_y = av1_extract_interp_filter(interp_filters, 0);
1242 441514000 : *params_x = av1_get_interp_filter_params_with_block_size(filter_x, w);
1243 442093000 : *params_y = av1_get_interp_filter_params_with_block_size(filter_y, h);
1244 442438000 : }
1245 :
// Prototypes for helpers whose definitions are not in this part of the file.
int32_t is_inter_block(const BlockModeInfo *mbmi);
BlockSize scale_chroma_bsize(BlockSize bsize, int32_t subsampling_x,
    int32_t subsampling_y);
1249 :
// A special 2-tap bilinear filter for IntraBC chroma. IntraBC uses full pixel
// MV for luma. If sub-sampling exists, chroma may possibly use half-pel MV.
// Both coefficients are 64, i.e. an equal-weight average of two samples
// (64 + 64 = 128).
DECLARE_ALIGNED(256, static const int16_t, av1_intrabc_bilinear_filter[2]) = {
    64,
    64,
};

// Parameter set wrapping the single 2-tap kernel above: taps = 2, and the
// third field (SUBPEL_SHIFTS in the regular parameter sets) is 0 here. The
// intrabc helpers always request kernel index 0 by passing a sub-pel
// position of 0.
static const InterpFilterParams av1_intrabc_filter_params = {
    av1_intrabc_bilinear_filter, 2, 0, BILINEAR
};
1260 0 : static void convolve_2d_for_intrabc(const uint8_t *src, int src_stride,
1261 : uint8_t *dst, int dst_stride, int w, int h,
1262 : int subpel_x_q4, int subpel_y_q4,
1263 : ConvolveParams *conv_params)
1264 : {
1265 0 : const InterpFilterParams *filter_params_x =
1266 0 : subpel_x_q4 ? &av1_intrabc_filter_params : NULL;
1267 0 : const InterpFilterParams *filter_params_y =
1268 0 : subpel_y_q4 ? &av1_intrabc_filter_params : NULL;
1269 0 : if (subpel_x_q4 != 0 && subpel_y_q4 != 0) {
1270 0 : eb_av1_convolve_2d_sr_c(src, src_stride, dst, dst_stride, w, h,
1271 : (InterpFilterParams *)filter_params_x, (InterpFilterParams *)filter_params_y, 0, 0, conv_params);
1272 : }
1273 0 : else if (subpel_x_q4 != 0) {
1274 0 : eb_av1_convolve_x_sr_c(src, src_stride, dst, dst_stride, w, h, (InterpFilterParams *)filter_params_x,
1275 : (InterpFilterParams *)filter_params_y, 0, 0, conv_params);
1276 : }
1277 : else {
1278 0 : eb_av1_convolve_y_sr_c(src, src_stride, dst, dst_stride, w, h, (InterpFilterParams *)filter_params_x,
1279 : (InterpFilterParams *)filter_params_y, 0, 0, conv_params);
1280 : }
1281 0 : }
1282 0 : static void highbd_convolve_2d_for_intrabc(const uint16_t *src, int src_stride,
1283 : uint16_t *dst, int dst_stride, int w,
1284 : int h, int subpel_x_q4,
1285 : int subpel_y_q4,
1286 : ConvolveParams *conv_params,
1287 : int bd) {
1288 0 : const InterpFilterParams *filter_params_x =
1289 0 : subpel_x_q4 ? &av1_intrabc_filter_params : NULL;
1290 0 : const InterpFilterParams *filter_params_y =
1291 0 : subpel_y_q4 ? &av1_intrabc_filter_params : NULL;
1292 0 : if (subpel_x_q4 != 0 && subpel_y_q4 != 0) {
1293 0 : eb_av1_highbd_convolve_2d_sr_c(src, src_stride, dst, dst_stride, w, h,
1294 : filter_params_x, filter_params_y, 0, 0,
1295 : conv_params, bd);
1296 : }
1297 0 : else if (subpel_x_q4 != 0) {
1298 0 : eb_av1_highbd_convolve_x_sr_c(src, src_stride, dst, dst_stride, w, h,
1299 : filter_params_x, filter_params_y, 0, 0,
1300 : conv_params, bd);
1301 : }
1302 : else {
1303 0 : eb_av1_highbd_convolve_y_sr_c(src, src_stride, dst, dst_stride, w, h,
1304 : filter_params_x, filter_params_y, 0, 0,
1305 : conv_params, bd);
1306 : }
1307 0 : }
1308 :
1309 39670 : void svt_inter_predictor(const uint8_t *src, int32_t src_stride,
1310 : uint8_t *dst, int32_t dst_stride, const SubpelParams *subpel_params,
1311 : const ScaleFactors *sf, int32_t w, int32_t h, ConvolveParams *conv_params,
1312 : InterpFilters interp_filters, int32_t is_intrabc)
1313 : {
1314 : InterpFilterParams filter_params_x, filter_params_y;
1315 39670 : const int32_t is_scaled = has_scale(subpel_params->xs, subpel_params->ys);
1316 :
1317 39670 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
1318 : &filter_params_y, w, h);
1319 :
1320 39670 : assert(conv_params->do_average == 0 || conv_params->do_average == 1);
1321 39670 : assert(sf);
1322 : UNUSED(sf);
1323 39670 : assert(IMPLIES(is_intrabc, !is_scaled));
1324 :
1325 39670 : if (is_scaled) {
1326 0 : if (is_intrabc && (subpel_params->subpel_x != 0 ||
1327 0 : subpel_params->subpel_y != 0))
1328 : {
1329 0 : convolve_2d_for_intrabc(src, src_stride, dst, dst_stride, w, h,
1330 : subpel_params->subpel_x, subpel_params->subpel_y, conv_params);
1331 0 : return;
1332 : }
1333 0 : if (conv_params->is_compound) {
1334 0 : assert(conv_params->dst != NULL);
1335 : }
1336 0 : eb_av1_convolve_2d_scale(src, src_stride, dst, dst_stride, w, h,
1337 : &filter_params_x, &filter_params_y, subpel_params->subpel_x,
1338 : subpel_params->xs, subpel_params->subpel_y,
1339 : subpel_params->ys, conv_params);
1340 : }
1341 : else {
1342 39670 : SubpelParams sp = *subpel_params;
1343 39670 : revert_scale_extra_bits(&sp);
1344 :
1345 39670 : if (is_intrabc && (sp.subpel_x != 0 || sp.subpel_y != 0)) {
1346 0 : convolve_2d_for_intrabc(src, src_stride, dst, dst_stride, w, h,
1347 : sp.subpel_x, sp.subpel_y, conv_params);
1348 0 : return;
1349 : }
1350 :
1351 39670 : convolve[sp.subpel_x != 0][sp.subpel_y != 0][conv_params->is_compound](
1352 : src, src_stride, dst, dst_stride, w, h, &filter_params_x,
1353 : &filter_params_y, sp.subpel_x, sp.subpel_y, conv_params);
1354 : }
1355 : }
1356 :
1357 0 : void svt_highbd_inter_predictor(const uint16_t *src, int32_t src_stride,
1358 : uint16_t *dst, int32_t dst_stride, const SubpelParams *subpel_params,
1359 : const ScaleFactors *sf, int32_t w, int32_t h, ConvolveParams *conv_params,
1360 : InterpFilters interp_filters, int32_t is_intrabc, int32_t bd)
1361 : {
1362 :
1363 : InterpFilterParams filter_params_x, filter_params_y;
1364 0 : const int32_t is_scaled = has_scale(subpel_params->xs, subpel_params->ys);
1365 :
1366 0 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
1367 : &filter_params_y, w, h);
1368 :
1369 0 : assert(conv_params->do_average == 0 || conv_params->do_average == 1);
1370 0 : assert(sf);
1371 : UNUSED(sf);
1372 0 : assert(IMPLIES(is_intrabc, !is_scaled));
1373 :
1374 0 : if (is_scaled) {
1375 0 : if (is_intrabc && (subpel_params->subpel_x != 0 ||
1376 0 : subpel_params->subpel_y != 0))
1377 : {
1378 0 : highbd_convolve_2d_for_intrabc(src, src_stride, dst, dst_stride,
1379 : w, h, subpel_params->subpel_x, subpel_params->subpel_y,
1380 : conv_params, bd);
1381 0 : return;
1382 : }
1383 0 : if (conv_params->is_compound) {
1384 0 : assert(conv_params->dst != NULL);
1385 : }
1386 0 : eb_av1_highbd_convolve_2d_scale(src, src_stride, dst, dst_stride, w, h,
1387 : &filter_params_x, &filter_params_y, subpel_params->subpel_x,
1388 : subpel_params->xs, subpel_params->subpel_y,
1389 : subpel_params->ys, conv_params, bd);
1390 : }
1391 : else {
1392 0 : SubpelParams sp = *subpel_params;
1393 0 : revert_scale_extra_bits(&sp);
1394 :
1395 0 : if (is_intrabc && (sp.subpel_x != 0 || sp.subpel_y != 0)) {
1396 0 : highbd_convolve_2d_for_intrabc(src, src_stride, dst, dst_stride, w, h, sp.subpel_x,
1397 : sp.subpel_y, conv_params, bd);
1398 0 : return;
1399 : }
1400 :
1401 0 : convolveHbd[sp.subpel_x != 0][sp.subpel_y != 0][conv_params->is_compound](
1402 : src, src_stride, dst, dst_stride, w, h, &filter_params_x,
1403 : &filter_params_y, sp.subpel_x, sp.subpel_y, conv_params, bd);
1404 : }
1405 : }
1406 : #define USE_PRECOMPUTED_WEDGE_SIGN 1
1407 : #define USE_PRECOMPUTED_WEDGE_MASK 1
1408 :
1409 : #if USE_PRECOMPUTED_WEDGE_MASK
// Precomputed one-dimensional weight ramps (64 values each, rising from 0 to
// 64) from which init_wedge_master_masks() builds the two-dimensional master
// masks.
// The "odd" ramp is shift-copied into the odd rows (i + 1) of the
// WEDGE_OBLIQUE63 master.
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 6, 18,
    37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
// The "even" ramp is shift-copied into the even rows (i) of the
// WEDGE_OBLIQUE63 master.
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 11, 27,
    46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
// Copied unshifted into every row of the WEDGE_VERTICAL master.
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 7, 21,
    43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
1428 :
1429 :
/*
 * Copy an h-row block of 8-bit samples, w bytes per row, from src to dst.
 * src_stride and dst_stride give the distance between row starts. The four
 * filter arguments are accepted only for signature compatibility and are
 * ignored.
 */
void aom_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
    ptrdiff_t dst_stride, const int16_t *filter_x,
    int filter_x_stride, const int16_t *filter_y,
    int filter_y_stride, int w, int h) {
    (void)filter_x;
    (void)filter_x_stride;
    (void)filter_y;
    (void)filter_y_stride;

    for (int row = 0; row < h; ++row) {
        memcpy(dst, src, w);
        src += src_stride;
        dst += dst_stride;
    }
}
1447 :
/*
 * Copy a width-byte row from src to dst displaced by `shift` positions,
 * filling the vacated positions by repeating the nearest edge sample.
 *   shift >= 0: data moves towards higher indices; dst[0..shift) = src[0].
 *   shift <  0: data moves towards lower indices; the last -shift entries
 *               are set to src[width - 1].
 */
static void shift_copy(const uint8_t *src, uint8_t *dst, int shift, int width) {
    if (shift < 0) {
        const int lead = -shift;
        memcpy(dst, src + lead, width - lead);
        memset(dst + width - lead, src[width - 1], lead);
        return;
    }

    memcpy(dst + shift, src, width - shift);
    memset(dst, src[0], shift);
}
1459 : #endif // USE_PRECOMPUTED_WEDGE_MASK
1460 :
1461 :
// [negative][direction]
// Master masks, one MASK_MASTER_SIZE x MASK_MASTER_SIZE plane per direction.
// Index [0] holds the masters and [1] their complements; both are filled by
// init_wedge_master_masks().
DECLARE_ALIGNED(
    16, static uint8_t,
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);

// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
DECLARE_ALIGNED(16, static uint8_t,
    wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);
1471 :
// Fills wedge_mask_obl[0] (masters) and wedge_mask_obl[1] (complements) for
// every wedge direction.
//  Step 1: build the WEDGE_OBLIQUE63 and WEDGE_VERTICAL masters, either from
//          the precomputed 1-D rows (USE_PRECOMPUTED_WEDGE_MASK) or from a
//          tanh() ramp.
//  Step 2: derive the remaining directions and all complements from those two
//          by transposing, mirroring and subtracting from
//          (1 << WEDGE_WEIGHT_BITS).
1472 3 : static void init_wedge_master_masks() {
1473 : int i, j;
1474 3 : const int w = MASK_MASTER_SIZE;
1475 3 : const int h = MASK_MASTER_SIZE;
1476 3 : const int stride = MASK_MASTER_STRIDE;
1477 : // Note: index [0] stores the masters, and [1] its complement.
1478 : #if USE_PRECOMPUTED_WEDGE_MASK
1479 : // Generate prototype by shifting the masters
// Rows are produced two at a time. `shift` starts at h / 4 and is decremented
// once per pair, after the even row has been written, so the odd row of one
// pair and the even row of the next pair use the same shift.
1480 3 : int shift = h / 4;
1481 99 : for (i = 0; i < h; i += 2) {
1482 96 : shift_copy(wedge_master_oblique_even,
1483 96 : &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
1484 : MASK_MASTER_SIZE);
1485 96 : shift--;
1486 96 : shift_copy(wedge_master_oblique_odd,
1487 96 : &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
1488 : MASK_MASTER_SIZE);
// Every row of the vertical master is the same 1-D profile.
1489 96 : memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
1490 : wedge_master_vertical,
1491 : MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
1492 96 : memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
1493 : wedge_master_vertical,
1494 : MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
1495 : }
1496 : #else
// Analytic fallback: x and y are odd-valued coordinates centred on the mask;
// d is their projection onto the direction (2, 1), normalised by sqrt(5).
// Both masks are (1 + tanh(. / smoother_param)) * 32 rounded with rint().
1497 : static const double smoother_param = 2.85;
1498 : const int a[2] = { 2, 1 };
1499 : const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
1500 : for (i = 0; i < h; i++) {
1501 : for (j = 0; j < w; ++j) {
1502 : int x = (2 * j + 1 - w);
1503 : int y = (2 * i + 1 - h);
1504 : double d = (a[0] * x + a[1] * y) / asqrt;
1505 : const int msk = (int)rint((1.0 + tanh(d / smoother_param)) * 32);
1506 : wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j] = msk;
1507 : const int mskx = (int)rint((1.0 + tanh(x / smoother_param)) * 32);
1508 : wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j] = mskx;
1509 : }
1510 : }
1511 : #endif // USE_PRECOMPUTED_WEDGE_MASK
// Step 2: derive everything else from the two masters built above.
1512 195 : for (i = 0; i < h; ++i) {
1513 12480 : for (j = 0; j < w; ++j) {
1514 12288 : const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
// OBLIQUE27 is the transpose of OBLIQUE63.
1515 12288 : wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
// OBLIQUE117 is OBLIQUE63 with columns mirrored and values complemented;
// OBLIQUE153 stores the same value at the transposed position.
1516 12288 : wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
1517 12288 : wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
1518 12288 : (1 << WEDGE_WEIGHT_BITS) - msk;
// Index [1] holds the complement of index [0] at every position.
1519 12288 : wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
1520 12288 : wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
1521 12288 : (1 << WEDGE_WEIGHT_BITS) - msk;
1522 12288 : wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
1523 12288 : wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
// HORIZONTAL is the transpose of VERTICAL; [1] again holds the complements.
1524 12288 : const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
1525 12288 : wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
1526 12288 : wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
1527 12288 : wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
1528 12288 : (1 << WEDGE_WEIGHT_BITS) - mskx;
1529 : }
1530 : }
1531 3 : }
1532 :
1533 : #if !USE_PRECOMPUTED_WEDGE_SIGN
1534 : // If the signs of the wedges for the various block sizes are
1535 : // inconsistent, flip the sign flag. This is done only once for every
1536 : // wedge codebook.
// Only compiled when USE_PRECOMPUTED_WEDGE_SIGN is 0.
// For each block size with a non-zero `bits`, and each wedge index, the
// function averages the master mask along the block's top row and left column
// (bw + bh - 1 samples, rounded) and stores (avg < 32) into signflip[w].
// NOTE(review): this block uses BLOCK_SIZE, BLOCK_SIZES_ALL and
// wedge_params_type, whereas the code compiled in the other configuration uses
// BlockSize, BlockSizeS_ALL and WedgeParamsType. It also calls
// get_wedge_mask_inplace(), which is defined further down with no earlier
// declaration visible here. Confirm this path still builds before enabling it.
1537 : static void init_wedge_signs() {
1538 : BLOCK_SIZE sb_type;
1539 : memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup));
1540 : for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES_ALL; ++sb_type) {
1541 : const int bw = block_size_wide[sb_type];
1542 : const int bh = block_size_high[sb_type];
1543 : const wedge_params_type wedge_params = wedge_params_lookup[sb_type];
1544 : const int wbits = wedge_params.bits;
1545 : const int wtypes = 1 << wbits;
1546 : int i, w;
1547 : if (wbits) {
1548 : for (w = 0; w < wtypes; ++w) {
1549 : // Get the mask master, i.e. index [0]
1550 : const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type);
1551 : int avg = 0;
// Top row contributes bw samples, left column the remaining bh - 1
// (row 0 is already counted), then the sum is divided with rounding.
1552 : for (i = 0; i < bw; ++i) avg += mask[i];
1553 : for (i = 1; i < bh; ++i) avg += mask[i * MASK_MASTER_STRIDE];
1554 : avg = (avg + (bw + bh - 1) / 2) / (bw + bh - 1);
1555 : // Default sign of this wedge is 1 if the average < 32, 0 otherwise.
1556 : // If default sign is 1:
1557 : // If sign requested is 0, we need to flip the sign and return
1558 : // the complement i.e. index [1] instead. If sign requested is 1
1559 : // we need to flip the sign and return index [0] instead.
1560 : // If default sign is 0:
1561 : // If sign requested is 0, we need to return index [0] the master
1562 : // if sign requested is 1, we need to return the complement index [1]
1563 : // instead.
// wedge_params is a const copy of the table entry; the store goes through
// its signflip member, so signflip is presumably a pointer to shared storage.
1564 : wedge_params.signflip[w] = (avg < 32);
1565 : }
1566 : }
1567 : }
1568 : }
1569 : #endif // !USE_PRECOMPUTED_WEDGE_SIGN
1570 :
// Returns the `bits` field of wedge_params_lookup[sb_type]. Elsewhere in this
// file (1 << bits) is used as the number of wedge indices for the block size
// and bits == 0 means the block size has no wedge masks.
// sb_type is used as an array index without a range check.
1571 15979100 : static INLINE int get_wedge_bits_lookup(BlockSize sb_type) {
1572 15979100 : return wedge_params_lookup[sb_type].bits;
1573 : }
1574 :
// Returns a pointer into the master mask table for one wedge of one block size.
//   wedge_index: index into the block size's codebook.
//   neg:         requested sign; XOR-ed with the codebook's signflip entry to
//                pick wedge_mask_obl[0] (master) or [1] (complement).
//   sb_type:     block size; selects the codebook and the block dimensions.
// The returned pointer addresses rows of MASK_MASTER_STRIDE bytes. It is
// positioned so that block pixel (woff, hoff) maps to the centre of the master
// mask (MASK_MASTER_SIZE / 2, MASK_MASTER_SIZE / 2), where woff and hoff are
// the codebook offsets scaled by the block width/height and divided by 8.
1575 864 : static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
1576 : BlockSize sb_type) {
1577 : const uint8_t *master;
1578 864 : const int bh = block_size_high[sb_type];
1579 864 : const int bw = block_size_wide[sb_type];
1580 864 : const WedgeCodeType *a =
1581 864 : wedge_params_lookup[sb_type].codebook + wedge_index;
1582 : int woff, hoff;
// NOTE(review): signflip[wedge_index] is read here, before the assert below
// validates wedge_index, so an out-of-range index is dereferenced first.
1583 864 : const uint8_t wsignflip = wedge_params_lookup[sb_type].signflip[wedge_index];
1584 :
1585 864 : assert(wedge_index >= 0 &&
1586 : wedge_index < (1 << get_wedge_bits_lookup(sb_type)));
1587 864 : woff = (a->x_offset * bw) >> 3;
1588 864 : hoff = (a->y_offset * bh) >> 3;
1589 864 : master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
1590 864 : MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
1591 864 : MASK_MASTER_SIZE / 2 - woff;
1592 864 : return master;
1593 : }
1594 :
// Builds the contiguous per-block-size wedge masks.
// For every block size from BLOCK_4X4 up to (not including) BlockSizeS_ALL
// whose `bits` is non-zero, and for every wedge index, a bw x bh window of the
// master mask is copied into wedge_mask_buf with stride bw, once for sign 0
// and once for sign 1. The start of each copy is recorded in
// wedge_params->masks[sign][w]. Copies are packed back to back.
// Requires wedge_mask_obl (and the signflip table) to be initialised already,
// because get_wedge_mask_inplace() reads both.
1595 3 : static void init_wedge_masks() {
1596 3 : uint8_t *dst = wedge_mask_buf;
1597 : BlockSize bsize;
1598 3 : memset(wedge_masks, 0, sizeof(wedge_masks));
1599 69 : for (bsize = BLOCK_4X4; bsize < BlockSizeS_ALL; ++bsize) {
1600 : const uint8_t *mask;
1601 66 : const int bw = block_size_wide[bsize];
1602 66 : const int bh = block_size_high[bsize];
1603 66 : const WedgeParamsType *wedge_params = &wedge_params_lookup[bsize];
1604 66 : const int wbits = wedge_params->bits;
1605 66 : const int wtypes = 1 << wbits;
1606 : int w;
// Block sizes without wedge support get no masks.
1607 66 : if (wbits == 0) continue;
1608 459 : for (w = 0; w < wtypes; ++w) {
// Sign 0: source rows are MASK_MASTER_STRIDE apart, destination rows bw apart.
// The filter arguments are passed as NULL/0; presumably the copy ignores them.
1609 432 : mask = get_wedge_mask_inplace(w, 0, bsize);
1610 432 : aom_convolve_copy_c(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
1611 : bh);
1612 432 : wedge_params->masks[0][w] = dst;
1613 432 : dst += bw * bh;
1614 :
// Sign 1: same window taken from the other polarity.
1615 432 : mask = get_wedge_mask_inplace(w, 1, bsize);
1616 432 : aom_convolve_copy_c(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
1617 : bh);
1618 432 : wedge_params->masks[1][w] = dst;
1619 432 : dst += bw * bh;
1620 : }
// Debug-only check that the copies so far fit in wedge_mask_buf. It runs
// after the writes, so it detects an overrun rather than preventing one.
1621 27 : assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
1622 : }
1623 3 : }
1624 :
1625 : // Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
// Entry point that builds all wedge mask tables. The order matters: the master
// masks are built first, then (only when USE_PRECOMPUTED_WEDGE_SIGN is 0) the
// sign-flip table, and finally the per-block-size masks, which read both.
// It writes file-scope static tables and contains no locking.
1626 3 : void av1_init_wedge_masks() {
1627 3 : init_wedge_master_masks();
1628 : #if !USE_PRECOMPUTED_WEDGE_SIGN
1629 : init_wedge_signs();
1630 : #endif // !USE_PRECOMPUTED_WEDGE_SIGN
1631 3 : init_wedge_masks();
1632 3 : }
1633 :
// Builds a difference-weighted blend mask from two intermediate
// (CONV_BUF_TYPE) predictions.
//   mask:          output, h rows of w bytes, written with stride w.
//   which_inverse: non-zero stores AOM_BLEND_A64_MAX_ALPHA - m instead of m.
//   mask_base:     value added to the scaled difference before clamping.
//   src0/src1:     the two predictions, each with its own stride.
//   conv_params:   supplies round_0 / round_1 for the down-shift.
//   bd:            bit depth; adds (bd - 8) to the shift.
// Per pixel: m = clamp(mask_base + round_shift(|src0 - src1|) / DIFF_FACTOR,
//                      0, AOM_BLEND_A64_MAX_ALPHA).
1634 0 : static void diffwtd_mask_d16(uint8_t *mask, int which_inverse, int mask_base,
1635 : const CONV_BUF_TYPE *src0, int src0_stride,
1636 : const CONV_BUF_TYPE *src1, int src1_stride, int h,
1637 : int w, ConvolveParams *conv_params, int bd) {
// Shift that removes the precision left in the intermediate samples.
1638 0 : int round =
1639 0 : 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
1640 : int i, j, m, diff;
1641 0 : for (i = 0; i < h; ++i) {
1642 0 : for (j = 0; j < w; ++j) {
1643 0 : diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
1644 0 : diff = ROUND_POWER_OF_TWO(diff, round);
1645 0 : m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
1646 0 : mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
1647 : }
1648 : }
1649 0 : }
1650 :
// C implementation of the difference-weighted mask builder for intermediate
// (CONV_BUF_TYPE) predictions. It dispatches on mask_type:
//   DIFFWTD_38     -> base 38, direct mask
//   DIFFWTD_38_INV -> base 38, inverted mask
// Any other value hits assert(0); with asserts disabled, mask is left unwritten.
1651 0 : void av1_build_compound_diffwtd_mask_d16_c(
1652 : uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
1653 : int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
1654 : ConvolveParams *conv_params, int bd) {
1655 0 : switch (mask_type) {
1656 0 : case DIFFWTD_38:
1657 0 : diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
1658 : conv_params, bd);
1659 0 : break;
1660 0 : case DIFFWTD_38_INV:
1661 0 : diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
1662 : conv_params, bd);
1663 0 : break;
1664 0 : default: assert(0);
1665 : }
1666 0 : }
1667 :
// Forward declaration; the definition is not in this part of the file. It is
// used further down inside an assert that validates comp_data->type.
1668 : int is_masked_compound_type(COMPOUND_TYPE type);
1669 :
1670 : #if II_COMP_FLAG
1671 : /* clang-format off */
// 1-D weight profile used by build_smooth_interintra_mask(). It is indexed by
// (row or column position) * ii_size_scales[bsize]; values fall from 60 to 1.
1672 : static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
1673 : 60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
1674 : 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
1675 : 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 8,
1676 : 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 4, 4,
1677 : 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2,
1678 : 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
1679 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
1680 : };
// Per-block-size step into ii_weights1d, indexed by the BlockSize value.
// NOTE(review): not declared const although the visible code only reads it.
1681 : static uint8_t ii_size_scales[BlockSizeS_ALL] = {
1682 : 32, 16, 16, 16, 8, 8, 8, 4,
1683 : 4, 4, 2, 2, 2, 1, 1, 1,
1684 : 8, 8, 4, 4, 2, 2
1685 : };
1686 : /* clang-format on */
1687 :
// Writes a bw x bh inter-intra blending mask for the given intra mode.
//   mask:        output buffer; rows are `stride` bytes apart.
//   stride:      row pitch of mask in bytes.
//   plane_bsize: block size; selects bw, bh and the weight-table step.
//   mode:        II_V_PRED    -> weight depends on the row only
//                II_H_PRED    -> weight depends on the column only
//                II_SMOOTH_PRED -> weight depends on min(row, column)
//                II_DC_PRED / anything else -> constant 32
// Weights come from ii_weights1d[position * ii_size_scales[plane_bsize]].
1688 24111400 : static void build_smooth_interintra_mask(uint8_t *mask, int stride,
1689 : BlockSize plane_bsize,
1690 : INTERINTRA_MODE mode) {
1691 : int i, j;
1692 24111400 : const int bw = block_size_wide[plane_bsize];
1693 24111400 : const int bh = block_size_high[plane_bsize];
1694 24111400 : const int size_scale = ii_size_scales[plane_bsize];
1695 :
1696 24111400 : switch (mode) {
1697 6223300 : case II_V_PRED:
// One weight per row, replicated across the row.
1698 98468000 : for (i = 0; i < bh; ++i) {
1699 92244700 : memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
1700 92244700 : mask += stride;
1701 : }
1702 6223300 : break;
1703 :
1704 7888590 : case II_H_PRED:
// One weight per column; every row is identical.
1705 118706000 : for (i = 0; i < bh; ++i) {
1706 1911850000 : for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
1707 110817000 : mask += stride;
1708 : }
1709 7888590 : break;
1710 :
1711 4990540 : case II_SMOOTH_PRED:
// Weight is taken at the smaller of the row and column index.
1712 77351900 : for (i = 0; i < bh; ++i) {
1713 1234780000 : for (j = 0; j < bw; ++j)
1714 1162420000 : mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
1715 72361400 : mask += stride;
1716 : }
1717 4990540 : break;
1718 :
1719 5008960 : case II_DC_PRED:
1720 : default:
// Flat mask with the constant value 32.
1721 78322000 : for (i = 0; i < bh; ++i) {
1722 73313100 : memset(mask, 32, bw * sizeof(mask[0]));
1723 73313100 : mask += stride;
1724 : }
1725 5008960 : break;
1726 : }
1727 24111400 : }
1728 : #endif
// Returns the contiguous mask pointer stored by init_wedge_masks() for the
// given wedge index, sign and block size. All three arguments are used as
// array indices without range checks. The pointer is NULL-initialised state
// (wedge_masks is memset to 0) for entries init_wedge_masks() did not fill.
1729 452908000 : static INLINE const uint8_t *av1_get_contiguous_soft_mask(int wedge_index,
1730 : int wedge_sign,
1731 : BlockSize sb_type) {
1732 452908000 : return wedge_params_lookup[sb_type].masks[wedge_sign][wedge_index];
1733 : }
1734 :
1735 : #if COMP_INTERINTRA
// Blends an intra and an inter prediction into comppred8 (high bit depth).
//   mode:                 inter-intra mode for the smooth (non-wedge) mask.
//   use_wedge_interintra: non-zero selects the wedge mask path.
//   wedge_index/sign:     wedge selection for the wedge path.
//   bsize:                block size used to look up the wedge mask.
//   plane_bsize:          block size of the plane being blended (gives bw, bh).
//   comppred8/interpred8/intrapred8: output and the two inputs, with strides.
//   bd:                   bit depth passed to the blend.
// NOTE(review): on the wedge path, when is_interintra_wedge_used(bsize) is
// false the function returns without writing comppred8. Confirm callers never
// request wedge blending for such sizes.
1736 0 : void combine_interintra_highbd(
1737 : InterIntraMode mode, uint8_t use_wedge_interintra, uint8_t wedge_index,
1738 : uint8_t wedge_sign, BlockSize bsize, BlockSize plane_bsize,
1739 : uint8_t *comppred8, int compstride, const uint8_t *interpred8,
1740 : int interstride, const uint8_t *intrapred8, int intrastride, int bd)
1741 : {
1742 0 : const int bw = block_size_wide[plane_bsize];
1743 0 : const int bh = block_size_high[plane_bsize];
1744 :
1745 0 : if (use_wedge_interintra) {
1746 0 : if (is_interintra_wedge_used(bsize)) {
1747 : const uint8_t *mask =
1748 0 : av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
// subh / subw are 1 when the plane dimension equals 2 * mi_size_*[bsize].
1749 0 : const int subh = 2 * mi_size_high[bsize] == bh;
1750 0 : const int subw = 2 * mi_size_wide[bsize] == bw;
// The mask stride is the width of bsize, not of plane_bsize.
1751 0 : aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8,
1752 : intrastride, interpred8, interstride, mask,
1753 0 : block_size_wide[bsize], bw, bh, subw, subh, bd);
1754 : }
1755 0 : return;
1756 : }
1757 :
// Smooth path: build a bw-stride mask on the stack and blend without
// subsampling.
1758 : uint8_t mask[MAX_SB_SQUARE];
1759 0 : build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
1760 0 : aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1761 : interpred8, interstride, mask, bw, bw, bh, 0, 0,
1762 : bd);
1763 : }
1764 :
1765 : #endif //comp_interintra
1766 :
// Selects the blend mask for a masked compound prediction.
//   COMPOUND_WEDGE   -> the precomputed wedge mask for
//                       (wedge_index, wedge_sign, sb_type)
//   COMPOUND_DIFFWTD -> the caller-supplied seg_mask, returned unchanged
//   anything else    -> assert(0), and NULL when asserts are disabled
// The (void)sb_type cast is redundant: sb_type is used in the wedge case.
1767 39849500 : const uint8_t *av1_get_compound_type_mask(
1768 : const InterInterCompoundData *const comp_data,
1769 : uint8_t *seg_mask, BlockSize sb_type)
1770 : {
1771 39849500 : assert(is_masked_compound_type(comp_data->type));
1772 : (void)sb_type;
1773 39872800 : switch (comp_data->type) {
1774 13164900 : case COMPOUND_WEDGE:
1775 13164900 : return av1_get_contiguous_soft_mask(comp_data->wedge_index,
1776 13164900 : comp_data->wedge_sign, sb_type);
1777 26707900 : case COMPOUND_DIFFWTD: return seg_mask;
1778 0 : default: assert(0); return NULL;
1779 : }
1780 : }
1781 :
// Blends two intermediate (CONV_BUF_TYPE) predictions into dst using the mask
// selected by comp_data.
//   dst/dst_stride:     output block.
//   src0/src1:          the two intermediate predictions with their strides.
//   comp_data:          compound type plus wedge index/sign.
//   seg_mask:           mask used when the type is COMPOUND_DIFFWTD.
//   sb_type:            block size; gives the mask stride and, together with
//                       h and w, the subsampling flags.
//   h, w:               dimensions of the block being written.
//   conv_params:        forwarded to the blend.
//   bit_depth:          above EB_8BIT selects the high-bit-depth blend.
1782 39851000 : void build_masked_compound_no_round(
1783 : uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
1784 : const CONV_BUF_TYPE *src1, int src1_stride,
1785 : const InterInterCompoundData *const comp_data,
1786 : uint8_t *seg_mask,
1787 : BlockSize sb_type, int h,
1788 : int w, ConvolveParams *conv_params, uint8_t bit_depth)
1789 : {
1790 : // Derive subsampling from h and w passed in. May be refactored to
1791 : // pass in subsampling factors directly.
// subh / subw are 1 exactly when h / w equals 2 << mi_size_*_log2[sb_type].
1792 39851000 : const int subh = (2 << mi_size_high_log2[sb_type]) == h;
1793 39851000 : const int subw = (2 << mi_size_wide_log2[sb_type]) == w;
1794 39851000 : const uint8_t *mask = av1_get_compound_type_mask(comp_data, seg_mask, sb_type);
1795 :
// The mask stride is always block_size_wide[sb_type], regardless of w.
1796 39848200 : if (bit_depth > EB_8BIT) {
1797 0 : aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
1798 0 : src1_stride, mask, block_size_wide[sb_type], w,
1799 : h, subw, subh, conv_params, bit_depth);
1800 : }
1801 : else {
1802 39848200 : aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
1803 39848200 : src1_stride, mask, block_size_wide[sb_type], w,
1804 : h, subw, subh, conv_params);
1805 : }
1806 39864000 : }
1807 :
// Produces the second reference's prediction and blends it with the first
// reference's prediction using a wedge or difference-weighted mask.
//   src_ptr/src_stride:  reference samples for the second prediction.
//   dst_ptr/dst_stride:  final blended output.
//   blk_geom:            supplies bsize for the mask lookup.
//   bwidth/bheight:      block dimensions for this plane.
//   filter_params_x/y, subpel_x/y: interpolation settings.
//   conv_params:         on entry, dst/dst_stride hold the first prediction;
//                        do_average must be 0.
//   comp_data:           compound type, mask type, wedge index/sign.
//   bitdepth:            EB_8BIT selects convolve[], otherwise convolveHbd[].
//   plane:               0 for luma; the diff-weighted mask is only built
//                        for plane 0.
// NOTE(review): seg_mask is a local array written only when plane == 0. For
// plane != 0 with COMPOUND_DIFFWTD the blend still reads it, so it is used
// without being initialised in this call. The mask presumably has to come
// from the plane-0 call; confirm and pass it in explicitly.
// NOTE(review): conv_params->dst / dst_stride are left pointing at the local
// tmp_buf when the function returns; they are not restored to org_dst.
1808 39797900 : void av1_make_masked_inter_predictor(
1809 : uint8_t *src_ptr,
1810 : uint32_t src_stride,
1811 : uint8_t *dst_ptr,
1812 : uint32_t dst_stride,
1813 : const BlockGeom *blk_geom,
1814 : uint8_t bwidth,
1815 : uint8_t bheight,
1816 : InterpFilterParams *filter_params_x,
1817 : InterpFilterParams *filter_params_y,
1818 : int32_t subpel_x,
1819 : int32_t subpel_y,
1820 : ConvolveParams *conv_params,
1821 : InterInterCompoundData *comp_data,
1822 : uint8_t bitdepth,
1823 : uint8_t plane
1824 : )
1825 : {
1826 : //We come here when we have a prediction done using regular path for the ref0 stored in conv_param.dst.
1827 : //use regular path to generate a prediction for ref1 into a temporary buffer,
1828 : //then blend that temporary buffer with that from the first reference.
1829 :
1830 : DECLARE_ALIGNED(16, uint8_t, seg_mask[2 * MAX_SB_SQUARE]);
1831 :
1832 : #define INTER_PRED_BYTES_PER_PIXEL 2
1833 : DECLARE_ALIGNED(32, uint8_t,
1834 : tmp_buf[INTER_PRED_BYTES_PER_PIXEL * MAX_SB_SQUARE]);
1835 : #undef INTER_PRED_BYTES_PER_PIXEL
1836 : //uint8_t *tmp_dst = tmp_buf;
1837 39797900 : const int tmp_buf_stride = MAX_SB_SIZE;
1838 :
// Redirect the convolve output to the temporary buffer, keeping the first
// prediction's pointer and stride for the blend.
1839 39797900 : CONV_BUF_TYPE *org_dst = conv_params->dst;//save the ref0 prediction pointer
1840 39797900 : int org_dst_stride = conv_params->dst_stride;
1841 39797900 : CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
1842 39797900 : conv_params->dst = tmp_buf16;
1843 39797900 : conv_params->dst_stride = tmp_buf_stride;
1844 39797900 : assert(conv_params->do_average == 0);
1845 :
// The function table is indexed by whether each sub-pel offset is non-zero;
// the last index is fixed at 1 (presumably the compound variant).
1846 39797900 : if (bitdepth == EB_8BIT)
1847 39806100 : convolve[subpel_x != 0][subpel_y != 0][1](
1848 : src_ptr,
1849 : src_stride,
1850 : dst_ptr,
1851 : dst_stride,
1852 : bwidth,
1853 : bheight,
1854 : filter_params_x,
1855 : filter_params_y,
1856 : subpel_x,
1857 : subpel_y,
1858 : conv_params);
1859 : else
1860 0 : convolveHbd[subpel_x != 0][subpel_y != 0][1](
1861 : (uint16_t *)src_ptr,
1862 : src_stride,
1863 : (uint16_t *)dst_ptr,
1864 : dst_stride,
1865 : bwidth,
1866 : bheight,
1867 : filter_params_x,
1868 : filter_params_y,
1869 : subpel_x,
1870 : subpel_y,
1871 : conv_params,
1872 : bitdepth);
1873 :
1874 39845300 : if (!plane && comp_data->type == COMPOUND_DIFFWTD) {
1875 : //CHKN for DIFF: need to compute the mask comp_data->seg_mask is the output computed from the two preds org_dst and tmp_buf16
1876 : //for WEDGE the mask is fixed from the table based on wedge_sign/index
1877 26355400 : av1_build_compound_diffwtd_mask_d16(
1878 26355400 : seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
1879 : tmp_buf16, tmp_buf_stride, bheight, bwidth, conv_params, bitdepth);
1880 : }
1881 :
1882 39850600 : build_masked_compound_no_round(dst_ptr, dst_stride, org_dst, org_dst_stride,
1883 : tmp_buf16, tmp_buf_stride, comp_data, seg_mask,
1884 39850600 : blk_geom->bsize, bheight, bwidth, conv_params, bitdepth);
1885 :
1886 39864500 : }
1887 : #if INTER_INTER_HBD
// 16-bit-sample variant of av1_make_masked_inter_predictor(): it produces the
// second reference's prediction with convolveHbd[] and blends it with the
// first prediction held in conv_params->dst on entry.
// Parameters match the 8-bit variant except that src_ptr and dst_ptr are
// uint16_t pointers.
// NOTE(review): the convolve call is given the constant EB_10BIT, while the
// mask builder and the blend are given the `bitdepth` argument. Confirm that
// callers only ever pass bitdepth == EB_10BIT.
// NOTE(review): as in the 8-bit variant, seg_mask is only written for
// plane == 0 but is read for COMPOUND_DIFFWTD on every plane, and
// conv_params->dst is left pointing at the local tmp_buf on return.
1888 0 : void av1_make_masked_inter_predictor_hbd(
1889 : uint16_t *src_ptr,
1890 : uint32_t src_stride,
1891 : uint16_t *dst_ptr,
1892 : uint32_t dst_stride,
1893 : const BlockGeom *blk_geom,
1894 : uint8_t bwidth,
1895 : uint8_t bheight,
1896 : InterpFilterParams *filter_params_x,
1897 : InterpFilterParams *filter_params_y,
1898 : int32_t subpel_x,
1899 : int32_t subpel_y,
1900 : ConvolveParams *conv_params,
1901 : InterInterCompoundData *comp_data,
1902 : uint8_t bitdepth,
1903 : uint8_t plane
1904 : )
1905 : {
1906 : //We come here when we have a prediction done using regular path for the ref0 stored in conv_param.dst.
1907 : //use regular path to generate a prediction for ref1 into a temporary buffer,
1908 : //then blend that temporary buffer with that from the first reference.
1909 :
1910 : DECLARE_ALIGNED(16, uint8_t, seg_mask[2 * MAX_SB_SQUARE]);
1911 :
1912 : #define INTER_PRED_BYTES_PER_PIXEL 2
1913 : DECLARE_ALIGNED(32, uint8_t,
1914 : tmp_buf[INTER_PRED_BYTES_PER_PIXEL * MAX_SB_SQUARE]);
1915 : #undef INTER_PRED_BYTES_PER_PIXEL
1916 : //uint8_t *tmp_dst = tmp_buf;
1917 0 : const int tmp_buf_stride = MAX_SB_SIZE;
1918 :
// Redirect the convolve output to the temporary buffer, keeping the first
// prediction's pointer and stride for the blend.
1919 0 : CONV_BUF_TYPE *org_dst = conv_params->dst;//save the ref0 prediction pointer
1920 0 : int org_dst_stride = conv_params->dst_stride;
1921 0 : CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
1922 0 : conv_params->dst = tmp_buf16;
1923 0 : conv_params->dst_stride = tmp_buf_stride;
1924 0 : assert(conv_params->do_average == 0);
1925 :
1926 0 : convolveHbd[subpel_x != 0][subpel_y != 0][1](
1927 : src_ptr,
1928 : src_stride,
1929 : dst_ptr,
1930 : dst_stride,
1931 : bwidth,
1932 : bheight,
1933 : filter_params_x,
1934 : filter_params_y,
1935 : subpel_x,
1936 : subpel_y,
1937 : conv_params,
1938 : EB_10BIT);
1939 :
1940 0 : if (!plane && comp_data->type == COMPOUND_DIFFWTD) {
1941 : //CHKN for DIFF: need to compute the mask comp_data->seg_mask is the output computed from the two preds org_dst and tmp_buf16
1942 : //for WEDGE the mask is fixed from the table based on wedge_sign/index
1943 0 : av1_build_compound_diffwtd_mask_d16(
1944 0 : seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
1945 : tmp_buf16, tmp_buf_stride, bheight, bwidth, conv_params, bitdepth);
1946 : }
1947 :
// The blend helper takes a uint8_t pointer, so the 16-bit output is cast.
1948 0 : build_masked_compound_no_round((uint8_t *)dst_ptr, dst_stride, org_dst, org_dst_stride,
1949 : tmp_buf16, tmp_buf_stride, comp_data, seg_mask,
1950 0 : blk_geom->bsize, bheight, bwidth, conv_params, bitdepth);
1951 :
1952 0 : }
1953 : #endif
1954 :
// Warped-motion variant of the masked compound predictor: the second
// prediction is produced by eb_av1_warp_plane() instead of a convolve, then
// blended with the first prediction held in conv_params->dst on entry.
//   src_ptr/src_stride, buf_width/buf_height: reference picture and its size.
//   dst_ptr/dst_stride:   final blended output.
//   blk_geom:             supplies bsize for the mask lookup.
//   bwidth/bheight:       block dimensions for this plane.
//   conv_params:          do_average must be 0.
//   comp_data:            compound type, mask type, wedge index/sign.
//   bitdepth:             above EB_8BIT sets the 16-bit flag for the warp.
//   plane:                0 -> no subsampling; otherwise ss_x = ss_y = 1.
//   pu_origin_x/y:        block origin forwarded to the warp.
//   wm_params_l1:         warp model for the second reference.
// NOTE(review): the argument passed just before ss_x is MAX_SB_SQUARE, while
// the temporary buffer stride configured here is MAX_SB_SIZE. Confirm which
// value eb_av1_warp_plane() expects in that position.
// NOTE(review): seg_mask is only written for plane == 0 but is read for
// COMPOUND_DIFFWTD on every plane, and conv_params->dst is left pointing at
// the local tmp_buf on return.
1955 0 : void av1_make_masked_warp_inter_predictor(
1956 : uint8_t *src_ptr,
1957 : uint32_t src_stride,
1958 : uint16_t buf_width,
1959 : uint16_t buf_height,
1960 : uint8_t *dst_ptr,
1961 : uint32_t dst_stride,
1962 : const BlockGeom *blk_geom,
1963 : uint8_t bwidth,
1964 : uint8_t bheight,
1965 : ConvolveParams *conv_params,
1966 : InterInterCompoundData *comp_data,
1967 : uint8_t bitdepth,
1968 : uint8_t plane,
1969 : uint16_t pu_origin_x,
1970 : uint16_t pu_origin_y,
1971 : EbWarpedMotionParams *wm_params_l1
1972 : )
1973 : {
1974 0 : EbBool is16bit = (EbBool)(bitdepth > EB_8BIT);
1975 :
1976 : //We come here when we have a prediction done using regular path for the ref0 stored in conv_param.dst.
1977 : //use regular path to generate a prediction for ref1 into a temporary buffer,
1978 : //then blend that temporary buffer with that from the first reference.
1979 :
1980 : DECLARE_ALIGNED(16, uint8_t, seg_mask[2 * MAX_SB_SQUARE]);
1981 :
1982 : #define INTER_PRED_BYTES_PER_PIXEL 2
1983 : DECLARE_ALIGNED(32, uint8_t,
1984 : tmp_buf[INTER_PRED_BYTES_PER_PIXEL * MAX_SB_SQUARE]);
1985 : #undef INTER_PRED_BYTES_PER_PIXEL
1986 0 : uint8_t *tmp_dst = tmp_buf;
1987 0 : const int tmp_buf_stride = MAX_SB_SIZE;
1988 :
// Redirect the intermediate output to the temporary buffer, keeping the first
// prediction's pointer and stride for the blend.
1989 0 : CONV_BUF_TYPE *org_dst = conv_params->dst;//save the ref0 prediction pointer
1990 0 : int org_dst_stride = conv_params->dst_stride;
1991 0 : CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
1992 0 : conv_params->dst = tmp_buf16;
1993 0 : conv_params->dst_stride = tmp_buf_stride;
1994 0 : assert(conv_params->do_average == 0);
1995 :
// Both subsampling flags are 1 for every non-zero plane.
1996 0 : uint8_t ss_x = plane == 0 ? 0 : 1; // subsamplings
1997 0 : uint8_t ss_y = plane == 0 ? 0 : 1;
1998 :
1999 0 : eb_av1_warp_plane(
2000 : wm_params_l1,
2001 : (int) is16bit,
2002 : bitdepth,
2003 : src_ptr,
2004 : (int)buf_width,
2005 : (int)buf_height,
2006 : src_stride,
2007 : tmp_dst,
2008 : pu_origin_x,
2009 : pu_origin_y,
2010 : bwidth,
2011 : bheight,
2012 : MAX_SB_SQUARE,
2013 : ss_x, //int subsampling_x,
2014 : ss_y, //int subsampling_y,
2015 : conv_params);
2016 :
2017 0 : if (!plane && comp_data->type == COMPOUND_DIFFWTD) {
2018 : //CHKN for DIFF: need to compute the mask comp_data->seg_mask is the output computed from the two preds org_dst and tmp_buf16
2019 : //for WEDGE the mask is fixed from the table based on wedge_sign/index
2020 0 : av1_build_compound_diffwtd_mask_d16(
2021 0 : seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
2022 : tmp_buf16, tmp_buf_stride, bheight, bwidth, conv_params, bitdepth);
2023 : }
2024 :
2025 0 : build_masked_compound_no_round(dst_ptr, dst_stride, org_dst, org_dst_stride,
2026 : tmp_buf16, tmp_buf_stride, comp_data, seg_mask,
2027 0 : blk_geom->bsize, bheight, bwidth, conv_params, bitdepth);
2028 :
2029 0 : }
2030 :
2031 :
// Computes the residual diff = src - pred for a rows x cols block of 8-bit
// samples.
//   rows, cols:   block dimensions.
//   diff:         int16_t output; rows are diff_stride elements apart.
//   src, pred:    8-bit inputs; rows are src_stride / pred_stride apart.
// Each difference lies in [-255, 255], so it always fits in int16_t.
2032 0 : void aom_subtract_block_c(int rows, int cols, int16_t *diff,
2033 : ptrdiff_t diff_stride, const uint8_t *src,
2034 : ptrdiff_t src_stride, const uint8_t *pred,
2035 : ptrdiff_t pred_stride) {
2036 : int r, c;
2037 :
2038 0 : for (r = 0; r < rows; r++) {
2039 0 : for (c = 0; c < cols; c++) diff[c] = src[c] - pred[c];
2040 :
// Advance all three row pointers to the next row.
2041 0 : diff += diff_stride;
2042 0 : pred += pred_stride;
2043 0 : src += src_stride;
2044 : }
2045 0 : }
2046 :
// Builds a difference-weighted blend mask from two 8-bit predictions.
//   mask:          output, h rows of w bytes, written with stride w.
//   which_inverse: non-zero stores AOM_BLEND_A64_MAX_ALPHA - m instead of m.
//   mask_base:     value added to the scaled difference before clamping.
//   src0/src1:     the two predictions, each with its own stride.
// Per pixel: m = clamp(mask_base + |src0 - src1| / DIFF_FACTOR,
//                      0, AOM_BLEND_A64_MAX_ALPHA).
// Unlike diffwtd_mask_d16(), no rounding shift is applied to the difference.
2047 0 : static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
2048 : const uint8_t *src0, int src0_stride,
2049 : const uint8_t *src1, int src1_stride, int h, int w) {
2050 : int i, j, m, diff;
2051 0 : for (i = 0; i < h; ++i) {
2052 0 : for (j = 0; j < w; ++j) {
2053 0 : diff =
2054 0 : abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
2055 0 : m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
2056 0 : mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
2057 : }
2058 : }
2059 0 : }
2060 :
// C implementation of the difference-weighted mask builder for 8-bit
// predictions. It dispatches on mask_type:
//   DIFFWTD_38     -> base 38, direct mask
//   DIFFWTD_38_INV -> base 38, inverted mask
// Any other value hits assert(0); with asserts disabled, mask is left unwritten.
2061 0 : void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
2062 : DIFFWTD_MASK_TYPE mask_type,
2063 : const uint8_t *src0, int src0_stride,
2064 : const uint8_t *src1, int src1_stride,
2065 : int h, int w) {
2066 0 : switch (mask_type) {
2067 0 : case DIFFWTD_38:
2068 0 : diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
2069 0 : break;
2070 0 : case DIFFWTD_38_INV:
2071 0 : diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
2072 0 : break;
2073 0 : default: assert(0);
2074 : }
2075 0 : }
2076 : #define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
2077 :
2078 : /**
2079 : * Computes SSE of a compound predictor constructed from 2 fundamental
2080 : * predictors p0 and p1 using blending with mask.
2081 : *
2082 : * r1: Residuals of p1.
2083 : * (source - p1)
2084 : * d: Difference of p1 and p0.
2085 : * (p1 - p0)
2086 : * m: The blending mask
2087 : * N: Number of pixels
2088 : *
2089 : * 'r1', 'd', and 'm' are contiguous.
2090 : *
2091 : * Computes:
2092 : * Sum((MAX_MASK_VALUE*r1 + mask*d)**2), which is equivalent to:
2093 : * Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2),
2094 : * where r0 is (source - p0), and r1 is (source - p1), which in turn
2095 : * is equivalent to:
2096 : * Sum((source*MAX_MASK_VALUE - (mask*p0 + (MAX_MASK_VALUE-mask)*p1))**2),
2097 : * which is the SSE of the residuals of the compound predictor scaled up by
2098 : * MAX_MASK_VALUE**2.
2099 : *
2100 : * Note that we clamp the partial term in the loop to 16 bits signed. This is
2101 : * to facilitate equivalent SIMD implementation. It should have no effect if
2102 : * residuals are within 16 - WEDGE_WEIGHT_BITS (=10) signed, which always
2103 : * holds for 8 bit input, and on real input, it should hold practically always,
2104 : * as residuals are expected to be small.
2105 : */
// Returns Sum(clamp(MAX_MASK_VALUE * r1[i] + m[i] * d[i], int16 range)^2)
// over N contiguous elements, rounded down by 2 * WEDGE_WEIGHT_BITS bits.
//   r1: residuals of the second predictor.
//   d:  per-pixel difference between the two predictors.
//   m:  blend mask.
//   N:  number of elements in each array.
2106 0 : uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d,
2107 : const uint8_t *m, int N) {
2108 0 : uint64_t csse = 0;
2109 : int i;
2110 :
2111 0 : for (i = 0; i < N; i++) {
2112 0 : int32_t t = MAX_MASK_VALUE * r1[i] + m[i] * d[i];
// After the clamp |t| <= 32768, so t * t fits in a 32-bit int before it is
// added to the 64-bit accumulator.
2113 0 : t = clamp(t, INT16_MIN, INT16_MAX);
2114 0 : csse += t * t;
2115 : }
2116 0 : return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS);
2117 : }
// Maps a BlockSize value to the row (0..3) of interp_rgrid_curv used by
// av1_model_rd_curvfit().
2118 : static const uint8_t bsize_curvfit_model_cat_lookup[BlockSizeS_ALL] = {
2119 : 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 0, 0, 1, 1, 2, 2
2120 : };
// Returns the row of interp_dgrid_curv to use: 1 when sse_norm > 16.0,
// otherwise 0.
2121 310486000 : static int sse_norm_curvfit_model_cat_lookup(double sse_norm) {
2122 310486000 : return (sse_norm > 16.0);
2123 : }
// Rate grid for av1_model_rd_curvfit(): one row per block-size category
// (see bsize_curvfit_model_cat_lookup), 65 samples per row. The column index
// is floor((xqr - x_start) / x_step) with x_start = -15.5 and x_step = 0.5.
2124 : static const double interp_rgrid_curv[4][65] = {
2125 : {
2126 : 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
2127 : 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
2128 : 0.000000, 23.801499, 28.387688, 33.388795, 42.298282,
2129 : 41.525408, 51.597692, 49.566271, 54.632979, 60.321507,
2130 : 67.730678, 75.766165, 85.324032, 96.600012, 120.839562,
2131 : 173.917577, 255.974908, 354.107573, 458.063476, 562.345966,
2132 : 668.568424, 772.072881, 878.598490, 982.202274, 1082.708946,
2133 : 1188.037853, 1287.702240, 1395.588773, 1490.825830, 1584.231230,
2134 : 1691.386090, 1766.822555, 1869.630904, 1926.743565, 2002.949495,
2135 : 2047.431137, 2138.486068, 2154.743767, 2209.242472, 2277.593051,
2136 : 2290.996432, 2307.452938, 2343.567091, 2397.654644, 2469.425868,
2137 : 2558.591037, 2664.860422, 2787.944296, 2927.552932, 3083.396602,
2138 : 3255.185579, 3442.630134, 3645.440541, 3863.327072, 4096.000000,
2139 : },
2140 : {
2141 : 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
2142 : 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
2143 : 0.000000, 8.998436, 9.439592, 9.731837, 10.865931,
2144 : 11.561347, 12.578139, 14.205101, 16.770584, 19.094853,
2145 : 21.330863, 23.298907, 26.901921, 34.501017, 57.891733,
2146 : 112.234763, 194.853189, 288.302032, 380.499422, 472.625309,
2147 : 560.226809, 647.928463, 734.155122, 817.489721, 906.265783,
2148 : 999.260562, 1094.489206, 1197.062998, 1293.296825, 1378.926484,
2149 : 1472.760990, 1552.663779, 1635.196884, 1692.451951, 1759.741063,
2150 : 1822.162720, 1916.515921, 1966.686071, 2031.647506, 2033.700134,
2151 : 2087.847688, 2161.688858, 2242.536028, 2334.023491, 2436.337802,
2152 : 2549.665519, 2674.193198, 2810.107395, 2957.594666, 3116.841567,
2153 : 3288.034655, 3471.360486, 3667.005616, 3875.156602, 4096.000000,
2154 : },
2155 : {
2156 : 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
2157 : 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
2158 : 0.000000, 2.377584, 2.557185, 2.732445, 2.851114,
2159 : 3.281800, 3.765589, 4.342578, 5.145582, 5.611038,
2160 : 6.642238, 7.945977, 11.800522, 17.346624, 37.501413,
2161 : 87.216800, 165.860942, 253.865564, 332.039345, 408.518863,
2162 : 478.120452, 547.268590, 616.067676, 680.022540, 753.863541,
2163 : 834.529973, 919.489191, 1008.264989, 1092.230318, 1173.971886,
2164 : 1249.514122, 1330.510941, 1399.523249, 1466.923387, 1530.533471,
2165 : 1586.515722, 1695.197774, 1746.648696, 1837.136959, 1909.075485,
2166 : 1975.074651, 2060.159200, 2155.335095, 2259.762505, 2373.710437,
2167 : 2497.447898, 2631.243895, 2775.367434, 2930.087523, 3095.673170,
2168 : 3272.393380, 3460.517161, 3660.313520, 3872.051464, 4096.000000,
2169 : },
2170 : {
2171 : 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
2172 : 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
2173 : 0.000000, 0.296997, 0.342545, 0.403097, 0.472889,
2174 : 0.614483, 0.842937, 1.050824, 1.326663, 1.717750,
2175 : 2.530591, 3.582302, 6.995373, 9.973335, 24.042464,
2176 : 56.598240, 113.680735, 180.018689, 231.050567, 266.101082,
2177 : 294.957934, 323.326511, 349.434429, 380.443211, 408.171987,
2178 : 441.214916, 475.716772, 512.900000, 551.186939, 592.364455,
2179 : 624.527378, 661.940693, 679.185473, 724.800679, 764.781792,
2180 : 873.050019, 950.299001, 939.292954, 1052.406153, 1033.893184,
2181 : 1112.182406, 1219.174326, 1337.296681, 1471.648357, 1622.492809,
2182 : 1790.093491, 1974.713858, 2176.617364, 2396.067465, 2633.327614,
2183 : 2888.661266, 3162.331876, 3454.602899, 3765.737789, 4096.000000,
2184 : },
2185 : };
2186 :
// Distortion-over-SSE grid for av1_model_rd_curvfit(): row 0 is used when
// sse_norm <= 16.0 and row 1 otherwise (see
// sse_norm_curvfit_model_cat_lookup). It has 65 samples per row and uses the
// same column index as interp_rgrid_curv.
2187 : static const double interp_dgrid_curv[2][65] = {
2188 : {
2189 : 16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
2190 : 15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
2191 : 15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
2192 : 13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
2193 : 7.487633, 5.688649, 4.267515, 3.196300, 2.434201, 1.834064,
2194 : 1.369920, 1.035921, 0.775279, 0.574895, 0.427232, 0.314123,
2195 : 0.233236, 0.171440, 0.128188, 0.092762, 0.067569, 0.049324,
2196 : 0.036330, 0.027008, 0.019853, 0.015539, 0.011093, 0.008733,
2197 : 0.007624, 0.008105, 0.005427, 0.004065, 0.003427, 0.002848,
2198 : 0.002328, 0.001865, 0.001457, 0.001103, 0.000801, 0.000550,
2199 : 0.000348, 0.000193, 0.000085, 0.000021, 0.000000,
2200 : },
2201 : {
2202 : 16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
2203 : 15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
2204 : 15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
2205 : 13.073692, 12.222005, 11.237799, 9.985848, 8.898823, 7.423519,
2206 : 5.995325, 4.773152, 3.744032, 2.938217, 2.294526, 1.762412,
2207 : 1.327145, 1.020728, 0.765535, 0.570548, 0.425833, 0.313825,
2208 : 0.232959, 0.171324, 0.128174, 0.092750, 0.067558, 0.049319,
2209 : 0.036330, 0.027008, 0.019853, 0.015539, 0.011093, 0.008733,
2210 : 0.007624, 0.008105, 0.005427, 0.004065, 0.003427, 0.002848,
2211 : 0.002328, 0.001865, 0.001457, 0.001103, 0.000801, 0.000550,
2212 : 0.000348, 0.000193, 0.000085, 0.000021, -0.000000,
2213 : },
2214 : };
2215 :
2216 :
2217 : /*
2218 : Precalculation of the factors for interp_cubic()
2219 : interp_cubic() OUT is: p[1] + 0.5 * x * (p[2] - p[0] +
2220 : x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] +
2221 : x * (3.0 * (p[1] - p[2]) + p[3] - p[0])));
2222 : Precalculation:
2223 : interp_cubic() OUT is: D + x * (C + x * (B + x * A))
2224 : For precalculated factors:
2225 : double A = 0.5 *(3.0 * (p[1] - p[2]) + p[3] - p[0]);
2226 : double B = 0.5 *(2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3]);
2227 : double C = 0.5 * (p[2] - p[0]);
2228 : double D = p[1];
2229 :
2230 : Precalculated values of array factors:
2231 : A is: (0 to sizeof(ARRAY[])-1)
2232 : B is: (0 to sizeof(ARRAY[A][])-4)
2233 : PRECALC[A][B][0] = 0.5 *(3.0 * (ARRAY[A][B+1] - ARRAY[A][B+2]) + ARRAY[A][B+3] - ARRAY[A][B])
2234 : PRECALC[A][B][1] = 0.5 *(2.0 * ARRAY[A][B] - 5.0 * ARRAY[A][B+1] + 4.0 * ARRAY[A][B+2] - ARRAY[A][B+3]);
2235 : PRECALC[A][B][2] = 0.5 * (ARRAY[A][B+2] - ARRAY[A][B]);
2236 : PRECALC[A][B][3] = ARRAY[A][B+1]
2237 : */
2238 :
// Looks up the modelled rate and distortion/SSE ratio for a block.
//   bsize:       block size; selects the rate-grid row via
//                bsize_curvfit_model_cat_lookup (used as an index unchecked).
//   sse_norm:    SSE per sample; selects the distortion-grid row (> 16.0 or not).
//   xqr:         feature value; clamped to
//                [x_start + x_step + epsilon, x_end - x_step - epsilon].
//   rate_f:      out, rate-grid sample at the computed column.
//   distbysse_f: out, distortion-grid sample at the computed column.
// The outputs are the grid samples at column floor((xqr - x_start) / x_step);
// no interpolation between neighbouring samples is performed here.
2239 310536000 : void av1_model_rd_curvfit(BlockSize bsize, double sse_norm, double xqr,
2240 : double *rate_f, double *distbysse_f) {
2241 310536000 : const double x_start = -15.5;
2242 310536000 : const double x_end = 16.5;
2243 310536000 : const double x_step = 0.5;
2244 310536000 : const double epsilon = 1e-6;
2245 310536000 : const int rcat = bsize_curvfit_model_cat_lookup[bsize];
2246 310536000 : const int dcat = sse_norm_curvfit_model_cat_lookup(sse_norm);
2247 : (void)x_end;
2248 :
// After clamping, x lies strictly between 1 and 63, so xi is in [1, 62] and
// both xi - 1 and xi are valid columns of the 65-entry grids.
2249 310814000 : xqr = AOMMAX(xqr, x_start + x_step + epsilon);
2250 310814000 : xqr = AOMMIN(xqr, x_end - x_step - epsilon);
2251 310814000 : const double x = (xqr - x_start) / x_step;
2252 310814000 : const int xi = (int)floor(x);
2253 310814000 : assert(xi > 0);
2254 :
// The pointers start one column to the left, so element [1] is column xi.
2255 310814000 : const double *prate = &interp_rgrid_curv[rcat][(xi - 1)];
2256 310814000 : *rate_f = prate[1];
2257 310814000 : const double *pdist = &interp_dgrid_curv[dcat][(xi - 1)];
2258 310814000 : *distbysse_f = pdist[1];
2259 :
2260 310814000 : }
2261 :
2262 : // Fits a curve for rate and distortion using as feature:
2263 : // log2(sse_norm/qstep^2)
//   picture_control_set_ptr: supplies base_q_idx and the dequantizer tables.
//   plane_bsize:  block size forwarded to av1_model_rd_curvfit().
//   sse:          sum of squared errors for the block.
//   num_samples:  number of samples sse was accumulated over.
//   rate, dist:   optional outputs (each may be NULL).
//   rdmult:       multiplier passed to RDCOST() for the skip comparison.
// When sse == 0 both outputs are 0. Otherwise the modelled rate/distortion
// are compared against the zero-rate alternative, whose distortion is
// sse << 4, and the cheaper of the two is returned.
2264 309433000 : static void model_rd_with_curvfit(
2265 : PictureControlSet *picture_control_set_ptr,
2266 : BlockSize plane_bsize,
2267 : int64_t sse, int num_samples, int *rate,
2268 : int64_t *dist,
2269 : uint32_t rdmult
2270 : )
2271 : {
// Redundant cast: plane_bsize is used in the av1_model_rd_curvfit() call.
2272 : (void)plane_bsize;
2273 309433000 : const int dequant_shift = 3;
2274 : #if 0
2275 : int32_t current_q_index = MAX(0, MIN(QINDEX_RANGE - 1, picture_control_set_ptr->parent_pcs_ptr->frm_hdr.quantization_params.base_q_idx));
2276 : #else
// base_q_idx is used as an index as-is; the clamped variant is compiled out.
2277 309433000 : int32_t current_q_index = picture_control_set_ptr->parent_pcs_ptr->frm_hdr.quantization_params.base_q_idx;
2278 : #endif
2279 309433000 : Dequants *const dequants = &picture_control_set_ptr->parent_pcs_ptr->deq;
2280 309433000 : int16_t quantizer = dequants->y_dequant_Q3[current_q_index][1];
2281 :
// Quantizer step with the low 3 bits dropped, floored at 1.
2282 309433000 : const int qstep = AOMMAX(quantizer >> dequant_shift, 1);
2283 :
2284 309433000 : if (sse == 0) {
2285 673 : if (rate) *rate = 0;
2286 673 : if (dist) *dist = 0;
2287 673 : return;
2288 : }
2289 309432000 : aom_clear_system_state();
2290 309449000 : const double sse_norm = (double)sse / num_samples;
// NOTE(review): the cast binds to sse_norm alone, so the argument of LOG2F is
// an integer quotient. It is 0 whenever sse_norm < qstep * qstep. How LOG2F
// treats 0 is not visible here; confirm the intended behaviour.
2291 309449000 : const double xqr = (double)LOG2F((uint32_t)sse_norm / (qstep * qstep));
2292 :
2293 : double rate_f, dist_by_sse_norm_f;
2294 309495000 : av1_model_rd_curvfit(plane_bsize, sse_norm, xqr, &rate_f, &dist_by_sse_norm_f);
2295 :
// Scale the per-sample model outputs to the whole block, rounding to nearest.
2296 310500000 : const double dist_f = dist_by_sse_norm_f * sse_norm;
2297 310500000 : int rate_i = (int)((rate_f * num_samples) + 0.5);
2298 310500000 : int64_t dist_i = (int64_t)((dist_f * num_samples) + 0.5);
2299 310500000 : aom_clear_system_state();
2300 :
2301 : // Check if skip is better
2302 311086000 : if (rate_i == 0) {
2303 0 : dist_i = sse << 4;
2304 : }
2305 311086000 : else if (RDCOST(rdmult, rate_i, dist_i) >= RDCOST(rdmult, 0, sse << 4)) {
2306 233120000 : rate_i = 0;
2307 233120000 : dist_i = sse << 4;
2308 : }
2309 :
2310 311086000 : if (rate) *rate = rate_i;
2311 311086000 : if (dist) *dist = dist_i;
2312 : }
2313 :
2314 :
/**
 * Element-wise difference of squares of two int16_t arrays.
 *
 * d: Output, d[i] = a[i]**2 - b[i]**2, saturated to the int16_t range
 * a: First input array
 * b: Second input array
 * N: Number of elements
 *
 * 'd', 'a', and 'b' are contiguous. Both inputs at index i are read before
 * d[i] is written.
 */
void av1_wedge_compute_delta_squares_c(int16_t *d, const int16_t *a,
                                       const int16_t *b, int N) {
    for (int idx = 0; idx < N; ++idx) {
        const int sq_a = a[idx] * a[idx];
        const int sq_b = b[idx] * b[idx];
        d[idx] = clamp(sq_a - sq_b, INT16_MIN, INT16_MAX);
    }
}
2334 :
/**
 * Sum of squares of an int16_t array.
 *
 * src: Input array of 'n' elements
 * n:   Number of elements; 0 is allowed and yields 0 without reading 'src'
 *
 * Returns the sum of src[i]**2 over all elements.
 */
uint64_t aom_sum_squares_i16_c(const int16_t *src, uint32_t n) {
    uint64_t ss = 0;

    /* A counted loop (rather than do/while on --n) keeps n == 0 from
     * wrapping the counter and walking far past the end of 'src'. */
    for (uint32_t i = 0; i < n; ++i) {
        const int32_t v = src[i];
        ss += (uint64_t)(v * v);
    }

    return ss;
}
/**
 * Choose the mask sign for a compound predictor.
 *
 * ds:    Difference of the squares of the residuals.
 *        r0**2 - r1**2
 * m:     The blending mask
 * N:     Number of pixels; N <= 0 is treated as an empty input
 * limit: Pre-computed threshold value.
 *        MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
 *
 * 'ds' and 'm' are contiguous.
 *
 * Returns true if the negated mask has lower SSE compared to the positive
 * mask. Computation is based on:
 *  Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2)
 *                                     >
 *                                Sum(((MAX_MASK_VALUE-mask)*r0 + mask*r1)**2)
 *
 * which can be simplified to:
 *
 *  Sum(mask*(r0**2 - r1**2)) > MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
 *
 * The right hand side does not depend on the mask, and needs to be passed as
 * the 'limit' parameter.
 *
 * After pre-computing (r0**2 - r1**2), which is passed in as 'ds', the left
 * hand side is simply a scalar product between an int16_t and uint8_t vector.
 *
 * Note that for efficiency, ds is stored on 16 bits. Real input residuals
 * being small, this should not cause a noticeable issue.
 */
int8_t av1_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m,
                                       int N, int64_t limit) {
    int64_t acc = 0;

    /* Counted loop: with N <= 0 nothing is read and the empty sum (0) is
     * compared against the limit, instead of decrementing past zero. */
    for (int i = 0; i < N; ++i)
        acc += ds[i] * m[i];

    return acc > limit;
}
2385 :
2386 11529000 : static void pick_wedge(
2387 : PictureControlSet *picture_control_set_ptr,
2388 : ModeDecisionContext *context_ptr,
2389 : const BlockSize bsize,
2390 : const uint8_t *const p0,
2391 : const int16_t *const residual1,
2392 : const int16_t *const diff10,
2393 : int8_t *const best_wedge_sign,
2394 : int8_t *const best_wedge_index)
2395 : {
2396 :
2397 11529000 : EbPictureBufferDesc *src_pic = picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
2398 11529000 : uint8_t *src_buf = src_pic->buffer_y + (context_ptr->cu_origin_x + src_pic->origin_x) + (context_ptr->cu_origin_y + src_pic->origin_y) * src_pic->stride_y;
2399 :
2400 11529000 : const int bw = block_size_wide[bsize];
2401 11529000 : const int bh = block_size_high[bsize];
2402 11529000 : const int N = bw * bh;
2403 11529000 : assert(N >= 64);
2404 : int rate;
2405 : int64_t dist;
2406 11529000 : int64_t rd, best_rd = INT64_MAX;
2407 : int8_t wedge_index;
2408 : int8_t wedge_sign;
2409 11529000 : int8_t wedge_types = (1 << get_wedge_bits_lookup(bsize));
2410 : const uint8_t *mask;
2411 : uint64_t sse;
2412 11529100 : const int bd_round = 0;
2413 : DECLARE_ALIGNED(32, int16_t, residual0[MAX_SB_SQUARE]); // src - pred0
2414 :
2415 11529100 : aom_subtract_block(bh, bw, residual0, bw, src_buf/*src->buf*/, src_pic->stride_y/*src->stride*/, p0, bw);
2416 :
2417 11528800 : int64_t sign_limit = ((int64_t)aom_sum_squares_i16(residual0, N) -
2418 11531700 : (int64_t)aom_sum_squares_i16(residual1, N)) *
2419 : (1 << WEDGE_WEIGHT_BITS) / 2;
2420 11531600 : int16_t *ds = residual0;
2421 :
2422 11531600 : av1_wedge_compute_delta_squares(ds, residual0, residual1, N);
2423 :
2424 195232000 : for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
2425 183702000 : mask = av1_get_contiguous_soft_mask(wedge_index, 0, bsize);
2426 :
2427 183642000 : wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit);
2428 :
2429 183767000 : mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
2430 183740000 : sse = av1_wedge_sse_from_residuals(residual1, diff10, mask, N);
2431 183212000 : sse = ROUND_POWER_OF_TWO(sse, bd_round);
2432 :
2433 183212000 : model_rd_with_curvfit(picture_control_set_ptr, bsize, sse, N, &rate, &dist, context_ptr->full_lambda);
2434 :
2435 183628000 : rd = RDCOST(context_ptr->full_lambda, rate, dist);
2436 :
2437 183628000 : if (rd < best_rd) {
2438 45607700 : *best_wedge_index = wedge_index;
2439 45607700 : *best_wedge_sign = wedge_sign;
2440 45607700 : best_rd = rd;
2441 : }
2442 : }
2443 11529700 : }
2444 :
2445 : extern aom_variance_fn_ptr_t mefn_ptr[BlockSizeS_ALL];
2446 :
2447 0 : static int8_t estimate_wedge_sign(
2448 : PictureControlSet *picture_control_set_ptr,
2449 : ModeDecisionContext *context_ptr,
2450 : const BlockSize bsize,
2451 : const uint8_t *pred0,
2452 : int stride0,
2453 : const uint8_t *pred1,
2454 : int stride1)
2455 : {
2456 : static const BlockSize split_qtr[BlockSizeS_ALL] = {
2457 : // 4X4
2458 : BLOCK_INVALID,
2459 : // 4X8, 8X4, 8X8
2460 : BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4,
2461 : // 8X16, 16X8, 16X16
2462 : BLOCK_4X8, BLOCK_8X4, BLOCK_8X8,
2463 : // 16X32, 32X16, 32X32
2464 : BLOCK_8X16, BLOCK_16X8, BLOCK_16X16,
2465 : // 32X64, 64X32, 64X64
2466 : BLOCK_16X32, BLOCK_32X16, BLOCK_32X32,
2467 : // 64x128, 128x64, 128x128
2468 : BLOCK_32X64, BLOCK_64X32, BLOCK_64X64,
2469 : // 4X16, 16X4, 8X32
2470 : BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X16,
2471 : // 32X8, 16X64, 64X16
2472 : BLOCK_16X4, BLOCK_8X32, BLOCK_32X8
2473 : };
2474 :
2475 0 : const int bw = block_size_wide[bsize];
2476 0 : const int bh = block_size_high[bsize];
2477 : uint32_t esq[2][4];
2478 : int64_t tl, br;
2479 :
2480 0 : const BlockSize f_index = split_qtr[bsize];
2481 0 : assert(f_index != BLOCK_INVALID);
2482 : (void)f_index;
2483 :
2484 0 : const aom_variance_fn_ptr_t *fn_ptr = &mefn_ptr[bsize];
2485 0 : EbPictureBufferDesc *src_pic = picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
2486 0 : uint8_t *src_buf = src_pic->buffer_y + (context_ptr->cu_origin_x + src_pic->origin_x) + (context_ptr->cu_origin_y + src_pic->origin_y) * src_pic->stride_y;
2487 :
2488 0 : fn_ptr->vf(src_buf, src_pic->stride_y, pred0, stride0, &esq[0][0]);
2489 0 : fn_ptr->vf(src_buf + bw / 2, src_pic->stride_y, pred0 + bw / 2, stride0, &esq[0][1]);
2490 0 : fn_ptr->vf(src_buf + bh / 2 * src_pic->stride_y, src_pic->stride_y, pred0 + bh / 2 * stride0, stride0, &esq[0][2]);
2491 0 : fn_ptr->vf(src_buf + bh / 2 * src_pic->stride_y + bw / 2, src_pic->stride_y, pred0 + bh / 2 * stride0 + bw / 2, stride0, &esq[0][3]);
2492 0 : fn_ptr->vf(src_buf, src_pic->stride_y, pred1, stride1, &esq[1][0]);
2493 0 : fn_ptr->vf(src_buf + bw / 2, src_pic->stride_y, pred1 + bw / 2, stride1, &esq[1][1]);
2494 0 : fn_ptr->vf(src_buf + bh / 2 * src_pic->stride_y, src_pic->stride_y, pred1 + bh / 2 * stride1, stride0, &esq[1][2]);
2495 0 : fn_ptr->vf(src_buf + bh / 2 * src_pic->stride_y + bw / 2, src_pic->stride_y, pred1 + bh / 2 * stride1 + bw / 2, stride0, &esq[1][3]);
2496 :
2497 0 : tl = ((int64_t)esq[0][0] + esq[0][1] + esq[0][2]) -
2498 0 : ((int64_t)esq[1][0] + esq[1][1] + esq[1][2]);
2499 0 : br = ((int64_t)esq[1][3] + esq[1][1] + esq[1][2]) -
2500 0 : ((int64_t)esq[0][3] + esq[0][1] + esq[0][2]);
2501 0 : return (tl + br > 0);
2502 : }
2503 : // Choose the best wedge index the specified sign
2504 : #if II_COMP_FLAG
2505 4453380 : int64_t pick_wedge_fixed_sign(
2506 : #else
2507 : static int64_t pick_wedge_fixed_sign(
2508 : #endif
2509 : #if II_COMP_FLAG
2510 : ModeDecisionCandidate *candidate_ptr,
2511 : #endif
2512 : PictureControlSet *picture_control_set_ptr,
2513 : ModeDecisionContext *context_ptr,
2514 : //const AV1_COMP *const cpi,
2515 : //const MACROBLOCK *const x,
2516 : const BlockSize bsize,
2517 : const int16_t *const residual1,
2518 : const int16_t *const diff10,
2519 : const int8_t wedge_sign,
2520 : int8_t *const best_wedge_index) {
2521 : //const MACROBLOCKD *const xd = &x->e_mbd;
2522 :
2523 4453380 : const int bw = block_size_wide[bsize];
2524 4453380 : const int bh = block_size_high[bsize];
2525 4453380 : const int N = bw * bh;
2526 4453380 : assert(N >= 64);
2527 : int rate;
2528 : int64_t dist;
2529 4453380 : int64_t rd, best_rd = INT64_MAX;
2530 : int8_t wedge_index;
2531 4453380 : int8_t wedge_types = (1 << get_wedge_bits_lookup(bsize));
2532 : const uint8_t *mask;
2533 : uint64_t sse;
2534 : //const int hbd = 0;// is_cur_buf_hbd(xd);
2535 4453370 : const int bd_round = 0;//hbd ? (xd->bd - 8) * 2 : 0;
2536 75587200 : for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
2537 71130600 : mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
2538 71123600 : sse = av1_wedge_sse_from_residuals(residual1, diff10, mask, N);
2539 71064600 : sse = ROUND_POWER_OF_TWO(sse, bd_round);
2540 :
2541 71064600 : model_rd_with_curvfit(picture_control_set_ptr,bsize, /*0,*/ sse, N, &rate, &dist, context_ptr->full_lambda);
2542 : // model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, sse, N, &rate, &dist);
2543 :
2544 : // rate += x->wedge_idx_cost[bsize][wedge_index];
2545 : #if II_COMP_FLAG
2546 71133800 : rate += candidate_ptr->md_rate_estimation_ptr->wedge_idx_fac_bits[bsize][wedge_index];
2547 : #endif
2548 71133800 : rd = RDCOST(/*x->rdmult*/context_ptr->full_lambda, rate, dist);
2549 :
2550 71133800 : if (rd < best_rd) {
2551 16137100 : *best_wedge_index = wedge_index;
2552 16137100 : best_rd = rd;
2553 : }
2554 : }
2555 4456550 : return best_rd ;//- RDCOST(x->rdmult, x->wedge_idx_cost[bsize][*best_wedge_index], 0);
2556 : }
2557 :
2558 11531000 : static void pick_interinter_wedge(
2559 : ModeDecisionCandidate *candidate_ptr,
2560 : PictureControlSet *picture_control_set_ptr,
2561 : ModeDecisionContext *context_ptr,
2562 : InterInterCompoundData *interinter_comp,
2563 : const BlockSize bsize,
2564 : const uint8_t *const p0,
2565 : const uint8_t *const p1,
2566 : const int16_t *const residual1,
2567 : const int16_t *const diff10)
2568 : {
2569 : (void)candidate_ptr;
2570 11531000 : const int bw = block_size_wide[bsize];
2571 : //int64_t rd;
2572 11531000 : int8_t wedge_index = -1;
2573 11531000 : int8_t wedge_sign = 0;
2574 :
2575 11531000 : assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
2576 : //TODO: OMK+CHKN to check on FIX_RATE_E_WEDGE
2577 :
2578 : // Two method
2579 : // Fast seatch method to be added OMK
2580 11530800 : if (picture_control_set_ptr->parent_pcs_ptr->wedge_mode == 2 || picture_control_set_ptr->parent_pcs_ptr->wedge_mode == 3) {
2581 0 : wedge_sign = estimate_wedge_sign(picture_control_set_ptr, context_ptr, bsize, p0, bw, p1, bw);
2582 : }
2583 : else {
2584 11531000 : pick_wedge(picture_control_set_ptr, context_ptr,
2585 : bsize, p0, residual1, diff10, &wedge_sign,
2586 : &wedge_index);
2587 : }
2588 :
2589 11529500 : interinter_comp->wedge_sign = wedge_sign;
2590 11529500 : interinter_comp->wedge_index = wedge_index;
2591 :
2592 11529500 : }
2593 :
2594 19870400 : static void pick_interinter_seg(
2595 : PictureControlSet *picture_control_set_ptr,
2596 : ModeDecisionContext *context_ptr,
2597 : InterInterCompoundData *interinter_comp,
2598 : const BlockSize bsize,
2599 : const uint8_t *const p0,
2600 : const uint8_t *const p1,
2601 : const int16_t *const residual1,
2602 : const int16_t *const diff10)
2603 : {
2604 19870400 : const int bw = block_size_wide[bsize];
2605 19870400 : const int bh = block_size_high[bsize];
2606 19870400 : const int N = 1 << num_pels_log2_lookup[bsize];
2607 : int rate;
2608 : int64_t dist;
2609 : DIFFWTD_MASK_TYPE cur_mask_type;
2610 19870400 : int64_t best_rd = INT64_MAX;
2611 19870400 : DIFFWTD_MASK_TYPE best_mask_type = 0;
2612 : DECLARE_ALIGNED(16, uint8_t, seg_mask0[2 * MAX_SB_SQUARE]);
2613 : DECLARE_ALIGNED(16, uint8_t, seg_mask1[2 * MAX_SB_SQUARE]);
2614 19870400 : uint8_t *tmp_mask[2] = { seg_mask0, seg_mask1 };
2615 :
2616 : // try each mask type and its inverse
2617 59594000 : for (cur_mask_type = 0; cur_mask_type < DIFFWTD_MASK_TYPES; cur_mask_type++) {
2618 :
2619 : // build mask and inverse
2620 39718900 : av1_build_compound_diffwtd_mask(tmp_mask[cur_mask_type], cur_mask_type,
2621 : p0, bw, p1, bw, bh, bw);
2622 : // compute rd for mask
2623 39723700 : uint64_t sse = av1_wedge_sse_from_residuals(residual1, diff10, tmp_mask[cur_mask_type], N);
2624 :
2625 39706300 : sse = ROUND_POWER_OF_TWO(sse, 0 );
2626 :
2627 39706300 : model_rd_with_curvfit(picture_control_set_ptr, bsize, sse, N, &rate, &dist, context_ptr->full_lambda);
2628 :
2629 39723600 : const int64_t rd0 = RDCOST(context_ptr->full_lambda , rate, dist);
2630 :
2631 39723600 : if (rd0 < best_rd) {
2632 28870000 : best_mask_type = cur_mask_type;
2633 28870000 : best_rd = rd0;
2634 : }
2635 : }
2636 :
2637 19875200 : interinter_comp->mask_type = best_mask_type;
2638 :
2639 19875200 : }
2640 :
2641 31397900 : void pick_interinter_mask(
2642 : ModeDecisionCandidate *candidate_ptr,
2643 : PictureControlSet *picture_control_set_ptr,
2644 : ModeDecisionContext *context_ptr,
2645 : InterInterCompoundData *interinter_comp,
2646 : const BlockSize bsize,
2647 : const uint8_t *const p0,
2648 : const uint8_t *const p1,
2649 : const int16_t *const residual1,
2650 : const int16_t *const diff10)
2651 : {
2652 :
2653 31397900 : if (interinter_comp->type == COMPOUND_WEDGE)
2654 11531000 : pick_interinter_wedge(candidate_ptr, picture_control_set_ptr, context_ptr, interinter_comp, bsize, p0, p1, residual1, diff10);
2655 19866900 : else if (interinter_comp->type == COMPOUND_DIFFWTD)
2656 19876100 : pick_interinter_seg(picture_control_set_ptr, context_ptr, interinter_comp, bsize, p0, p1, residual1, diff10);
2657 : else
2658 0 : assert(0);
2659 :
2660 31401700 : }
2661 :
2662 31390300 : void search_compound_diff_wedge(
2663 : PictureControlSet *picture_control_set_ptr,
2664 : ModeDecisionContext *context_ptr,
2665 : ModeDecisionCandidate *candidate_ptr)
2666 : {
2667 :
2668 : //if (*calc_pred_masked_compound)
2669 : {
2670 31390300 : EbPictureBufferDesc *src_pic = picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
2671 31390300 : uint8_t *src_buf = src_pic->buffer_y + (context_ptr->cu_origin_x + src_pic->origin_x) + (context_ptr->cu_origin_y + src_pic->origin_y) * src_pic->stride_y;
2672 :
2673 31390300 : uint32_t bwidth = context_ptr->blk_geom->bwidth;
2674 31390300 : uint32_t bheight = context_ptr->blk_geom->bheight;
2675 : EbPictureBufferDesc pred_desc;
2676 31390300 : pred_desc.origin_x = pred_desc.origin_y = 0;
2677 31390300 : pred_desc.stride_y = bwidth;
2678 :
2679 31390300 : SequenceControlSet* sequence_control_set_ptr = ((SequenceControlSet*)(picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr));
2680 : EbPictureBufferDesc *ref_pic_list0;
2681 31390300 : EbPictureBufferDesc *ref_pic_list1 = NULL;
2682 : Mv mv_0;
2683 : Mv mv_1;
2684 31390300 : mv_0.x = candidate_ptr->motion_vector_xl0;
2685 31390300 : mv_0.y = candidate_ptr->motion_vector_yl0;
2686 31390300 : mv_1.x = candidate_ptr->motion_vector_xl1;
2687 31390300 : mv_1.y = candidate_ptr->motion_vector_yl1;
2688 : MvUnit mv_unit;
2689 31390300 : mv_unit.mv[0] = mv_0;
2690 31390300 : mv_unit.mv[1] = mv_1;
2691 31390300 : int8_t ref_idx_l0 = candidate_ptr->ref_frame_index_l0;
2692 31390300 : int8_t ref_idx_l1 = candidate_ptr->ref_frame_index_l1;
2693 : MvReferenceFrame rf[2];
2694 31390300 : av1_set_ref_frame(rf, candidate_ptr->ref_frame_type);
2695 : uint8_t list_idx0, list_idx1;
2696 31403900 : list_idx0 = get_list_idx(rf[0]);
2697 31400500 : if (rf[1] == NONE_FRAME)
2698 0 : list_idx1 = get_list_idx(rf[0]);
2699 : else
2700 31400500 : list_idx1 = get_list_idx(rf[1]);
2701 31400000 : assert(list_idx0 < MAX_NUM_OF_REF_PIC_LIST);
2702 31400000 : assert(list_idx1 < MAX_NUM_OF_REF_PIC_LIST);
2703 31400000 : if (ref_idx_l0 >= 0)
2704 31400300 : ref_pic_list0 = ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[list_idx0][ref_idx_l0]->object_ptr)->reference_picture;
2705 : else
2706 0 : ref_pic_list0 = (EbPictureBufferDesc*)EB_NULL;
2707 31400000 : if (ref_idx_l1 >= 0)
2708 31400600 : ref_pic_list1 = ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[list_idx1][ref_idx_l1]->object_ptr)->reference_picture;
2709 : else
2710 0 : ref_pic_list1 = (EbPictureBufferDesc*)EB_NULL;
2711 :
2712 : //CHKN get seperate prediction of each ref(Luma only)
2713 : //ref0 prediction
2714 31400000 : mv_unit.pred_direction = UNI_PRED_LIST_0;
2715 31400000 : pred_desc.buffer_y = context_ptr->pred0;
2716 :
2717 : //we call the regular inter prediction path here(no compound)
2718 31400000 : av1_inter_prediction_function_table[context_ptr->hbd_mode_decision > EB_8_BIT_MD](
2719 : picture_control_set_ptr,
2720 : 0,//fixed interpolation filter for compound search
2721 : context_ptr->cu_ptr,
2722 31400000 : candidate_ptr->ref_frame_type,
2723 : &mv_unit,
2724 : 0,//use_intrabc,
2725 : #if OBMC_FLAG
2726 : SIMPLE_TRANSLATION,
2727 : 0,
2728 : 0,
2729 : #endif
2730 : 1,//compound_idx not used
2731 : NULL,// interinter_comp not used
2732 : #if II_COMP_FLAG
2733 : NULL,
2734 : NULL,
2735 : NULL,
2736 : NULL,
2737 : 0,
2738 : 0,
2739 : 0,
2740 : 0,
2741 : #endif
2742 31400000 : context_ptr->cu_origin_x,
2743 31400000 : context_ptr->cu_origin_y,
2744 : bwidth,
2745 : bheight,
2746 : ref_pic_list0,
2747 : ref_pic_list1,
2748 : &pred_desc, //output
2749 : 0, //output origin_x,
2750 : 0, //output origin_y,
2751 : 0,//do chroma
2752 31400000 : (uint8_t)sequence_control_set_ptr->static_config.encoder_bit_depth);
2753 :
2754 : //ref1 prediction
2755 31378500 : mv_unit.pred_direction = UNI_PRED_LIST_1;
2756 31378500 : pred_desc.buffer_y = context_ptr->pred1;
2757 :
2758 : //we call the regular inter prediction path here(no compound)
2759 31378500 : av1_inter_prediction_function_table[context_ptr->hbd_mode_decision > EB_8_BIT_MD](
2760 : picture_control_set_ptr,
2761 : 0,//fixed interpolation filter for compound search
2762 : context_ptr->cu_ptr,
2763 31378500 : candidate_ptr->ref_frame_type,
2764 : &mv_unit,
2765 : 0,//use_intrabc,
2766 : #if OBMC_FLAG
2767 : SIMPLE_TRANSLATION,
2768 : 0,
2769 : 0,
2770 : #endif
2771 : 1,//compound_idx not used
2772 : NULL,// interinter_comp not used
2773 : #if II_COMP_FLAG
2774 : NULL,
2775 : NULL,
2776 : NULL,
2777 : NULL,
2778 : 0,
2779 : 0,
2780 : 0,
2781 : 0,
2782 : #endif
2783 31378500 : context_ptr->cu_origin_x,
2784 31378500 : context_ptr->cu_origin_y,
2785 : bwidth,
2786 : bheight,
2787 : ref_pic_list0,
2788 : ref_pic_list1,
2789 : &pred_desc, //output
2790 : 0, //output origin_x,
2791 : 0, //output origin_y,
2792 : 0,//do chroma
2793 31378500 : (uint8_t)sequence_control_set_ptr->static_config.encoder_bit_depth);
2794 :
2795 31377100 : aom_subtract_block(bheight, bwidth, context_ptr->residual1, bwidth, src_buf, src_pic->stride_y, context_ptr->pred1, bwidth);
2796 31389400 : aom_subtract_block(bheight, bwidth, context_ptr->diff10, bwidth, context_ptr->pred1, bwidth, context_ptr->pred0, bwidth);
2797 :
2798 : //*calc_pred_masked_compound = 0;
2799 31395900 : if (picture_control_set_ptr->parent_pcs_ptr->wedge_mode == 1 || picture_control_set_ptr->parent_pcs_ptr->wedge_mode == 3)
2800 0 : if (candidate_ptr->interinter_comp.type == COMPOUND_DIFFWTD && context_ptr->variance_ready == 0) {
2801 0 : const aom_variance_fn_ptr_t *fn_ptr = &mefn_ptr[context_ptr->blk_geom->bsize];
2802 :
2803 : unsigned int sse;
2804 0 : (void)fn_ptr->vf(context_ptr->pred0, bwidth, context_ptr->pred1, pred_desc.stride_y, &sse);
2805 :
2806 0 : context_ptr->prediction_mse = ROUND_POWER_OF_TWO(sse, num_pels_log2_lookup[context_ptr->blk_geom->bsize]);
2807 0 : context_ptr->variance_ready = 1;
2808 : }
2809 :
2810 : }
2811 31395900 : pick_interinter_mask(
2812 : candidate_ptr,
2813 : picture_control_set_ptr,
2814 : context_ptr,
2815 : &candidate_ptr->interinter_comp,
2816 31395900 : context_ptr->blk_geom->bsize,
2817 31395900 : context_ptr->pred0,
2818 31395900 : context_ptr->pred1,
2819 31395900 : context_ptr->residual1,
2820 31395900 : context_ptr->diff10);
2821 31386300 : }
2822 :
/**
 * Sum of squared differences between two 8-bit sample blocks.
 *
 * a, a_stride: first block and the distance between its rows
 * b, b_stride: second block and the distance between its rows
 * width:       samples per row to compare
 * height:      number of rows to compare
 *
 * Returns the sum over the width x height region of (a - b)**2.
 */
int64_t aom_sse_c(const uint8_t *a, int a_stride, const uint8_t *b,
                  int b_stride, int width, int height) {
    int64_t total = 0;

    for (int row = 0; row < height; ++row, a += a_stride, b += b_stride) {
        for (int col = 0; col < width; ++col) {
            const int32_t delta = abs(a[col] - b[col]);
            total += delta * delta;
        }
    }
    return total;
}
2839 :
2840 : #if II_COMP_FLAG
2841 17810900 : void model_rd_for_sb_with_curvfit(
2842 : #else
2843 : static void model_rd_for_sb_with_curvfit(
2844 : #endif
2845 : PictureControlSet *picture_control_set_ptr,
2846 : ModeDecisionContext *context_ptr,
2847 : BlockSize bsize, int bw, int bh,
2848 : uint8_t* src_buf, uint32_t src_stride, uint8_t* pred_buf, uint32_t pred_stride,
2849 : int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
2850 : int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
2851 : int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) {
2852 : (void)mi_row;
2853 : (void)mi_col;
2854 : // Note our transform coeffs are 8 times an orthogonal transform.
2855 : // Hence quantizer step is also 8 times. To get effective quantizer
2856 : // we need to divide by 8 before sending to modeling function.
2857 :
2858 17810900 : int64_t rate_sum = 0;
2859 17810900 : int64_t dist_sum = 0;
2860 17810900 : int64_t total_sse = 0;
2861 :
2862 35622000 : for (int plane = plane_from; plane <= plane_to; ++plane) {
2863 17810800 : int32_t subsampling = plane == 0 ? 0 : 1;
2864 : const BlockSize plane_bsize =
2865 17810800 : get_plane_block_size(bsize, subsampling, subsampling);
2866 : int64_t dist, sse;
2867 : int rate;
2868 17809700 : const int shift = 0;
2869 17809700 : sse = aom_sse(src_buf, src_stride, pred_buf, pred_stride, bw, bh);
2870 17810400 : sse = ROUND_POWER_OF_TWO(sse, shift * 2);
2871 17810400 : model_rd_with_curvfit(picture_control_set_ptr , plane_bsize, sse, bw * bh, &rate, &dist, context_ptr->full_lambda);
2872 :
2873 17811100 : total_sse += sse;
2874 17811100 : rate_sum += rate;
2875 17811100 : dist_sum += dist;
2876 :
2877 17811100 : if (plane_rate) plane_rate[plane] = rate;
2878 17811100 : if (plane_sse) plane_sse[plane] = sse;
2879 17811100 : if (plane_dist) plane_dist[plane] = dist;
2880 : }
2881 :
2882 17811300 : if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0;
2883 17811300 : if (skip_sse_sb) *skip_sse_sb = total_sse << 4;
2884 17811300 : *out_rate_sum = (int)rate_sum;
2885 17811300 : *out_dist_sum = dist_sum;
2886 17811300 : }
2887 :
2888 : int get_comp_index_context_enc(
2889 : PictureParentControlSet *pcs_ptr,
2890 : int cur_frame_index,
2891 : int bck_frame_index,
2892 : int fwd_frame_index,
2893 : const MacroBlockD *xd);
2894 0 : void search_compound_avg_dist(
2895 : PictureControlSet *picture_control_set_ptr,
2896 : ModeDecisionContext *context_ptr,
2897 : ModeDecisionCandidate *candidate_ptr)
2898 : {
2899 : int64_t est_rd[2];
2900 :
2901 0 : MbModeInfo *const mbmi = &context_ptr->cu_ptr->av1xd->mi[0]->mbmi;
2902 : MvReferenceFrame rf[2];
2903 0 : av1_set_ref_frame(rf, candidate_ptr->ref_frame_type);
2904 0 : mbmi->block_mi.ref_frame[0] = rf[0];
2905 0 : mbmi->block_mi.ref_frame[1] = rf[1];
2906 0 : const int comp_index_ctx = get_comp_index_context_enc(
2907 0 : picture_control_set_ptr->parent_pcs_ptr,
2908 0 : picture_control_set_ptr->parent_pcs_ptr->cur_order_hint,
2909 0 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[0] - 1],
2910 0 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[1] - 1],
2911 0 : context_ptr->cu_ptr->av1xd);
2912 :
2913 : //COMPOUND AVERAGE
2914 : COMPOUND_TYPE comp_i;
2915 :
2916 0 : for (comp_i = COMPOUND_AVERAGE; comp_i <= COMPOUND_DISTWTD; comp_i++)
2917 : {
2918 : //assign compound type temporary for RD test
2919 0 : candidate_ptr->interinter_comp.type = comp_i;
2920 0 : candidate_ptr->comp_group_idx = 0;
2921 0 : candidate_ptr->compound_idx = (comp_i == COMPOUND_AVERAGE) ? 1 : 0;
2922 :
2923 0 : EbPictureBufferDesc *src_pic = picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
2924 0 : uint8_t *src_buf = src_pic->buffer_y + (context_ptr->cu_origin_x + src_pic->origin_x) + (context_ptr->cu_origin_y + src_pic->origin_y) * src_pic->stride_y;
2925 :
2926 0 : uint32_t bwidth = context_ptr->blk_geom->bwidth;
2927 0 : uint32_t bheight = context_ptr->blk_geom->bheight;
2928 : EbPictureBufferDesc pred_desc;
2929 0 : pred_desc.origin_x = pred_desc.origin_y = 0;
2930 0 : pred_desc.stride_y = bwidth;
2931 0 : pred_desc.buffer_y = context_ptr->pred0;
2932 :
2933 0 : SequenceControlSet* sequence_control_set_ptr = ((SequenceControlSet*)(picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr));
2934 : EbPictureBufferDesc *ref_pic_list0;
2935 0 : EbPictureBufferDesc *ref_pic_list1 = NULL;
2936 : Mv mv_0;
2937 : Mv mv_1;
2938 0 : mv_0.x = candidate_ptr->motion_vector_xl0;
2939 0 : mv_0.y = candidate_ptr->motion_vector_yl0;
2940 0 : mv_1.x = candidate_ptr->motion_vector_xl1;
2941 0 : mv_1.y = candidate_ptr->motion_vector_yl1;
2942 : MvUnit mv_unit;
2943 0 : mv_unit.mv[0] = mv_0;
2944 0 : mv_unit.mv[1] = mv_1;
2945 0 : mv_unit.pred_direction = BI_PRED;
2946 0 : int8_t ref_idx_l0 = candidate_ptr->ref_frame_index_l0;
2947 0 : int8_t ref_idx_l1 = candidate_ptr->ref_frame_index_l1;
2948 : MvReferenceFrame rf[2];
2949 0 : av1_set_ref_frame(rf, candidate_ptr->ref_frame_type);
2950 : uint8_t list_idx0, list_idx1;
2951 0 : list_idx0 = get_list_idx(rf[0]);
2952 0 : if (rf[1] == NONE_FRAME)
2953 0 : list_idx1 = get_list_idx(rf[0]);
2954 : else
2955 0 : list_idx1 = get_list_idx(rf[1]);
2956 0 : assert(list_idx0 < MAX_NUM_OF_REF_PIC_LIST);
2957 0 : assert(list_idx1 < MAX_NUM_OF_REF_PIC_LIST);
2958 0 : if (ref_idx_l0 >= 0)
2959 0 : ref_pic_list0 = ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[list_idx0][ref_idx_l0]->object_ptr)->reference_picture;
2960 : else
2961 0 : ref_pic_list0 = (EbPictureBufferDesc*)EB_NULL;
2962 0 : if (ref_idx_l1 >= 0)
2963 0 : ref_pic_list1 = ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[list_idx1][ref_idx_l1]->object_ptr)->reference_picture;
2964 : else
2965 0 : ref_pic_list1 = (EbPictureBufferDesc*)EB_NULL;
2966 :
2967 :
2968 0 : av1_inter_prediction_function_table[context_ptr->hbd_mode_decision > EB_8_BIT_MD](
2969 : picture_control_set_ptr,
2970 : 0,//fixed interpolation filter for compound search
2971 : context_ptr->cu_ptr,
2972 0 : candidate_ptr->ref_frame_type,
2973 : &mv_unit,
2974 : 0,//use_intrabc,
2975 : #if OBMC_FLAG
2976 : SIMPLE_TRANSLATION,
2977 : 0,
2978 : 0,
2979 : #endif
2980 0 : candidate_ptr->compound_idx,
2981 : &candidate_ptr->interinter_comp,
2982 : #if II_COMP_FLAG
2983 : NULL,
2984 : NULL,
2985 : NULL,
2986 : NULL,
2987 : 0,
2988 : 0,
2989 : 0,
2990 : 0,
2991 : #endif
2992 0 : context_ptr->cu_origin_x,
2993 0 : context_ptr->cu_origin_y,
2994 : bwidth,
2995 : bheight,
2996 : ref_pic_list0,
2997 : ref_pic_list1,
2998 : &pred_desc, //output
2999 : 0, //output origin_x,
3000 : 0, //output origin_y,
3001 : 0,//do chroma
3002 0 : (uint8_t)sequence_control_set_ptr->static_config.encoder_bit_depth);
3003 :
3004 : int32_t est_rate;
3005 : int64_t est_dist;
3006 :
3007 0 : model_rd_for_sb_with_curvfit(picture_control_set_ptr , context_ptr, context_ptr->blk_geom->bsize, bwidth, bheight,
3008 0 : src_buf, src_pic->stride_y, pred_desc.buffer_y, pred_desc.stride_y,
3009 : 0, 0, 0, 0, &est_rate,
3010 : &est_dist, NULL, NULL, NULL, NULL, NULL);
3011 :
3012 0 : est_rate += candidate_ptr->md_rate_estimation_ptr->comp_idx_fac_bits[comp_index_ctx][candidate_ptr->compound_idx];
3013 :
3014 0 : est_rd[comp_i] =
3015 0 : RDCOST(context_ptr->full_lambda , est_rate, est_dist);
3016 : }
3017 :
3018 : //assign the best compound type
3019 0 : if (est_rd[COMPOUND_AVERAGE] <= est_rd[COMPOUND_DISTWTD]) {
3020 0 : candidate_ptr->interinter_comp.type = COMPOUND_AVERAGE;
3021 0 : candidate_ptr->comp_group_idx = 0;
3022 0 : candidate_ptr->compound_idx = 1;
3023 : }
3024 : else {
3025 0 : candidate_ptr->interinter_comp.type = COMPOUND_DISTWTD;
3026 0 : candidate_ptr->comp_group_idx = 0;
3027 0 : candidate_ptr->compound_idx = 0;
3028 : }
3029 :
3030 0 : }
3031 :
#if II_COMP_FLAG
// Blends an intra prediction with an inter prediction into 'comppred'.
//
// mode:                 inter-intra mode, used to build the smooth mask
// use_wedge_interintra: non-zero selects the wedge mask instead of the smooth
//                       mask
// wedge_index/sign:     wedge mask selection (wedge path only)
// bsize:                block size used for the wedge mask lookup
// plane_bsize:          block size of the plane being blended (gives bw/bh)
// comppred, compstride:   output buffer and stride
// interpred, interstride: inter prediction and stride
// intrapred, intrastride: intra prediction and stride
//
// On the wedge path nothing is written when wedges are not available for
// 'bsize'.
void combine_interintra(INTERINTRA_MODE mode,
    int8_t use_wedge_interintra, int wedge_index,
    int wedge_sign, BlockSize bsize,
    BlockSize plane_bsize, uint8_t *comppred,
    int compstride, const uint8_t *interpred,
    int interstride, const uint8_t *intrapred,
    int intrastride)
{
    const int bw = block_size_wide[plane_bsize];
    const int bh = block_size_high[plane_bsize];

    if (!use_wedge_interintra) {
        // Smooth path: the mask is built at plane resolution, no subsampling.
        uint8_t mask[MAX_SB_SQUARE];
        build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
        aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
            interstride, mask, bw, bw, bh, 0, 0);
        return;
    }

    if (!is_interintra_wedge_used(bsize))
        return;

    // Wedge path: the mask is stored at 'bsize' resolution; subw/subh flag
    // whether the plane is half that size horizontally/vertically.
    const uint8_t *wedge_mask =
        av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
    const int subw = 2 * mi_size_wide[bsize] == bw;
    const int subh = 2 * mi_size_high[bsize] == bh;
    aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
        interpred, interstride, wedge_mask, block_size_wide[bsize],
        bw, bh, subw, subh);
}
#endif
3063 : #endif
#if II_COMP_FLAG
// Intra block predictor operating on 8-bit neighbor arrays. Only declared
// here; the definition is not part of this section of the file.
extern void eb_av1_predict_intra_block(
    TileInfo *tile, STAGE stage, const BlockGeom *blk_geom, const Av1Common *cm,
    int32_t wpx, int32_t hpx, TxSize tx_size, PredictionMode mode,
    int32_t angle_delta, int32_t use_palette,
#if PAL_SUP
    PaletteInfo *palette_info,
#endif
    FilterIntraMode filter_intra_mode,
    uint8_t *topNeighArray, uint8_t *leftNeighArray,
    EbPictureBufferDesc *recon_buffer,
    int32_t col_off, int32_t row_off, int32_t plane, BlockSize bsize,
    uint32_t tu_org_x_pict, uint32_t tu_org_y_pict,
    uint32_t bl_org_x_pict, uint32_t bl_org_y_pict,
    uint32_t bl_org_x_mb, uint32_t bl_org_y_mb);
#if INTER_INTRA_HBD
// Same parameter list as above, with 16-bit neighbor arrays.
extern void eb_av1_predict_intra_block_16bit(
    TileInfo *tile, STAGE stage, const BlockGeom *blk_geom, const Av1Common *cm,
    int32_t wpx, int32_t hpx, TxSize tx_size, PredictionMode mode,
    int32_t angle_delta, int32_t use_palette,
#if PAL_SUP
    PaletteInfo *palette_info,
#endif
    FilterIntraMode filter_intra_mode,
    uint16_t *topNeighArray, uint16_t *leftNeighArray,
    EbPictureBufferDesc *recon_buffer,
    int32_t col_off, int32_t row_off, int32_t plane, BlockSize bsize,
    uint32_t tu_org_x_pict, uint32_t tu_org_y_pict,
    uint32_t bl_org_x_pict, uint32_t bl_org_y_pict,
    uint32_t bl_org_x_mb, uint32_t bl_org_y_mb);
#endif
// Wedge sign constant for inter-intra prediction.
#define INTERINTRA_WEDGE_SIGN 0
#endif
3125 : #if OBMC_FLAG
3126 :
3127 : struct build_prediction_hbd_ctxt {
3128 : const AV1_COMMON *cm;
3129 : int mi_row;
3130 : int mi_col;
3131 : uint16_t **tmp_buf;
3132 : int *tmp_width;
3133 : int *tmp_height;
3134 : int *tmp_stride;
3135 : int mb_to_far_edge;
3136 :
3137 : PictureControlSet *picture_control_set_ptr;
3138 : MvUnit mv_unit ;
3139 : uint16_t pu_origin_x ;
3140 : uint16_t pu_origin_y ;
3141 : EbPictureBufferDesc *ref_pic_list0 ;
3142 : EbPictureBufferDesc prediction_ptr ;
3143 : uint16_t dst_origin_x ;
3144 : uint16_t dst_origin_y ;
3145 : EbBool perform_chroma ;
3146 :
3147 :
3148 : };
3149 :
3150 : struct build_prediction_ctxt {
3151 : const AV1_COMMON *cm;
3152 : int mi_row;
3153 : int mi_col;
3154 : uint8_t **tmp_buf;
3155 : int *tmp_width;
3156 : int *tmp_height;
3157 : int *tmp_stride;
3158 : int mb_to_far_edge;
3159 :
3160 : PictureControlSet *picture_control_set_ptr;
3161 : MvUnit mv_unit ;
3162 : uint16_t pu_origin_x ;
3163 : uint16_t pu_origin_y ;
3164 : EbPictureBufferDesc *ref_pic_list0 ;
3165 : EbPictureBufferDesc prediction_ptr ;
3166 : uint16_t dst_origin_x ;
3167 : uint16_t dst_origin_y ;
3168 : EbBool perform_chroma ;
3169 :
3170 :
3171 : };
// Maximum number of overlappable neighbours visited along one block edge.
// Indexed by the log2 of the edge length in 4-sample units
// (0 -> 4 samples, 1 -> 8 samples, ...).
static const int max_neighbor_obmc[6] = {
    0, // log2 = 0
    1, // log2 = 1
    2, // log2 = 2
    3, // log2 = 3
    4, // log2 = 4
    4  // log2 = 5
};
3174 :
3175 :
3176 : typedef void(*overlappable_nb_visitor_t)(
3177 : uint8_t is16bit,
3178 : MacroBlockD *xd,
3179 : int rel_mi_pos,
3180 : uint8_t nb_mi_size,
3181 : MbModeInfo *nb_mi,
3182 : void *fun_ctxt,
3183 : const int num_planes);
3184 :
3185 13860500 : static INLINE void foreach_overlappable_nb_above(
3186 : uint8_t is16bit ,
3187 : const AV1_COMMON *cm,
3188 : MacroBlockD *xd,
3189 : int mi_col,
3190 : int nb_max,
3191 : overlappable_nb_visitor_t fun,
3192 : void *fun_ctxt) {
3193 13860500 : const int num_planes = 2;
3194 13860500 : if (!xd->up_available) return;
3195 :
3196 13141500 : int nb_count = 0;
3197 :
3198 : // prev_row_mi points into the mi array, starting at the beginning of the
3199 : // previous row.
3200 13141500 : ModeInfo **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
3201 13141500 : const int end_col = AOMMIN(mi_col + xd->n4_w, cm->mi_cols);
3202 : uint8_t mi_step;
3203 26891100 : for (int above_mi_col = mi_col; above_mi_col < end_col && nb_count < nb_max;
3204 13749600 : above_mi_col += mi_step) {
3205 13750500 : ModeInfo /*MbModeInfo*/ **above_mi = prev_row_mi + above_mi_col;
3206 13750500 : mi_step =
3207 13750500 : AOMMIN(mi_size_wide[above_mi[0]->mbmi.block_mi.sb_type], mi_size_wide[BLOCK_64X64]);
3208 : // If we're considering a block with width 4, it should be treated as
3209 : // half of a pair of blocks with chroma information in the second. Move
3210 : // above_mi_col back to the start of the pair if needed, set above_mbmi
3211 : // to point at the block with chroma information, and set mi_step to 2 to
3212 : // step over the entire pair at the end of the iteration.
3213 13750500 : if (mi_step == 1) {
3214 34256 : above_mi_col &= ~1;
3215 34256 : above_mi = prev_row_mi + above_mi_col + 1;
3216 34256 : mi_step = 2;
3217 : }
3218 13750500 : if (is_neighbor_overlappable( &(*above_mi)->mbmi)) {
3219 13715000 : ++nb_count;
3220 :
3221 13715000 : fun(
3222 : is16bit,
3223 : xd,
3224 : above_mi_col - mi_col,
3225 13715000 : AOMMIN(xd->n4_w, mi_step),
3226 13715000 : &(*above_mi)->mbmi ,
3227 : fun_ctxt,
3228 : num_planes);
3229 : }
3230 : }
3231 : }
3232 :
3233 13868800 : static INLINE void foreach_overlappable_nb_left(
3234 : uint8_t is16bit ,
3235 : const AV1_COMMON *cm,
3236 : MacroBlockD *xd,
3237 : int mi_row,
3238 : int nb_max,
3239 : overlappable_nb_visitor_t fun,
3240 : void *fun_ctxt) {
3241 13868800 : const int num_planes = 2;
3242 13868800 : if (!xd->left_available) return;
3243 :
3244 13316500 : int nb_count = 0;
3245 :
3246 : // prev_col_mi points into the mi array, starting at the top of the
3247 : // previous column
3248 :
3249 13316500 : ModeInfo **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
3250 13316500 : const int end_row = AOMMIN(mi_row + xd->n4_h, cm->mi_rows);
3251 : uint8_t mi_step;
3252 27783500 : for (int left_mi_row = mi_row; left_mi_row < end_row && nb_count < nb_max;
3253 14467100 : left_mi_row += mi_step) {
3254 14467900 : ModeInfo **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
3255 14467900 : mi_step =
3256 14467900 : AOMMIN(mi_size_high[left_mi[0]->mbmi.block_mi.sb_type], mi_size_high[BLOCK_64X64]);
3257 14467900 : if (mi_step == 1) {
3258 34122 : left_mi_row &= ~1;
3259 34122 : left_mi = prev_col_mi + (left_mi_row + 1) * xd->mi_stride;
3260 34122 : mi_step = 2;
3261 : }
3262 14467900 : if (is_neighbor_overlappable( &(*left_mi)->mbmi)) {
3263 14429100 : ++nb_count;
3264 :
3265 14429100 : fun(
3266 : is16bit,
3267 : xd,
3268 : left_mi_row - mi_row,
3269 14429100 : AOMMIN(xd->n4_h, mi_step),
3270 14429100 : &(*left_mi)->mbmi ,
3271 : fun_ctxt,
3272 : num_planes);
3273 : }
3274 : }
3275 : }
3276 : // HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
3277 : // block-size of current plane is smaller than 8x8, always only blend with the
3278 : // left neighbor(s) (skip blending with the above side).
3279 : #define DISABLE_CHROMA_U8X8_OBMC 0 // 0: one-sided obmc; 1: disable
3280 :
3281 29563700 : int av1_skip_u4x4_pred_in_obmc(BlockSize bsize,
3282 : int dir, int subsampling_x, int subsampling_y) {
3283 29563700 : assert(is_motion_variation_allowed_bsize(bsize));
3284 :
3285 : const BlockSize bsize_plane =
3286 29562400 : get_plane_block_size(bsize,subsampling_x,subsampling_y);
3287 29559900 : switch (bsize_plane) {
3288 : #if DISABLE_CHROMA_U8X8_OBMC
3289 : case BLOCK_4X4:
3290 : case BLOCK_8X4:
3291 : case BLOCK_4X8: return 1; break;
3292 : #else
3293 1039730 : case BLOCK_4X4:
3294 : case BLOCK_8X4:
3295 1039730 : case BLOCK_4X8: return dir == 0; break;
3296 : #endif
3297 28520200 : default: return 0;
3298 : }
3299 : }
3300 :
3301 396686 : void av1_setup_build_prediction_by_above_pred(
3302 : MacroBlockD *xd, int rel_mi_col, uint8_t above_mi_width,
3303 : MbModeInfo *above_mbmi, struct build_prediction_ctxt *ctxt,
3304 : const int num_planes,uint8_t is16bit)
3305 : {
3306 : (void)num_planes;
3307 396686 : const int above_mi_col = ctxt->mi_col + rel_mi_col;
3308 :
3309 : //use above mbmi to set up the reference object from where to read
3310 :
3311 396686 : ctxt->mv_unit.mv[0].x = above_mbmi->block_mi.mv[0].as_mv.col;
3312 396686 : ctxt->mv_unit.mv[0].y = above_mbmi->block_mi.mv[0].as_mv.row;
3313 396686 : ctxt->mv_unit.pred_direction = UNI_PRED_LIST_0;
3314 :
3315 396686 : uint8_t ref_idx_l0 = get_ref_frame_idx(above_mbmi->block_mi.ref_frame[0]);
3316 396682 : uint8_t list_idx0 = get_list_idx(above_mbmi->block_mi.ref_frame[0]);
3317 :
3318 396687 : if (is16bit)
3319 0 : ctxt->ref_pic_list0 = ((EbReferenceObject*)ctxt->picture_control_set_ptr->ref_pic_ptr_array[list_idx0][ref_idx_l0]->object_ptr)->reference_picture16bit;
3320 : else
3321 396687 : ctxt->ref_pic_list0 = ((EbReferenceObject*)ctxt->picture_control_set_ptr->ref_pic_ptr_array[list_idx0][ref_idx_l0]->object_ptr)->reference_picture;
3322 :
3323 396687 : xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
3324 396687 : xd->mb_to_right_edge = ctxt->mb_to_far_edge +
3325 396687 : (xd->n4_w - rel_mi_col - above_mi_width) * MI_SIZE * 8;
3326 396687 : }
3327 417268 : void av1_setup_build_prediction_by_left_pred(MacroBlockD *xd, int rel_mi_row,
3328 : uint8_t left_mi_height,
3329 : MbModeInfo *left_mbmi,
3330 : struct build_prediction_ctxt *ctxt,
3331 : const int num_planes,uint8_t is16bit)
3332 : {
3333 : (void)num_planes;
3334 417268 : const int left_mi_row = ctxt->mi_row + rel_mi_row;
3335 :
3336 417268 : ctxt->mv_unit.mv[0].x = left_mbmi->block_mi.mv[0].as_mv.col;
3337 417268 : ctxt->mv_unit.mv[0].y = left_mbmi->block_mi.mv[0].as_mv.row;
3338 417268 : ctxt->mv_unit.pred_direction = UNI_PRED_LIST_0;
3339 :
3340 :
3341 417268 : uint8_t ref_idx_l0 = get_ref_frame_idx(left_mbmi->block_mi.ref_frame[0]);
3342 417267 : uint8_t list_idx0 = get_list_idx(left_mbmi->block_mi.ref_frame[0]);
3343 :
3344 417267 : if (is16bit)
3345 0 : ctxt->ref_pic_list0 = ((EbReferenceObject*)ctxt->picture_control_set_ptr->ref_pic_ptr_array[list_idx0][ref_idx_l0]->object_ptr)->reference_picture16bit;
3346 : else
3347 417267 : ctxt->ref_pic_list0 = ((EbReferenceObject*)ctxt->picture_control_set_ptr->ref_pic_ptr_array[list_idx0][ref_idx_l0]->object_ptr)->reference_picture;
3348 :
3349 417267 : xd->mb_to_top_edge = 8 * MI_SIZE * (-left_mi_row);
3350 417267 : xd->mb_to_bottom_edge =
3351 417267 : ctxt->mb_to_far_edge +
3352 417267 : (xd->n4_h - rel_mi_row - left_mi_height) * MI_SIZE * 8;
3353 417267 : }
3354 :
3355 0 : EbErrorType get_single_prediction_for_obmc_luma_hbd(
3356 : uint32_t interp_filters,
3357 : MacroBlockD *xd,
3358 : MvUnit *mv_unit,
3359 : uint16_t pu_origin_x,
3360 : uint16_t pu_origin_y,
3361 : uint8_t bwidth,
3362 : uint8_t bheight,
3363 : EbPictureBufferDesc *ref_pic_list0,
3364 : EbPictureBufferDesc *prediction_ptr,
3365 : uint16_t dst_origin_x,
3366 : uint16_t dst_origin_y)
3367 : {
3368 0 : EbErrorType return_error = EB_ErrorNone;
3369 0 : uint8_t is_compound = 0;
3370 : DECLARE_ALIGNED(32, uint16_t, tmp_dstY[128 * 128]);//move this to context if stack does not hold.
3371 :
3372 : MV mv, mv_q4;
3373 : int32_t subpel_x, subpel_y;
3374 : uint16_t * src_ptr;
3375 : uint16_t * dst_ptr;
3376 : int32_t src_stride;
3377 : int32_t dst_stride;
3378 : ConvolveParams conv_params;
3379 : InterpFilterParams filter_params_x, filter_params_y;
3380 :
3381 : {
3382 : //List0-Y
3383 0 : mv.col = mv_unit->mv[REF_LIST_0].x;
3384 0 : mv.row = mv_unit->mv[REF_LIST_0].y;
3385 0 : assert(ref_pic_list0 != NULL);
3386 0 : src_ptr = (uint16_t*)ref_pic_list0->buffer_y + ref_pic_list0->origin_x + pu_origin_x + (ref_pic_list0->origin_y + pu_origin_y) * ref_pic_list0->stride_y;
3387 0 : dst_ptr = (uint16_t*)prediction_ptr->buffer_y + prediction_ptr->origin_x + dst_origin_x + (prediction_ptr->origin_y + dst_origin_y) * prediction_ptr->stride_y;
3388 0 : src_stride = ref_pic_list0->stride_y;
3389 0 : dst_stride = prediction_ptr->stride_y;
3390 :
3391 0 : mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bwidth, bheight, 0, 0);//mv_q4 has 1 extra bit for fractionnal to accomodate chroma when accessing filter coeffs.
3392 0 : subpel_x = mv_q4.col & SUBPEL_MASK;
3393 0 : subpel_y = mv_q4.row & SUBPEL_MASK;
3394 0 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
3395 0 : conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstY, 128, is_compound, EB_10BIT);
3396 0 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
3397 : &filter_params_y, bwidth, bheight);
3398 :
3399 0 : convolveHbd[subpel_x != 0][subpel_y != 0][is_compound](
3400 : src_ptr,
3401 : src_stride,
3402 : dst_ptr,
3403 : dst_stride,
3404 : bwidth,
3405 : bheight,
3406 : &filter_params_x,
3407 : &filter_params_y,
3408 : subpel_x,
3409 : subpel_y,
3410 : &conv_params,
3411 : 10);
3412 :
3413 : }
3414 0 : return return_error;
3415 : }
3416 0 : EbErrorType get_single_prediction_for_obmc_chroma_hbd(
3417 : uint32_t interp_filters,
3418 : MacroBlockD *xd,
3419 : MvUnit *mv_unit,
3420 : uint16_t pu_origin_x,
3421 : uint16_t pu_origin_y,
3422 : uint8_t bwidth,
3423 : uint8_t bheight,
3424 : EbPictureBufferDesc *ref_pic_list0,
3425 : EbPictureBufferDesc *prediction_ptr,
3426 : uint16_t dst_origin_x,
3427 : uint16_t dst_origin_y)
3428 : {
3429 0 : EbErrorType return_error = EB_ErrorNone;
3430 0 : uint8_t is_compound = 0;
3431 :
3432 : DECLARE_ALIGNED(32, uint16_t, tmp_dstCb[64 * 64]);
3433 : DECLARE_ALIGNED(32, uint16_t, tmp_dstCr[64 * 64]);
3434 :
3435 : MV mv, mv_q4;
3436 : int32_t subpel_x, subpel_y;
3437 : uint16_t * src_ptr;
3438 : uint16_t * dst_ptr;
3439 : int32_t src_stride;
3440 : int32_t dst_stride;
3441 : ConvolveParams conv_params;
3442 : InterpFilterParams filter_params_x, filter_params_y;
3443 : {
3444 : //List0-Y
3445 0 : mv.col = mv_unit->mv[REF_LIST_0].x;
3446 0 : mv.row = mv_unit->mv[REF_LIST_0].y;
3447 0 : assert(ref_pic_list0 != NULL);
3448 :
3449 : {
3450 : //List0-Cb
3451 0 : src_ptr = (uint16_t*)ref_pic_list0->buffer_cb + (ref_pic_list0->origin_x + ((pu_origin_x >> 3) << 3)) / 2 + (ref_pic_list0->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic_list0->stride_cb;
3452 0 : dst_ptr = (uint16_t*)prediction_ptr->buffer_cb + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cb;
3453 0 : src_stride = ref_pic_list0->stride_cb;
3454 0 : dst_stride = prediction_ptr->stride_cb;
3455 :
3456 0 : mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bwidth, bheight, 1, 1);
3457 0 : subpel_x = mv_q4.col & SUBPEL_MASK;
3458 0 : subpel_y = mv_q4.row & SUBPEL_MASK;
3459 0 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
3460 0 : conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstCb, 64, is_compound, EB_10BIT);
3461 :
3462 0 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
3463 : &filter_params_y, bwidth, bheight);
3464 :
3465 0 : convolveHbd[subpel_x != 0][subpel_y != 0][is_compound](
3466 : src_ptr,
3467 : src_stride,
3468 : dst_ptr,
3469 : dst_stride,
3470 : bwidth,
3471 : bheight,
3472 : &filter_params_x,
3473 : &filter_params_y,
3474 : subpel_x,
3475 : subpel_y,
3476 : &conv_params,
3477 : 10);
3478 :
3479 : //List0-Cr
3480 0 : src_ptr = (uint16_t*)ref_pic_list0->buffer_cr + (ref_pic_list0->origin_x + ((pu_origin_x >> 3) << 3)) / 2 + (ref_pic_list0->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic_list0->stride_cr;
3481 0 : dst_ptr = (uint16_t*) prediction_ptr->buffer_cr + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cr;
3482 0 : src_stride = ref_pic_list0->stride_cr;
3483 0 : dst_stride = prediction_ptr->stride_cr;
3484 :
3485 0 : mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bwidth, bheight, 1, 1);
3486 0 : subpel_x = mv_q4.col & SUBPEL_MASK;
3487 0 : subpel_y = mv_q4.row & SUBPEL_MASK;
3488 0 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
3489 0 : conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstCr, 64, is_compound, EB_10BIT);
3490 0 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
3491 : &filter_params_y, bwidth, bheight);
3492 :
3493 0 : convolveHbd[subpel_x != 0][subpel_y != 0][is_compound](
3494 : src_ptr,
3495 : src_stride,
3496 : dst_ptr,
3497 : dst_stride,
3498 : bwidth,
3499 : bheight,
3500 : &filter_params_x,
3501 : &filter_params_y,
3502 : subpel_x,
3503 : subpel_y,
3504 : &conv_params,
3505 : 10);
3506 : }
3507 : }
3508 :
3509 0 : return return_error;
3510 : }
3511 :
3512 813937 : EbErrorType get_single_prediction_for_obmc_luma(
3513 : uint32_t interp_filters,
3514 : MacroBlockD *xd,
3515 : MvUnit *mv_unit,
3516 : uint16_t pu_origin_x,
3517 : uint16_t pu_origin_y,
3518 : uint8_t bwidth,
3519 : uint8_t bheight,
3520 : EbPictureBufferDesc *ref_pic_list0,
3521 : EbPictureBufferDesc *prediction_ptr,
3522 : uint16_t dst_origin_x,
3523 : uint16_t dst_origin_y)
3524 : {
3525 813937 : EbErrorType return_error = EB_ErrorNone;
3526 813937 : uint8_t is_compound = 0;
3527 : DECLARE_ALIGNED(32, uint16_t, tmp_dstY[128 * 128]);//move this to context if stack does not hold.
3528 :
3529 : MV mv, mv_q4;
3530 : int32_t subpel_x, subpel_y;
3531 : uint8_t * src_ptr;
3532 : uint8_t * dst_ptr;
3533 : int32_t src_stride;
3534 : int32_t dst_stride;
3535 : ConvolveParams conv_params;
3536 : InterpFilterParams filter_params_x, filter_params_y;
3537 :
3538 : {
3539 : //List0-Y
3540 813937 : mv.col = mv_unit->mv[REF_LIST_0].x;
3541 813937 : mv.row = mv_unit->mv[REF_LIST_0].y;
3542 813937 : assert(ref_pic_list0 != NULL);
3543 813937 : src_ptr = ref_pic_list0->buffer_y + ref_pic_list0->origin_x + pu_origin_x + (ref_pic_list0->origin_y + pu_origin_y) * ref_pic_list0->stride_y;
3544 813937 : dst_ptr = prediction_ptr->buffer_y + prediction_ptr->origin_x + dst_origin_x + (prediction_ptr->origin_y + dst_origin_y) * prediction_ptr->stride_y;
3545 813937 : src_stride = ref_pic_list0->stride_y;
3546 813937 : dst_stride = prediction_ptr->stride_y;
3547 :
3548 813937 : mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bwidth, bheight, 0, 0);//mv_q4 has 1 extra bit for fractionnal to accomodate chroma when accessing filter coeffs.
3549 813955 : subpel_x = mv_q4.col & SUBPEL_MASK;
3550 813955 : subpel_y = mv_q4.row & SUBPEL_MASK;
3551 813955 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
3552 813955 : conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstY, 128, is_compound, EB_8BIT);
3553 813952 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
3554 : &filter_params_y, bwidth, bheight);
3555 :
3556 813947 : convolve[subpel_x != 0][subpel_y != 0][is_compound](
3557 : src_ptr,
3558 : src_stride,
3559 : dst_ptr,
3560 : dst_stride,
3561 : bwidth,
3562 : bheight,
3563 : &filter_params_x,
3564 : &filter_params_y,
3565 : subpel_x,
3566 : subpel_y,
3567 : &conv_params);
3568 :
3569 : }
3570 813954 : return return_error;
3571 : }
3572 :
3573 614604 : EbErrorType get_single_prediction_for_obmc_chroma(
3574 : uint32_t interp_filters,
3575 : MacroBlockD *xd,
3576 : MvUnit *mv_unit,
3577 : uint16_t pu_origin_x,
3578 : uint16_t pu_origin_y,
3579 : uint8_t bwidth,
3580 : uint8_t bheight,
3581 : EbPictureBufferDesc *ref_pic_list0,
3582 : EbPictureBufferDesc *prediction_ptr,
3583 : uint16_t dst_origin_x,
3584 : uint16_t dst_origin_y)
3585 : {
3586 614604 : EbErrorType return_error = EB_ErrorNone;
3587 614604 : uint8_t is_compound = 0;
3588 :
3589 : DECLARE_ALIGNED(32, uint16_t, tmp_dstCb[64 * 64]);
3590 : DECLARE_ALIGNED(32, uint16_t, tmp_dstCr[64 * 64]);
3591 :
3592 : MV mv, mv_q4;
3593 : int32_t subpel_x, subpel_y;
3594 : uint8_t * src_ptr;
3595 : uint8_t * dst_ptr;
3596 : int32_t src_stride;
3597 : int32_t dst_stride;
3598 : ConvolveParams conv_params;
3599 : InterpFilterParams filter_params_x, filter_params_y;
3600 :
3601 :
3602 : {
3603 : //List0-Y
3604 :
3605 614604 : mv.col = mv_unit->mv[REF_LIST_0].x;
3606 614604 : mv.row = mv_unit->mv[REF_LIST_0].y;
3607 614604 : assert(ref_pic_list0 != NULL);
3608 :
3609 : {
3610 : //List0-Cb
3611 614604 : src_ptr = ref_pic_list0->buffer_cb + (ref_pic_list0->origin_x + ((pu_origin_x >> 3) << 3)) / 2 + (ref_pic_list0->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic_list0->stride_cb;
3612 614604 : dst_ptr = prediction_ptr->buffer_cb + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cb;
3613 614604 : src_stride = ref_pic_list0->stride_cb;
3614 614604 : dst_stride = prediction_ptr->stride_cb;
3615 :
3616 614604 : mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bwidth, bheight, 1, 1);
3617 614612 : subpel_x = mv_q4.col & SUBPEL_MASK;
3618 614612 : subpel_y = mv_q4.row & SUBPEL_MASK;
3619 614612 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
3620 614612 : conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstCb, 64, is_compound, EB_8BIT);
3621 :
3622 614611 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
3623 : &filter_params_y, bwidth, bheight);
3624 :
3625 614602 : convolve[subpel_x != 0][subpel_y != 0][is_compound](
3626 : src_ptr,
3627 : src_stride,
3628 : dst_ptr,
3629 : dst_stride,
3630 : bwidth,
3631 : bheight,
3632 : &filter_params_x,
3633 : &filter_params_y,
3634 : subpel_x,
3635 : subpel_y,
3636 : &conv_params);
3637 :
3638 : //List0-Cr
3639 614603 : src_ptr = ref_pic_list0->buffer_cr + (ref_pic_list0->origin_x + ((pu_origin_x >> 3) << 3)) / 2 + (ref_pic_list0->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic_list0->stride_cr;
3640 614603 : dst_ptr = prediction_ptr->buffer_cr + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cr;
3641 614603 : src_stride = ref_pic_list0->stride_cr;
3642 614603 : dst_stride = prediction_ptr->stride_cr;
3643 :
3644 614603 : mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bwidth, bheight, 1, 1);
3645 614607 : subpel_x = mv_q4.col & SUBPEL_MASK;
3646 614607 : subpel_y = mv_q4.row & SUBPEL_MASK;
3647 614607 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
3648 614607 : conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstCr, 64, is_compound, EB_8BIT);
3649 614610 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
3650 : &filter_params_y, bwidth, bheight);
3651 :
3652 614604 : convolve[subpel_x != 0][subpel_y != 0][is_compound](
3653 : src_ptr,
3654 : src_stride,
3655 : dst_ptr,
3656 : dst_stride,
3657 : bwidth,
3658 : bheight,
3659 : &filter_params_x,
3660 : &filter_params_y,
3661 : subpel_x,
3662 : subpel_y,
3663 : &conv_params);
3664 :
3665 : }
3666 : }
3667 :
3668 614604 : return return_error;
3669 : }
3670 396680 : static INLINE void build_prediction_by_above_pred(
3671 : uint8_t is16bit,
3672 : MacroBlockD *xd,
3673 : int rel_mi_col,
3674 : uint8_t above_mi_width,
3675 : MbModeInfo *above_mbmi,
3676 : void *fun_ctxt,
3677 : const int num_planes)
3678 : {
3679 396680 : struct build_prediction_ctxt *ctxt = (struct build_prediction_ctxt *)fun_ctxt;
3680 396680 : const int above_mi_col = ctxt->mi_col + rel_mi_col;
3681 : int mi_x, mi_y;
3682 396680 : MbModeInfo backup_mbmi = *above_mbmi;
3683 :
3684 396680 : av1_setup_build_prediction_by_above_pred(xd, rel_mi_col, above_mi_width,
3685 : &backup_mbmi, ctxt, num_planes,is16bit);
3686 :
3687 396688 : ctxt->prediction_ptr.origin_x = ctxt->prediction_ptr.origin_y = 0;
3688 396688 : ctxt->prediction_ptr.buffer_y = ctxt->tmp_buf[0];
3689 396688 : ctxt->prediction_ptr.buffer_cb = ctxt->tmp_buf[1];
3690 396688 : ctxt->prediction_ptr.buffer_cr = ctxt->tmp_buf[2];
3691 396688 : ctxt->prediction_ptr.stride_y = ctxt->tmp_stride[0];
3692 396688 : ctxt->prediction_ptr.stride_cb = ctxt->tmp_stride[1];
3693 396688 : ctxt->prediction_ptr.stride_cr = ctxt->tmp_stride[2];
3694 :
3695 396688 : ctxt->dst_origin_x = rel_mi_col << MI_SIZE_LOG2;
3696 396688 : ctxt->dst_origin_y = 0;
3697 :
3698 396688 : mi_x = above_mi_col << MI_SIZE_LOG2;
3699 396688 : mi_y = ctxt->mi_row << MI_SIZE_LOG2;
3700 :
3701 396688 : const BlockSize bsize = xd->sb_type;
3702 :
3703 1190060 : for (int j = 0; j < num_planes; ++j) {
3704 :
3705 793368 : int subsampling_x = j > 0 ? 1 : 0;
3706 793368 : int subsampling_y = j > 0 ? 1 : 0;
3707 :
3708 793368 : int bw = (above_mi_width * MI_SIZE) >> subsampling_x;
3709 793368 : int bh = clamp(block_size_high[bsize] >> (subsampling_y + 1), 4,
3710 793368 : block_size_high[BLOCK_64X64] >> (subsampling_y + 1));
3711 :
3712 :
3713 793358 : if (av1_skip_u4x4_pred_in_obmc(bsize, 0, subsampling_x, subsampling_y)) continue;
3714 :
3715 594017 : if(j==0)
3716 396677 : if (is16bit)
3717 0 : get_single_prediction_for_obmc_luma_hbd(
3718 : above_mbmi->block_mi.interp_filters,
3719 : xd,
3720 : &ctxt->mv_unit,
3721 : mi_x,
3722 : mi_y,
3723 : bw,
3724 : bh,
3725 : ctxt->ref_pic_list0,
3726 : &ctxt->prediction_ptr,
3727 0 : ctxt->dst_origin_x,
3728 0 : ctxt->dst_origin_y);
3729 : else
3730 396677 : get_single_prediction_for_obmc_luma(
3731 : above_mbmi->block_mi.interp_filters,
3732 : xd,
3733 : &ctxt->mv_unit,
3734 : mi_x,
3735 : mi_y,
3736 : bw,
3737 : bh,
3738 : ctxt->ref_pic_list0,
3739 : &ctxt->prediction_ptr,
3740 396677 : ctxt->dst_origin_x,
3741 396677 : ctxt->dst_origin_y);
3742 : else
3743 197340 : if (is16bit)
3744 0 : get_single_prediction_for_obmc_chroma_hbd(
3745 : above_mbmi->block_mi.interp_filters,
3746 : xd,
3747 : &ctxt->mv_unit,
3748 : mi_x,
3749 : mi_y,
3750 : bw,
3751 : bh,
3752 : ctxt->ref_pic_list0,
3753 : &ctxt->prediction_ptr,
3754 0 : ctxt->dst_origin_x,
3755 0 : ctxt->dst_origin_y);
3756 : else
3757 197340 : get_single_prediction_for_obmc_chroma(
3758 : above_mbmi->block_mi.interp_filters,
3759 : xd,
3760 : &ctxt->mv_unit,
3761 : mi_x,
3762 : mi_y,
3763 : bw,
3764 : bh,
3765 : ctxt->ref_pic_list0,
3766 : &ctxt->prediction_ptr,
3767 197340 : ctxt->dst_origin_x,
3768 197340 : ctxt->dst_origin_y);
3769 :
3770 : }
3771 396691 : }
3772 417265 : static INLINE void build_prediction_by_left_pred(
3773 : uint8_t is16bit,
3774 : MacroBlockD *xd,
3775 : int rel_mi_row,
3776 : uint8_t left_mi_height,
3777 : MbModeInfo *left_mbmi,
3778 : void *fun_ctxt,
3779 : const int num_planes)
3780 : {
3781 417265 : struct build_prediction_ctxt *ctxt = (struct build_prediction_ctxt *)fun_ctxt;
3782 417265 : const int left_mi_row = ctxt->mi_row + rel_mi_row;
3783 : int mi_x, mi_y;
3784 417265 : MbModeInfo backup_mbmi = *left_mbmi;
3785 :
3786 417265 : av1_setup_build_prediction_by_left_pred(xd, rel_mi_row,
3787 : left_mi_height,
3788 : &backup_mbmi, ctxt, num_planes,is16bit);
3789 :
3790 417267 : mi_x = ctxt->mi_col << MI_SIZE_LOG2;
3791 417267 : mi_y = left_mi_row << MI_SIZE_LOG2;
3792 :
3793 417267 : ctxt->prediction_ptr.origin_x = ctxt->prediction_ptr.origin_y = 0;
3794 417267 : ctxt->prediction_ptr.buffer_y = ctxt->tmp_buf[0];
3795 417267 : ctxt->prediction_ptr.buffer_cb = ctxt->tmp_buf[1];
3796 417267 : ctxt->prediction_ptr.buffer_cr = ctxt->tmp_buf[2];
3797 417267 : ctxt->prediction_ptr.stride_y = ctxt->tmp_stride[0];
3798 417267 : ctxt->prediction_ptr.stride_cb = ctxt->tmp_stride[1];
3799 417267 : ctxt->prediction_ptr.stride_cr = ctxt->tmp_stride[2];
3800 :
3801 417267 : ctxt->dst_origin_x = 0;
3802 417267 : ctxt->dst_origin_y = rel_mi_row << MI_SIZE_LOG2;
3803 :
3804 417267 : const BlockSize bsize = xd->sb_type;
3805 :
3806 1251790 : for (int j = 0; j < num_planes; ++j)
3807 : {
3808 834525 : int subsampling_x = j > 0 ? 1 : 0;
3809 834525 : int subsampling_y = j > 0 ? 1 : 0;
3810 :
3811 834525 : int bw = clamp(block_size_wide[bsize] >> (subsampling_x + 1), 4, block_size_wide[BLOCK_64X64] >> (subsampling_x + 1));
3812 834526 : int bh = (left_mi_height << MI_SIZE_LOG2) >> subsampling_y;
3813 :
3814 834526 : if (av1_skip_u4x4_pred_in_obmc(bsize, 1,subsampling_x, subsampling_y)) continue;
3815 :
3816 834532 : if (j == 0)
3817 417267 : if (is16bit)
3818 0 : get_single_prediction_for_obmc_luma_hbd(
3819 : left_mbmi->block_mi.interp_filters,
3820 : xd,
3821 : &ctxt->mv_unit,
3822 : mi_x,
3823 : mi_y,
3824 : bw,
3825 : bh,
3826 : ctxt->ref_pic_list0,
3827 : &ctxt->prediction_ptr,
3828 0 : ctxt->dst_origin_x,
3829 0 : ctxt->dst_origin_y);
3830 : else
3831 417267 : get_single_prediction_for_obmc_luma(
3832 : left_mbmi->block_mi.interp_filters,
3833 : xd,
3834 : &ctxt->mv_unit,
3835 : mi_x,
3836 : mi_y,
3837 : bw,
3838 : bh,
3839 : ctxt->ref_pic_list0,
3840 : &ctxt->prediction_ptr,
3841 417267 : ctxt->dst_origin_x,
3842 417267 : ctxt->dst_origin_y);
3843 : else
3844 417265 : if (is16bit)
3845 0 : get_single_prediction_for_obmc_chroma_hbd(
3846 : left_mbmi->block_mi.interp_filters,
3847 : xd,
3848 : &ctxt->mv_unit,
3849 : mi_x,
3850 : mi_y,
3851 : bw,
3852 : bh,
3853 : ctxt->ref_pic_list0,
3854 : &ctxt->prediction_ptr,
3855 0 : ctxt->dst_origin_x,
3856 0 : ctxt->dst_origin_y);
3857 : else
3858 417265 : get_single_prediction_for_obmc_chroma(
3859 : left_mbmi->block_mi.interp_filters,
3860 : xd,
3861 : &ctxt->mv_unit,
3862 : mi_x,
3863 : mi_y,
3864 : bw,
3865 : bh,
3866 : ctxt->ref_pic_list0,
3867 : &ctxt->prediction_ptr,
3868 417265 : ctxt->dst_origin_x,
3869 417265 : ctxt->dst_origin_y);
3870 : }
3871 417266 : }
3872 :
3873 0 : static void build_prediction_by_above_preds_hbd(
3874 : EbBool perform_chroma,
3875 : BlockSize bsize,
3876 : PictureControlSet *picture_control_set_ptr,
3877 : MacroBlockD *xd,
3878 : int mi_row,
3879 : int mi_col,
3880 : uint16_t *tmp_buf[MAX_MB_PLANE],
3881 : int tmp_stride[MAX_MB_PLANE] )
3882 : {
3883 0 : if (!xd->up_available) return;
3884 :
3885 0 : uint8_t is16bit = 1;
3886 : // Adjust mb_to_bottom_edge to have the correct value for the OBMC
3887 : // prediction block. This is half the height of the original block,
3888 : // except for 128-wide blocks, where we only use a height of 32.
3889 0 : int this_height = xd->n4_h * MI_SIZE;
3890 0 : int pred_height = AOMMIN(this_height / 2, 32);
3891 0 : xd->mb_to_bottom_edge += (this_height - pred_height) * 8;
3892 :
3893 : struct build_prediction_hbd_ctxt ctxt ;
3894 :
3895 0 : ctxt.cm = picture_control_set_ptr->parent_pcs_ptr->av1_cm;
3896 0 : ctxt.mi_row= mi_row;
3897 0 : ctxt.mi_col= mi_col;
3898 0 : ctxt.tmp_buf= tmp_buf;
3899 0 : ctxt.tmp_width= 0;
3900 0 : ctxt.tmp_height= 0;
3901 0 : ctxt.tmp_stride= tmp_stride;
3902 0 : ctxt.mb_to_far_edge= xd->mb_to_right_edge;
3903 :
3904 0 : ctxt.picture_control_set_ptr = picture_control_set_ptr;
3905 0 : ctxt.perform_chroma = perform_chroma;
3906 0 : xd->sb_type = bsize;
3907 :
3908 0 : foreach_overlappable_nb_above(is16bit,picture_control_set_ptr->parent_pcs_ptr->av1_cm, xd, mi_col,
3909 0 : max_neighbor_obmc[mi_size_wide_log2[bsize]],
3910 : build_prediction_by_above_pred, &ctxt);
3911 :
3912 0 : xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
3913 0 : xd->mb_to_right_edge = ctxt.mb_to_far_edge;
3914 0 : xd->mb_to_bottom_edge -= (this_height - pred_height) * 8;
3915 : }
3916 :
3917 401382 : static void build_prediction_by_above_preds(
3918 : EbBool perform_chroma,
3919 : BlockSize bsize,
3920 : PictureControlSet *picture_control_set_ptr,
3921 : MacroBlockD *xd,
3922 : int mi_row,
3923 : int mi_col,
3924 : uint8_t *tmp_buf[MAX_MB_PLANE],
3925 : int tmp_stride[MAX_MB_PLANE] )
3926 : {
3927 401382 : if (!xd->up_available) return;
3928 :
3929 379757 : uint8_t is16bit = 0;
3930 : // Adjust mb_to_bottom_edge to have the correct value for the OBMC
3931 : // prediction block. This is half the height of the original block,
3932 : // except for 128-wide blocks, where we only use a height of 32.
3933 379757 : int this_height = xd->n4_h * MI_SIZE;
3934 379757 : int pred_height = AOMMIN(this_height / 2, 32);
3935 379757 : xd->mb_to_bottom_edge += (this_height - pred_height) * 8;
3936 :
3937 : struct build_prediction_ctxt ctxt ;
3938 :
3939 379757 : ctxt.cm = picture_control_set_ptr->parent_pcs_ptr->av1_cm;
3940 379757 : ctxt.mi_row= mi_row;
3941 379757 : ctxt.mi_col= mi_col;
3942 379757 : ctxt.tmp_buf= tmp_buf;
3943 379757 : ctxt.tmp_width= 0;
3944 379757 : ctxt.tmp_height= 0;
3945 379757 : ctxt.tmp_stride= tmp_stride;
3946 379757 : ctxt.mb_to_far_edge= xd->mb_to_right_edge;
3947 :
3948 379757 : ctxt.picture_control_set_ptr = picture_control_set_ptr;
3949 379757 : ctxt.perform_chroma = perform_chroma;
3950 379757 : xd->sb_type = bsize;
3951 :
3952 379757 : foreach_overlappable_nb_above(
3953 : is16bit,
3954 379757 : picture_control_set_ptr->parent_pcs_ptr->av1_cm,
3955 : xd,
3956 : mi_col,
3957 379757 : max_neighbor_obmc[mi_size_wide_log2[bsize]],
3958 : build_prediction_by_above_pred,
3959 : &ctxt);
3960 :
3961 379776 : xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
3962 379776 : xd->mb_to_right_edge = ctxt.mb_to_far_edge;
3963 379776 : xd->mb_to_bottom_edge -= (this_height - pred_height) * 8;
3964 : }
3965 :
3966 0 : static void build_prediction_by_left_preds_hbd(
3967 : EbBool perform_chroma,
3968 : BlockSize bsize,
3969 : PictureControlSet *picture_control_set_ptr,
3970 : MacroBlockD *xd,
3971 : int mi_row,
3972 : int mi_col,
3973 : uint16_t *tmp_buf[MAX_MB_PLANE],
3974 : int tmp_stride[MAX_MB_PLANE])
3975 : {
3976 0 : if (!xd->left_available) return;
3977 :
3978 0 : uint8_t is16bit = 1;
3979 : // Adjust mb_to_right_edge to have the correct value for the OBMC
3980 : // prediction block. This is half the width of the original block,
3981 : // except for 128-wide blocks, where we only use a width of 32.
3982 0 : int this_width = xd->n4_w * MI_SIZE;
3983 0 : int pred_width = AOMMIN(this_width / 2, 32);
3984 0 : xd->mb_to_right_edge += (this_width - pred_width) * 8;
3985 :
3986 : struct build_prediction_hbd_ctxt ctxt ;
3987 :
3988 0 : ctxt.cm = picture_control_set_ptr->parent_pcs_ptr->av1_cm;
3989 0 : ctxt.mi_row= mi_row;
3990 0 : ctxt.mi_col= mi_col;
3991 0 : ctxt.tmp_buf= tmp_buf;
3992 0 : ctxt.tmp_width= 0;
3993 0 : ctxt.tmp_height= 0;
3994 0 : ctxt.tmp_stride= tmp_stride;
3995 0 : ctxt.mb_to_far_edge= xd->mb_to_bottom_edge;
3996 :
3997 0 : ctxt.picture_control_set_ptr = picture_control_set_ptr;
3998 0 : ctxt.perform_chroma = perform_chroma;
3999 :
4000 0 : xd->sb_type = bsize;
4001 :
4002 0 : foreach_overlappable_nb_left(
4003 : is16bit,
4004 0 : picture_control_set_ptr->parent_pcs_ptr->av1_cm,
4005 : xd,
4006 : mi_row,
4007 0 : max_neighbor_obmc[mi_size_high_log2[bsize]],
4008 : build_prediction_by_left_pred,
4009 : &ctxt);
4010 :
4011 0 : xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
4012 0 : xd->mb_to_right_edge -= (this_width - pred_width) * 8;
4013 0 : xd->mb_to_bottom_edge = ctxt.mb_to_far_edge;
4014 : }
4015 :
4016 401400 : static void build_prediction_by_left_preds(
4017 : EbBool perform_chroma,
4018 : BlockSize bsize,
4019 : PictureControlSet *picture_control_set_ptr,
4020 : MacroBlockD *xd,
4021 : int mi_row,
4022 : int mi_col,
4023 : uint8_t *tmp_buf[MAX_MB_PLANE],
4024 : int tmp_stride[MAX_MB_PLANE])
4025 : {
4026 401400 : if (!xd->left_available) return;
4027 :
4028 384532 : uint8_t is16bit =0;
4029 : // Adjust mb_to_right_edge to have the correct value for the OBMC
4030 : // prediction block. This is half the width of the original block,
4031 : // except for 128-wide blocks, where we only use a width of 32.
4032 384532 : int this_width = xd->n4_w * MI_SIZE;
4033 384532 : int pred_width = AOMMIN(this_width / 2, 32);
4034 384532 : xd->mb_to_right_edge += (this_width - pred_width) * 8;
4035 :
4036 : struct build_prediction_ctxt ctxt ;
4037 :
4038 384532 : ctxt.cm = picture_control_set_ptr->parent_pcs_ptr->av1_cm;
4039 384532 : ctxt.mi_row= mi_row;
4040 384532 : ctxt.mi_col= mi_col;
4041 384532 : ctxt.tmp_buf= tmp_buf;
4042 384532 : ctxt.tmp_width= 0;
4043 384532 : ctxt.tmp_height= 0;
4044 384532 : ctxt.tmp_stride= tmp_stride;
4045 384532 : ctxt.mb_to_far_edge= xd->mb_to_bottom_edge;
4046 :
4047 384532 : ctxt.picture_control_set_ptr = picture_control_set_ptr;
4048 384532 : ctxt.perform_chroma = perform_chroma;
4049 :
4050 384532 : xd->sb_type = bsize;
4051 :
4052 384532 : foreach_overlappable_nb_left(
4053 : is16bit,
4054 384532 : picture_control_set_ptr->parent_pcs_ptr->av1_cm,
4055 : xd,
4056 : mi_row,
4057 384532 : max_neighbor_obmc[mi_size_high_log2[bsize]],
4058 : build_prediction_by_left_pred, &ctxt);
4059 :
4060 384533 : xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
4061 384533 : xd->mb_to_right_edge -= (this_width - pred_width) * 8;
4062 384533 : xd->mb_to_bottom_edge = ctxt.mb_to_far_edge;
4063 : }
4064 :
4065 :
4066 : struct obmc_inter_pred_ctxt { // Context handed (as fun_ctxt) to the build_obmc_inter_pred_* neighbour callbacks
4067 : uint8_t **adjacent; // neighbour predictions indexed by plane, read by the 8-bit callbacks
4068 : uint16_t **adjacent_hbd; // neighbour predictions indexed by plane, read by the *_hbd callbacks
4069 : int *adjacent_stride; // stride of each neighbour prediction plane, indexed by plane
4070 : uint8_t *final_dst_ptr_y; // luma destination, blended in place by the 8-bit callbacks
4071 : uint16_t *final_dst_ptr_y_hbd; // luma destination, blended in place by the *_hbd callbacks
4072 : uint16_t final_dst_stride_y; // stride of the luma destination
4073 : uint8_t *final_dst_ptr_u; // first chroma destination (plane 1), 8-bit callbacks
4074 : uint16_t *final_dst_ptr_u_hbd; // first chroma destination (plane 1), *_hbd callbacks
4075 : uint16_t final_dst_stride_u; // stride of the plane 1 destination
4076 : uint8_t *final_dst_ptr_v; // second chroma destination (plane 2), 8-bit callbacks
4077 : uint16_t *final_dst_ptr_v_hbd; // second chroma destination (plane 2), *_hbd callbacks
4078 : uint16_t final_dst_stride_v; // stride of the plane 2 destination
4079 : EbBool perform_chroma; // non-zero: callbacks process 3 planes; zero: luma only
4080 : };
4081 : // obmc_mask_N[overlap_position]: weight table for an overlap of N samples, selected by av1_get_obmc_mask(N). Within each table the weights never decrease with overlap_position and end at 64.
4082 : static const uint8_t obmc_mask_1[1] = { 64 };
4083 : DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };
4084 :
4085 : DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };
4086 :
4087 : static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };
4088 :
4089 : static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
4090 : 56, 58, 60, 61, 64, 64, 64, 64 };
4091 :
4092 : static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
4093 : 45, 47, 48, 50, 51, 52, 53, 55,
4094 : 56, 57, 58, 59, 60, 60, 61, 62,
4095 : 64, 64, 64, 64, 64, 64, 64, 64 };
4096 :
4097 : static const uint8_t obmc_mask_64[64] = {
4098 : 33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
4099 : 45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
4100 : 56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
4101 : 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
4102 : };
4103 :
4104 28436900 : const uint8_t *av1_get_obmc_mask(int length) {
4105 28436900 : switch (length) {
4106 0 : case 1: return obmc_mask_1;
4107 342460 : case 2: return obmc_mask_2;
4108 12491300 : case 4: return obmc_mask_4;
4109 9055840 : case 8: return obmc_mask_8;
4110 5033190 : case 16: return obmc_mask_16;
4111 1534580 : case 32: return obmc_mask_32;
4112 0 : case 64: return obmc_mask_64;
4113 0 : default: assert(0); return NULL;
4114 : }
4115 : }
4116 :
4117 :
/*
 * C implementation of the 16-bit blend with a horizontal (per-column) mask.
 *
 * For every pixel of the w x h block:
 *     dst(row, col) = AOM_BLEND_A64(mask[col], src0(row, col), src1(row, col))
 * so `mask` needs w entries and the same weight is reused on every row.
 *
 * dst may alias src0 or src1 provided the aliased strides match. w and h
 * must be powers of two (>= 1). bd is only validated by an assert (8, 10 or
 * 12) and does not influence the result.
 */
void eb_aom_highbd_blend_a64_hmask_c(uint16_t *dst, uint32_t dst_stride,
                                     const uint16_t *src0, uint32_t src0_stride,
                                     const uint16_t *src1, uint32_t src1_stride,
                                     const uint8_t *mask, int w, int h, int bd) {
    (void)bd; // only referenced by the assert below

    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    assert(bd == 8 || bd == 10 || bd == 12);

    for (int row = 0; row < h; ++row) {
        uint16_t *const       out = dst + row * dst_stride;
        const uint16_t *const in0 = src0 + row * src0_stride;
        const uint16_t *const in1 = src1 + row * src1_stride;

        for (int col = 0; col < w; ++col) {
            out[col] = AOM_BLEND_A64(mask[col], in0[col], in1[col]);
        }
    }
}
/*
 * C implementation of the 16-bit blend with a vertical (per-row) mask.
 *
 * For every pixel of the w x h block:
 *     dst(row, col) = AOM_BLEND_A64(mask[row], src0(row, col), src1(row, col))
 * so `mask` needs h entries and the same weight is reused across a row.
 *
 * dst may alias src0 or src1 provided the aliased strides match. w and h
 * must be powers of two (>= 1). bd is only validated by an assert (8, 10 or
 * 12) and does not influence the result.
 */
void eb_aom_highbd_blend_a64_vmask_c(uint16_t *dst, uint32_t dst_stride,
                                     const uint16_t *src0, uint32_t src0_stride,
                                     const uint16_t *src1, uint32_t src1_stride,
                                     const uint8_t *mask, int w, int h, int bd) {
    (void)bd; // only referenced by the assert below

    assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

    assert(h >= 1);
    assert(w >= 1);
    assert(IS_POWER_OF_TWO(h));
    assert(IS_POWER_OF_TWO(w));

    assert(bd == 8 || bd == 10 || bd == 12);

    for (int row = 0; row < h; ++row) {
        const int             row_weight = mask[row];
        uint16_t *const       out        = dst + row * dst_stride;
        const uint16_t *const in0        = src0 + row * src0_stride;
        const uint16_t *const in1        = src1 + row * src1_stride;

        for (int col = 0; col < w; ++col) {
            out[col] = AOM_BLEND_A64(row_weight, in0[col], in1[col]);
        }
    }
}
4167 :
4168 0 : static INLINE void build_obmc_inter_pred_above_hbd(
4169 : uint8_t is16bit ,MacroBlockD *xd, int rel_mi_col,
4170 : uint8_t above_mi_width,
4171 : MbModeInfo *above_mi,
4172 : void *fun_ctxt,
4173 : const int num_planes)
4174 : {
4175 : (void)above_mi;
4176 : (void)is16bit;
4177 : (void)num_planes;
4178 0 : struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
4179 0 : const BlockSize bsize = xd->sb_type;
4180 :
4181 :
4182 0 : const int overlap =
4183 0 : AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
4184 :
4185 0 : int32_t tot_planes = (ctxt->perform_chroma ? 3 : 1);
4186 :
4187 0 : for (int plane = 0; plane < tot_planes; ++plane)
4188 : {
4189 0 : int subsampling_x = plane > 0 ? 1 : 0;
4190 0 : int subsampling_y = plane > 0 ? 1 : 0;
4191 :
4192 0 : const int bw = (above_mi_width * MI_SIZE) >> subsampling_x;
4193 0 : const int bh = overlap >> subsampling_y;
4194 0 : const int plane_col = (rel_mi_col * MI_SIZE) >> subsampling_x;
4195 :
4196 0 : if (av1_skip_u4x4_pred_in_obmc(bsize, 0, subsampling_x, subsampling_y)) continue;
4197 :
4198 :
4199 0 : const int dst_stride = plane == 0 ? ctxt->final_dst_stride_y : plane == 1 ? ctxt->final_dst_stride_u : ctxt->final_dst_stride_v;
4200 0 : uint16_t *const dst = plane == 0 ? &ctxt->final_dst_ptr_y_hbd[plane_col] : plane == 1 ? &ctxt->final_dst_ptr_u_hbd[plane_col] : &ctxt->final_dst_ptr_v_hbd[plane_col];
4201 :
4202 0 : const int tmp_stride = ctxt->adjacent_stride[plane];
4203 0 : const uint16_t *const tmp = &ctxt->adjacent_hbd[plane][plane_col];
4204 0 : const uint8_t *const mask = av1_get_obmc_mask(bh);
4205 :
4206 0 : eb_aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
4207 : tmp_stride, mask, bw, bh, 10);
4208 :
4209 : }
4210 0 : }
4211 :
4212 :
4213 12923900 : static INLINE void build_obmc_inter_pred_above(
4214 : uint8_t is16bit ,
4215 : MacroBlockD *xd,
4216 : int rel_mi_col,
4217 : uint8_t above_mi_width,
4218 : MbModeInfo *above_mi,
4219 : void *fun_ctxt,
4220 : const int num_planes)
4221 : {
4222 : (void)above_mi;
4223 : (void)is16bit;
4224 : (void)num_planes;
4225 12923900 : struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
4226 12923900 : const BlockSize bsize = xd->sb_type;
4227 :
4228 12923900 : const int overlap =
4229 12923900 : AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
4230 :
4231 12923900 : int32_t tot_planes = (ctxt->perform_chroma ? 3 : 1);
4232 :
4233 26544800 : for (int plane = 0; plane < tot_planes; ++plane)
4234 : {
4235 13621400 : int subsampling_x = plane > 0 ? 1 : 0;
4236 13621400 : int subsampling_y = plane > 0 ? 1 : 0;
4237 :
4238 13621400 : const int bw = (above_mi_width * MI_SIZE) >> subsampling_x;
4239 13621400 : const int bh = overlap >> subsampling_y;
4240 13621400 : const int plane_col = (rel_mi_col * MI_SIZE) >> subsampling_x;
4241 :
4242 13621400 : if (av1_skip_u4x4_pred_in_obmc(bsize, 0, subsampling_x, subsampling_y)) continue;
4243 :
4244 :
4245 13325500 : const int dst_stride = plane == 0 ? ctxt->final_dst_stride_y : plane == 1 ? ctxt->final_dst_stride_u : ctxt->final_dst_stride_v;
4246 13325500 : uint8_t *const dst = plane == 0 ? &ctxt->final_dst_ptr_y[plane_col] : plane == 1 ? &ctxt->final_dst_ptr_u[plane_col] : &ctxt->final_dst_ptr_v[plane_col];
4247 :
4248 13325500 : const int tmp_stride = ctxt->adjacent_stride[plane];
4249 13325500 : const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
4250 13325500 : const uint8_t *const mask = av1_get_obmc_mask(bh);
4251 :
4252 13325800 : aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
4253 : mask, bw, bh);
4254 : }
4255 12923400 : }
4256 :
4257 0 : static INLINE void build_obmc_inter_pred_left_hbd(
4258 : uint8_t is16bit ,
4259 : MacroBlockD *xd,
4260 : int rel_mi_row,
4261 : uint8_t left_mi_height,
4262 : MbModeInfo *left_mi,
4263 : void *fun_ctxt,
4264 : const int num_planes)
4265 : {
4266 : (void)left_mi;
4267 : (void)is16bit;
4268 : (void)num_planes;
4269 0 : struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
4270 0 : const BlockSize bsize = xd->sb_type;
4271 0 : const int overlap = AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
4272 :
4273 0 : int32_t tot_planes = (ctxt->perform_chroma ? 3 : 1);
4274 :
4275 0 : for (int plane = 0; plane < tot_planes ; ++plane)
4276 : {
4277 0 : int subsampling_x = plane > 0 ? 1 : 0;
4278 0 : int subsampling_y = plane > 0 ? 1 : 0;
4279 :
4280 : //const struct macroblockd_plane *pd = &xd->plane[plane];
4281 0 : const int bw = overlap >> subsampling_x;
4282 0 : const int bh = (left_mi_height * MI_SIZE) >> subsampling_y;
4283 0 : const int plane_row = (rel_mi_row * MI_SIZE) >> subsampling_y;
4284 :
4285 0 : if (av1_skip_u4x4_pred_in_obmc(bsize,1,subsampling_x, subsampling_y)) continue;
4286 :
4287 0 : const int dst_stride = plane == 0 ? ctxt->final_dst_stride_y : plane == 1 ? ctxt->final_dst_stride_u : ctxt->final_dst_stride_v;
4288 0 : uint16_t *const dst = plane == 0 ? &ctxt->final_dst_ptr_y_hbd[plane_row * dst_stride] : plane == 1 ? &ctxt->final_dst_ptr_u_hbd[plane_row * dst_stride] : &ctxt->final_dst_ptr_v_hbd[plane_row * dst_stride];
4289 0 : const int tmp_stride = ctxt->adjacent_stride[plane];
4290 0 : const uint16_t *const tmp = &ctxt->adjacent_hbd[plane][plane_row * tmp_stride];
4291 0 : const uint8_t *const mask = av1_get_obmc_mask(bw);
4292 :
4293 :
4294 0 : eb_aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
4295 : tmp_stride, mask, bw, bh, 10);
4296 :
4297 : }
4298 0 : }
4299 :
4300 13596700 : static INLINE void build_obmc_inter_pred_left(
4301 : uint8_t is16bit ,
4302 : MacroBlockD *xd,
4303 : int rel_mi_row,
4304 : uint8_t left_mi_height,
4305 : MbModeInfo *left_mi,
4306 : void *fun_ctxt,
4307 : const int num_planes)
4308 : {
4309 : (void)left_mi;
4310 : (void)is16bit;
4311 : (void)num_planes;
4312 13596700 : struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
4313 13596700 : const BlockSize bsize = xd->sb_type;
4314 13596700 : const int overlap = AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
4315 :
4316 13596700 : int32_t tot_planes = (ctxt->perform_chroma ? 3 : 1);
4317 :
4318 27909000 : for (int plane = 0; plane < tot_planes ; ++plane)
4319 : {
4320 14313300 : int subsampling_x = plane > 0 ? 1 : 0;
4321 14313300 : int subsampling_y = plane > 0 ? 1 : 0;
4322 :
4323 : //const struct macroblockd_plane *pd = &xd->plane[plane];
4324 14313300 : const int bw = overlap >> subsampling_x;
4325 14313300 : const int bh = (left_mi_height * MI_SIZE) >> subsampling_y;
4326 14313300 : const int plane_row = (rel_mi_row * MI_SIZE) >> subsampling_y;
4327 :
4328 14313300 : if (av1_skip_u4x4_pred_in_obmc(bsize,1,subsampling_x, subsampling_y)) continue;
4329 :
4330 14312800 : const int dst_stride = plane == 0 ? ctxt->final_dst_stride_y : plane == 1 ? ctxt->final_dst_stride_u : ctxt->final_dst_stride_v;
4331 14312800 : uint8_t *const dst = plane == 0 ? &ctxt->final_dst_ptr_y[plane_row * dst_stride] : plane == 1 ? &ctxt->final_dst_ptr_u[plane_row * dst_stride] : &ctxt->final_dst_ptr_v[plane_row * dst_stride];
4332 14312800 : const int tmp_stride = ctxt->adjacent_stride[plane];
4333 14312800 : const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
4334 14312800 : const uint8_t *const mask = av1_get_obmc_mask(bw);
4335 :
4336 :
4337 14312600 : aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
4338 : mask, bw, bh);
4339 : }
4340 13595800 : }
4341 :
4342 :
4343 : // This function combines motion compensated predictions that are generated by
4344 : // top/left neighboring blocks' inter predictors with the regular inter
4345 : // prediction. We assume the original prediction (bmc) is stored in
4346 : // xd->plane[].dst.buf
4347 0 : void av1_build_obmc_inter_prediction_hbd(
4348 : uint16_t *final_dst_ptr_y,
4349 : uint16_t final_dst_stride_y,
4350 : uint16_t *final_dst_ptr_u,
4351 : uint16_t final_dst_stride_u,
4352 : uint16_t *final_dst_ptr_v,
4353 : uint16_t final_dst_stride_v,
4354 : EbBool perform_chroma,
4355 : BlockSize bsize,
4356 : PictureControlSet *picture_control_set_ptr,
4357 : MacroBlockD *xd,
4358 : int mi_row,
4359 : int mi_col,
4360 : uint16_t *above[MAX_MB_PLANE],
4361 : int above_stride[MAX_MB_PLANE],
4362 : uint16_t *left[MAX_MB_PLANE],
4363 : int left_stride[MAX_MB_PLANE])
4364 : {
4365 0 : uint8_t is16bit = 1;
4366 : // handle above row
4367 : struct obmc_inter_pred_ctxt ctxt_above ;
4368 :
4369 0 : ctxt_above.adjacent =(uint8_t**)above;
4370 0 : ctxt_above.adjacent_hbd = above;
4371 0 : ctxt_above.adjacent_stride = above_stride;
4372 :
4373 0 : ctxt_above.final_dst_ptr_y = (uint8_t*)final_dst_ptr_y;
4374 0 : ctxt_above.final_dst_ptr_y_hbd = final_dst_ptr_y;
4375 0 : ctxt_above.final_dst_stride_y = final_dst_stride_y;
4376 0 : ctxt_above.final_dst_ptr_u = (uint8_t*)final_dst_ptr_u;
4377 0 : ctxt_above.final_dst_ptr_u_hbd = final_dst_ptr_u;
4378 0 : ctxt_above.final_dst_stride_u = final_dst_stride_u;
4379 0 : ctxt_above.final_dst_ptr_v = (uint8_t*)final_dst_ptr_v;
4380 0 : ctxt_above.final_dst_ptr_v_hbd = final_dst_ptr_v;
4381 0 : ctxt_above.final_dst_stride_v = final_dst_stride_v;
4382 0 : ctxt_above.perform_chroma = perform_chroma;
4383 :
4384 0 : foreach_overlappable_nb_above(
4385 : is16bit,
4386 0 : picture_control_set_ptr->parent_pcs_ptr->av1_cm,
4387 : xd,
4388 : mi_col,
4389 0 : max_neighbor_obmc[mi_size_wide_log2[bsize]],
4390 : build_obmc_inter_pred_above_hbd,
4391 : &ctxt_above);
4392 :
4393 : // handle left column
4394 : struct obmc_inter_pred_ctxt ctxt_left ;
4395 :
4396 0 : ctxt_left.adjacent = (uint8_t**)left;
4397 0 : ctxt_left.adjacent_hbd = left;
4398 0 : ctxt_left.adjacent_stride = left_stride;
4399 :
4400 0 : ctxt_left.final_dst_ptr_y = (uint8_t*)final_dst_ptr_y;
4401 0 : ctxt_left.final_dst_ptr_y_hbd = final_dst_ptr_y;
4402 0 : ctxt_left.final_dst_stride_y = final_dst_stride_y;
4403 0 : ctxt_left.final_dst_ptr_u = (uint8_t*)final_dst_ptr_u;
4404 0 : ctxt_left.final_dst_ptr_u_hbd = final_dst_ptr_u;
4405 0 : ctxt_left.final_dst_stride_u = final_dst_stride_u;
4406 0 : ctxt_left.final_dst_ptr_v = (uint8_t*)final_dst_ptr_v;
4407 0 : ctxt_left.final_dst_ptr_v_hbd = final_dst_ptr_v;
4408 0 : ctxt_left.final_dst_stride_v = final_dst_stride_v;
4409 0 : ctxt_left.perform_chroma = perform_chroma;
4410 :
4411 0 : foreach_overlappable_nb_left(
4412 : is16bit,
4413 0 : picture_control_set_ptr->parent_pcs_ptr->av1_cm,
4414 : xd,
4415 : mi_row,
4416 0 : max_neighbor_obmc[mi_size_high_log2[bsize]],
4417 : build_obmc_inter_pred_left_hbd,
4418 : &ctxt_left);
4419 0 : }
4420 :
4421 : // This function combines motion compensated predictions that are generated by
4422 : // top/left neighboring blocks' inter predictors with the regular inter
4423 : // prediction. We assume the original prediction (bmc) is stored in
4424 : // xd->plane[].dst.buf
4425 13103800 : void av1_build_obmc_inter_prediction(
4426 : uint8_t *final_dst_ptr_y,
4427 : uint16_t final_dst_stride_y,
4428 : uint8_t *final_dst_ptr_u,
4429 : uint16_t final_dst_stride_u,
4430 : uint8_t *final_dst_ptr_v,
4431 : uint16_t final_dst_stride_v,
4432 : EbBool perform_chroma,
4433 : BlockSize bsize,
4434 : PictureControlSet *picture_control_set_ptr,
4435 : MacroBlockD *xd,
4436 : int mi_row,
4437 : int mi_col,
4438 : uint8_t *above[MAX_MB_PLANE],
4439 : int above_stride[MAX_MB_PLANE],
4440 : uint8_t *left[MAX_MB_PLANE],
4441 : int left_stride[MAX_MB_PLANE])
4442 : {
4443 13103800 : uint8_t is16bit = 0;
4444 : // handle above row
4445 : struct obmc_inter_pred_ctxt ctxt_above ;
4446 :
4447 13103800 : ctxt_above.adjacent = above;
4448 13103800 : ctxt_above.adjacent_hbd = (uint16_t**)above;
4449 13103800 : ctxt_above.adjacent_stride = above_stride;
4450 :
4451 13103800 : ctxt_above.final_dst_ptr_y = final_dst_ptr_y;
4452 13103800 : ctxt_above.final_dst_ptr_y_hbd = (uint16_t*)final_dst_ptr_y;
4453 13103800 : ctxt_above.final_dst_stride_y = final_dst_stride_y;
4454 13103800 : ctxt_above.final_dst_ptr_u = final_dst_ptr_u;
4455 13103800 : ctxt_above.final_dst_ptr_u_hbd = (uint16_t*)final_dst_ptr_u;
4456 13103800 : ctxt_above.final_dst_stride_u = final_dst_stride_u;
4457 13103800 : ctxt_above.final_dst_ptr_v = final_dst_ptr_v;
4458 13103800 : ctxt_above.final_dst_ptr_v_hbd = (uint16_t*)final_dst_ptr_v;
4459 13103800 : ctxt_above.final_dst_stride_v = final_dst_stride_v;
4460 13103800 : ctxt_above.perform_chroma = perform_chroma;
4461 :
4462 13103800 : foreach_overlappable_nb_above(
4463 : is16bit,
4464 13103800 : picture_control_set_ptr->parent_pcs_ptr->av1_cm,
4465 : xd,
4466 : mi_col,
4467 13103800 : max_neighbor_obmc[mi_size_wide_log2[bsize]],
4468 : build_obmc_inter_pred_above,
4469 : &ctxt_above);
4470 :
4471 : // handle left column
4472 : struct obmc_inter_pred_ctxt ctxt_left;
4473 :
4474 13102300 : ctxt_left.adjacent = left;
4475 13102300 : ctxt_left.adjacent_hbd = (uint16_t**)left;
4476 13102300 : ctxt_left.adjacent_stride = left_stride;
4477 :
4478 13102300 : ctxt_left.final_dst_ptr_y = final_dst_ptr_y;
4479 13102300 : ctxt_left.final_dst_ptr_y_hbd = (uint16_t*)final_dst_ptr_y;
4480 13102300 : ctxt_left.final_dst_stride_y = final_dst_stride_y;
4481 13102300 : ctxt_left.final_dst_ptr_u = final_dst_ptr_u;
4482 13102300 : ctxt_left.final_dst_ptr_u_hbd = (uint16_t*)final_dst_ptr_u;
4483 13102300 : ctxt_left.final_dst_stride_u = final_dst_stride_u;
4484 13102300 : ctxt_left.final_dst_ptr_v = final_dst_ptr_v;
4485 13102300 : ctxt_left.final_dst_ptr_v_hbd = (uint16_t*)final_dst_ptr_v;
4486 13102300 : ctxt_left.final_dst_stride_v = final_dst_stride_v;
4487 13102300 : ctxt_left.perform_chroma = perform_chroma;
4488 :
4489 13102300 : foreach_overlappable_nb_left(
4490 : is16bit,
4491 13102300 : picture_control_set_ptr->parent_pcs_ptr->av1_cm,
4492 : xd,
4493 : mi_row,
4494 13102300 : max_neighbor_obmc[mi_size_high_log2[bsize]],
4495 : build_obmc_inter_pred_left,
4496 : &ctxt_left);
4497 13103000 : }
4498 : struct calc_target_weighted_pred_ctxt { // fun_ctxt for the calc_target_weighted_pred_* callbacks; initialised positionally in calc_target_weighted_pred, so field order matters
4499 : int32_t *mask_buf; // per-pixel mask accumulator; rows are one block width apart
4500 : int32_t *wsrc_buf; // per-pixel weighted-source accumulator; rows are one block width apart
4501 : const uint8_t *tmp; // neighbour (above or left) prediction samples
4502 : int tmp_stride; // row stride of tmp
4503 : int overlap; // overlap extent in pixels; also the length passed to av1_get_obmc_mask()
4504 : };
4505 :
4506 395029 : static INLINE void calc_target_weighted_pred_above(
4507 : uint8_t is16bit,
4508 : MacroBlockD *xd,
4509 : int rel_mi_col,
4510 : uint8_t nb_mi_width,
4511 : MbModeInfo *nb_mi,
4512 : void *fun_ctxt,
4513 : const int num_planes)
4514 : {
4515 : (void)nb_mi;
4516 : (void)num_planes;
4517 : (void)is16bit;
4518 395029 : struct calc_target_weighted_pred_ctxt *ctxt =
4519 : (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
4520 :
4521 395029 : const int bw = xd->n4_w << MI_SIZE_LOG2;
4522 395029 : const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
4523 :
4524 395031 : int32_t *wsrc = ctxt->wsrc_buf + (rel_mi_col * MI_SIZE);
4525 395031 : int32_t *mask = ctxt->mask_buf + (rel_mi_col * MI_SIZE);
4526 395031 : const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
4527 :
4528 : {
4529 3971690 : for (int row = 0; row < ctxt->overlap; ++row) {
4530 3576660 : const uint8_t m0 = mask1d[row];
4531 3576660 : const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
4532 71445200 : for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
4533 67868600 : wsrc[col] = m1 * tmp[col];
4534 67868600 : mask[col] = m0;
4535 : }
4536 3576660 : wsrc += bw;
4537 3576660 : mask += bw;
4538 3576660 : tmp += ctxt->tmp_stride;
4539 : }
4540 : }
4541 395031 : }
4542 :
4543 415741 : static INLINE void calc_target_weighted_pred_left(
4544 : uint8_t is16bit,
4545 : MacroBlockD *xd,
4546 : int rel_mi_row,
4547 : uint8_t nb_mi_height,
4548 : MbModeInfo *nb_mi,
4549 : void *fun_ctxt,
4550 : const int num_planes)
4551 : {
4552 : (void)nb_mi;
4553 : (void)num_planes;
4554 : (void)is16bit;
4555 :
4556 415741 : struct calc_target_weighted_pred_ctxt *ctxt =
4557 : (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
4558 :
4559 415741 : const int bw = xd->n4_w << MI_SIZE_LOG2;
4560 415741 : const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
4561 :
4562 415740 : int32_t *wsrc = ctxt->wsrc_buf + (rel_mi_row * MI_SIZE * bw);
4563 415740 : int32_t *mask = ctxt->mask_buf + (rel_mi_row * MI_SIZE * bw);
4564 415740 : const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
4565 :
4566 : {
4567 7434320 : for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
4568 77138500 : for (int col = 0; col < ctxt->overlap; ++col) {
4569 70120000 : const uint8_t m0 = mask1d[col];
4570 70120000 : const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
4571 70120000 : wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
4572 70120000 : (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
4573 70120000 : mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
4574 : }
4575 7018580 : wsrc += bw;
4576 7018580 : mask += bw;
4577 7018580 : tmp += ctxt->tmp_stride;
4578 : }
4579 : }
4580 :
4581 415740 : }
4582 : // This function has a structure similar to av1_build_obmc_inter_prediction
4583 : //
4584 : // The OBMC predictor is computed as:
4585 : //
4586 : // PObmc(x,y) =
4587 : // AOM_BLEND_A64(Mh(x),
4588 : // AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
4589 : // PLeft(x, y))
4590 : //
4591 : // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
4592 : // rounding, this can be written as:
4593 : //
4594 : // AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
4595 : // Mh(x) * Mv(y) * P(x,y) +
4596 : // Mh(x) * Cv(y) * Pabove(x,y) +
4597 : // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
4598 : //
4599 : // Where :
4600 : //
4601 : // Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
4602 : // Ch(y) = AOM_BLEND_A64_MAX_ALPHA - Mh(y)
4603 : //
4604 : // This function computes 'wsrc' and 'mask' as:
4605 : //
4606 : // wsrc(x, y) =
4607 : // AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
4608 : // Mh(x) * Cv(y) * Pabove(x,y) +
4609 : // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
4610 : //
4611 : // mask(x, y) = Mh(x) * Mv(y)
4612 : //
4613 : // These can then be used to efficiently approximate the error for any
4614 : // predictor P in the context of the provided neighbouring predictors by
4615 : // computing:
4616 : //
4617 : // error(x, y) =
4618 : // wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
4619 : //
4620 399602 : static void calc_target_weighted_pred(
4621 : PictureControlSet *picture_control_set_ptr,
4622 : ModeDecisionContext *context_ptr,
4623 : const AV1_COMMON *cm,
4624 : const MacroBlockD *xd,
4625 : int mi_row,
4626 : int mi_col,
4627 : const uint8_t *above,
4628 : int above_stride,
4629 : const uint8_t *left,
4630 : int left_stride)
4631 : {
4632 399602 : uint8_t is16bit =0;
4633 399602 : const BlockSize bsize = context_ptr->blk_geom->bsize;
4634 399602 : const int bw = xd->n4_w << MI_SIZE_LOG2;
4635 399602 : const int bh = xd->n4_h << MI_SIZE_LOG2;
4636 399602 : int32_t *mask_buf = context_ptr->mask_buf;
4637 399602 : int32_t *wsrc_buf = context_ptr->wsrc_buf;
4638 :
4639 399602 : const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
4640 :
4641 399602 : memset(wsrc_buf,0, sizeof(int32_t)*bw * bh);
4642 150056000 : for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
4643 :
4644 : // handle above row
4645 399602 : if (xd->up_available) {
4646 378202 : const int overlap =
4647 378202 : AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
4648 378202 : struct calc_target_weighted_pred_ctxt ctxt = {
4649 : mask_buf,
4650 : wsrc_buf,
4651 : above,
4652 : above_stride,
4653 : overlap };
4654 :
4655 378202 : foreach_overlappable_nb_above(
4656 : is16bit,
4657 : cm,
4658 : (MacroBlockD *)xd,
4659 : mi_col,
4660 378202 : max_neighbor_obmc[mi_size_wide_log2[bsize]],
4661 : calc_target_weighted_pred_above,
4662 : &ctxt);
4663 : }
4664 :
4665 150037000 : for (int i = 0; i < bw * bh; ++i) {
4666 149638000 : wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
4667 149638000 : mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
4668 : }
4669 :
4670 : // handle left column
4671 399601 : if (xd->left_available) {
4672 383089 : const int overlap =
4673 383089 : AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
4674 383089 : struct calc_target_weighted_pred_ctxt ctxt = { mask_buf,
4675 : wsrc_buf, left, left_stride,
4676 : overlap };
4677 :
4678 383089 : foreach_overlappable_nb_left(
4679 : is16bit,
4680 : cm,
4681 : (MacroBlockD *)xd,
4682 : mi_row,
4683 383089 : max_neighbor_obmc[mi_size_high_log2[bsize]],
4684 : calc_target_weighted_pred_left,
4685 : &ctxt);
4686 : }
4687 :
4688 399602 : EbPictureBufferDesc *src_pic = picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
4689 399602 : const uint8_t *src = src_pic->buffer_y + (context_ptr->cu_origin_x + src_pic->origin_x) + (context_ptr->cu_origin_y + src_pic->origin_y) * src_pic->stride_y;
4690 :
4691 7744220 : for (int row = 0; row < bh; ++row) {
4692 156973000 : for (int col = 0; col < bw; ++col) {
4693 149629000 : wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
4694 : }
4695 7344620 : wsrc_buf += bw;
4696 7344620 : src += src_pic->stride_y;
4697 : }
4698 :
4699 399602 : }
4700 : /* perform all neigh predictions and get wighted src to be used for obmc
4701 : motion refinement
4702 : */
4703 399589 : void precompute_obmc_data(
4704 : PictureControlSet *picture_control_set_ptr,
4705 : ModeDecisionContext *context_ptr)
4706 : {
4707 :
4708 : uint8_t * tmp_obmc_bufs[2];
4709 :
4710 399589 : tmp_obmc_bufs[0] = context_ptr->obmc_buff_0;
4711 399589 : tmp_obmc_bufs[1] = context_ptr->obmc_buff_1;
4712 :
4713 :
4714 : uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
4715 399589 : int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4716 399589 : int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4717 :
4718 : {
4719 399589 : dst_buf1[0] = tmp_obmc_bufs[0];
4720 399589 : dst_buf1[1] = tmp_obmc_bufs[0] + MAX_SB_SQUARE;
4721 399589 : dst_buf1[2] = tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2;
4722 399589 : dst_buf2[0] = tmp_obmc_bufs[1];
4723 399589 : dst_buf2[1] = tmp_obmc_bufs[1] + MAX_SB_SQUARE;
4724 399589 : dst_buf2[2] = tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2;
4725 : }
4726 :
4727 399589 : int mi_row = context_ptr->cu_origin_y >> 2;
4728 399589 : int mi_col = context_ptr->cu_origin_x >> 2;
4729 :
4730 399589 : build_prediction_by_above_preds(
4731 : 1,
4732 399589 : context_ptr->blk_geom->bsize, picture_control_set_ptr, context_ptr->cu_ptr->av1xd, mi_row, mi_col, dst_buf1,
4733 : dst_stride1);
4734 :
4735 399606 : build_prediction_by_left_preds(
4736 : 1,
4737 399606 : context_ptr->blk_geom->bsize, picture_control_set_ptr, context_ptr->cu_ptr->av1xd, mi_row, mi_col, dst_buf2,
4738 : dst_stride2);
4739 :
4740 :
4741 399602 : calc_target_weighted_pred(
4742 : picture_control_set_ptr,
4743 : context_ptr,
4744 399602 : picture_control_set_ptr->parent_pcs_ptr->av1_cm, context_ptr->cu_ptr->av1xd, mi_row, mi_col, dst_buf1[0],
4745 399602 : dst_stride1[0] , dst_buf2[0] ,
4746 : dst_stride2[0] );
4747 :
4748 399607 : }
4749 : #endif
/*
 * av1_inter_prediction
 *
 * Builds the inter prediction of one block into prediction_ptr, reading and
 * writing uint8_t sample buffers. Luma is always predicted; Cb/Cr are
 * predicted only when perform_chroma is set and blk_geom->has_uv is set.
 *
 * Processing order:
 *  1. 4xN / Nx4 blocks with chroma: the block's use_intrabc, ref_frame[0] and
 *     MV(s) are written into the mode-info grid, then, if every scanned
 *     mode-info entry is an inter block (and use_intrabc is 0), chroma is
 *     predicted per sub-block from each entry's own ref_frame[0] / mv[0]
 *     (sub8x8_inter path). In that case steps 2 and 3 skip chroma.
 *  2. pred_direction UNI_PRED_LIST_0 or BI_PRED: predict from ref_pic_list0.
 *  3. pred_direction UNI_PRED_LIST_1 or BI_PRED: predict from ref_pic_list1,
 *     through av1_make_masked_inter_predictor when the block is compound and
 *     interinter_comp->type is a masked compound type, otherwise through the
 *     convolve[][][] table.
 *  4. (II_COMP_FLAG) is_interintra_used: for each plane, build an intra
 *     prediction in a local buffer and blend it over the inter prediction in
 *     place with combine_interintra.
 *  5. (OBMC_FLAG) motion_mode == OBMC_CAUSAL: take the above/left predictions
 *     from md_context (use_precomputed_obmc) or build them here, then call
 *     av1_build_obmc_inter_prediction on the destination planes.
 *
 * Parameters:
 *  picture_control_set_ptr  source of mi_grid_base, mi_stride,
 *                           ref_pic_ptr_array and parent_pcs_ptr (order hints,
 *                           sequence header, av1_cm).
 *  interp_filters           forwarded to av1_get_convolve_filter_params.
 *  cu_ptr                   supplies mds_idx (block geometry lookup) and av1xd.
 *  ref_frame_type           expanded into rf[2] by av1_set_ref_frame.
 *  mv_unit                  pred_direction plus the per-list motion vectors.
 *  use_intrabc              disables the sub8x8_inter path; in the List 0
 *                           chroma pass a fractional MV is then handled by
 *                           convolve_2d_for_intrabc.
 *  motion_mode              OBMC_CAUSAL enables step 5.
 *  use_precomputed_obmc     non-zero: use md_context->obmc_buff_0/1.
 *  md_context               read only when use_precomputed_obmc is non-zero.
 *  compound_idx             forwarded to av1_dist_wtd_comp_weight_assign.
 *  interinter_comp          dereferenced only when pred_direction is BI_PRED.
 *  tile, *_recon_neighbor_array, is_interintra_used, interintra_mode,
 *  use_wedge_interintra, interintra_wedge_index
 *                           inputs of step 4.
 *  pu_origin_x/y            luma position used to address the reference
 *                           pictures (added to their origin_x/origin_y).
 *  bwidth, bheight          luma block size used for the luma convolution;
 *                           chroma uses blk_geom->bwidth_uv / bheight_uv.
 *  ref_pic_list0/1          reference pictures; asserted non-NULL only in the
 *                           branch that uses them.
 *  prediction_ptr           destination picture buffer.
 *  dst_origin_x/y           luma position used to address prediction_ptr.
 *  perform_chroma           enables chroma prediction.
 *  bit_depth                forwarded to get_conv_params_no_round and
 *                           av1_make_masked_inter_predictor.
 *
 * Chroma addressing throughout: the luma origin is rounded down to a multiple
 * of 8 ((v >> 3) << 3), added to the buffer origin, and the sum is halved.
 *
 * Returns return_error, which is set to EB_ErrorNone and never modified.
 */
4750 328108000 : EbErrorType av1_inter_prediction(
4751 : PictureControlSet *picture_control_set_ptr,
4752 : uint32_t interp_filters,
4753 : CodingUnit *cu_ptr,
4754 : uint8_t ref_frame_type,
4755 : MvUnit *mv_unit,
4756 : uint8_t use_intrabc,
4757 : #if OBMC_FLAG
4758 : MotionMode motion_mode,
4759 : uint8_t use_precomputed_obmc,
4760 : struct ModeDecisionContext *md_context,
4761 : #endif
4762 : uint8_t compound_idx,
4763 : InterInterCompoundData *interinter_comp,
4764 : #if II_COMP_FLAG
4765 : TileInfo * tile,
4766 : NeighborArrayUnit *luma_recon_neighbor_array,
4767 : NeighborArrayUnit *cb_recon_neighbor_array ,
4768 : NeighborArrayUnit *cr_recon_neighbor_array ,
4769 : uint8_t is_interintra_used ,
4770 : INTERINTRA_MODE interintra_mode,
4771 : uint8_t use_wedge_interintra,
4772 : int32_t interintra_wedge_index,
4773 : #endif
4774 : uint16_t pu_origin_x,
4775 : uint16_t pu_origin_y,
4776 : uint8_t bwidth,
4777 : uint8_t bheight,
4778 : EbPictureBufferDesc *ref_pic_list0,
4779 : EbPictureBufferDesc *ref_pic_list1,
4780 : EbPictureBufferDesc *prediction_ptr,
4781 : uint16_t dst_origin_x,
4782 : uint16_t dst_origin_y,
4783 : EbBool perform_chroma,
4784 : uint8_t bit_depth)
4785 : {
4786 :
4787 328108000 : EbErrorType return_error = EB_ErrorNone;
4788 328108000 : uint8_t is_compound = (mv_unit->pred_direction == BI_PRED) ? 1 : 0;
4789 : DECLARE_ALIGNED(32, uint16_t, tmp_dstY[128 * 128]);//scratch buffers handed to get_conv_params_no_round; move these to the context if the stack cannot hold them.
4790 : DECLARE_ALIGNED(32, uint16_t, tmp_dstCb[64 * 64]);
4791 : DECLARE_ALIGNED(32, uint16_t, tmp_dstCr[64 * 64]);
4792 :
4793 : MV mv, mv_q4;
4794 :
4795 : int32_t subpel_x, subpel_y;
4796 : uint8_t * src_ptr;
4797 : uint8_t * dst_ptr;
4798 : int32_t src_stride;
4799 : int32_t dst_stride;
4800 : ConvolveParams conv_params;
4801 :
4802 : InterpFilterParams filter_params_x, filter_params_y;
4803 :
4804 328108000 : const BlockGeom * blk_geom = get_blk_geom_mds(cu_ptr->mds_idx);
4805 :
4806 : #if OBMC_FLAG
// OBMC is only expected for single-reference blocks larger than 4 in both dimensions (debug-only checks).
4807 327811000 : if (motion_mode == OBMC_CAUSAL) {
4808 13100600 : assert(is_compound == 0);
4809 13100600 : assert(blk_geom->bwidth > 4 && blk_geom->bheight > 4);
4810 : }
4811 : #endif
4812 : //special treatment for chroma in 4xN/Nx4 blocks
4813 : //if one of the neighbour blocks of the parent square is intra, the chroma prediction follows the normal path using the luma MV of the current nsq block, which is the latest sub8x8.
4814 : //for this case: only uniPred is allowed.
4815 :
4816 327811000 : int32_t sub8x8_inter = 0;
4817 327811000 : if(perform_chroma && (blk_geom->has_uv && (blk_geom->bwidth == 4 || blk_geom->bheight == 4)))
4818 :
4819 : {
4820 : //CHKN set up input params: chroma block size, luma position and the mode-info pointer of this block
4821 :
4822 235508 : int32_t bw = blk_geom->bwidth_uv;
4823 235508 : int32_t bh = blk_geom->bheight_uv;
4824 : UNUSED(bw);
4825 : UNUSED(bh);
4826 :
4827 235508 : uint32_t mi_x = pu_origin_x; //these are luma picture-wise coordinates
4828 235508 : uint32_t mi_y = pu_origin_y;
4829 :
4830 235508 : MacroBlockD *xd = cu_ptr->av1xd;
4831 235508 : xd->mi_stride = picture_control_set_ptr->mi_stride;
4832 235508 : const int32_t offset = (mi_y >> MI_SIZE_LOG2) * xd->mi_stride + (mi_x >> MI_SIZE_LOG2);
4833 235508 : xd->mi = picture_control_set_ptr->mi_grid_base + offset;
4834 :
4835 : //CHKN fill the mode-info entries covered by the current block (use_intrabc, ref_frame[0], mv) from the function arguments
4836 : {
4837 235508 : ModeInfo *miPtr = *xd->mi;
4838 : uint8_t miX, miY;
4839 : MvReferenceFrame rf[2];
4840 235508 : av1_set_ref_frame(rf, ref_frame_type);
4841 718475 : for (miY = 0; miY < (blk_geom->bheight >> MI_SIZE_LOG2); miY++) {
4842 1216110 : for (miX = 0; miX < (blk_geom->bwidth >> MI_SIZE_LOG2); miX++) {
4843 733162 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.use_intrabc = use_intrabc;
4844 733162 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.ref_frame[0] = rf[0];
// For either uni-directional case the single MV is stored in slot mv[0]; only BI_PRED also fills mv[1].
4845 733162 : if (mv_unit->pred_direction == UNI_PRED_LIST_0) {
4846 411065 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.mv[0].as_mv.col = mv_unit->mv[REF_LIST_0].x;
4847 411065 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.mv[0].as_mv.row = mv_unit->mv[REF_LIST_0].y;
4848 : }
4849 322097 : else if (mv_unit->pred_direction == UNI_PRED_LIST_1) {
4850 322097 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.mv[0].as_mv.col = mv_unit->mv[REF_LIST_1].x;
4851 322097 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.mv[0].as_mv.row = mv_unit->mv[REF_LIST_1].y;
4852 : }
4853 : else {
4854 0 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.mv[0].as_mv.col = mv_unit->mv[REF_LIST_0].x;
4855 0 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.mv[0].as_mv.row = mv_unit->mv[REF_LIST_0].y;
4856 0 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.mv[1].as_mv.col = mv_unit->mv[REF_LIST_1].x;
4857 0 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.mv[1].as_mv.row = mv_unit->mv[REF_LIST_1].y;
4858 : }
4859 : }
4860 : }
4861 : }
4862 :
4863 235527 : int32_t build_for_obmc = 0;
4864 :
4865 235527 : const BlockSize bsize = blk_geom->bsize;//mi->sb_type;
4866 235527 : assert(bsize < BlockSizeS_ALL);
4867 235527 : const int32_t ss_x = 1;// pd->subsampling_x;
4868 235527 : const int32_t ss_y = 1;//pd->subsampling_y;
4869 351983 : sub8x8_inter = (block_size_wide[bsize] < 8 && ss_x) ||
4870 116456 : (block_size_high[bsize] < 8 && ss_y);
4871 :
4872 235527 : if (use_intrabc) sub8x8_inter = 0;
4873 :
4874 : // For sub8x8 chroma blocks, we may be covering more than one luma block's
4875 : // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
4876 : // the top-left corner of the prediction source - the correct top-left corner
4877 : // is at (pre_x, pre_y).
4878 235527 : const int32_t row_start =
4879 235527 : (block_size_high[bsize] == 4) && ss_y && !build_for_obmc ? -1 : 0;
4880 235527 : const int32_t col_start =
4881 235527 : (block_size_wide[bsize] == 4) && ss_x && !build_for_obmc ? -1 : 0;
4882 :
4883 235527 : const int32_t pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
4884 235527 : const int32_t pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
4885 : UNUSED(pre_x);
4886 : UNUSED(pre_y);
4887 :
4888 235527 : sub8x8_inter = sub8x8_inter && !build_for_obmc;
// Scan the mode-info entries at offsets row_start..0 / col_start..0; one non-inter entry disables the sub8x8 path.
4889 235527 : if (sub8x8_inter) {
4890 585434 : for (int32_t row = row_start; row <= 0 && sub8x8_inter; ++row) {
4891 822189 : for (int32_t col = col_start; col <= 0; ++col) {
4892 472282 : ModeInfo *miPtr = *xd->mi;
4893 472282 : const MbModeInfo *this_mbmi = &miPtr[row * xd->mi_stride + col].mbmi;
4894 :
4895 472282 : if (!is_inter_block(&this_mbmi->block_mi)) sub8x8_inter = 0;
4896 : }
4897 : }
4898 : }
4899 :
4900 235528 : if (sub8x8_inter) {
4901 : // block size
4902 223550 : const int32_t b4_w = block_size_wide[bsize] >> ss_x;
4903 223550 : const int32_t b4_h = block_size_high[bsize] >> ss_y;
4904 223550 : const BlockSize plane_bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
4905 223550 : assert(plane_bsize < BlockSizeS_ALL);
4906 223550 : const int32_t b8_w = block_size_wide[plane_bsize] >> ss_x;
4907 223550 : const int32_t b8_h = block_size_high[plane_bsize] >> ss_y;
4908 :
4909 223550 : assert(!is_compound);
4910 :
4911 223550 : int32_t row = row_start;
// Note: this src_stride shadows the function-level src_stride; the const MV mv declared in the loop likewise shadows the outer mv.
4912 : int32_t src_stride;
4913 561323 : for (int32_t y = 0; y < b8_h; y += b4_h) {
4914 337774 : int32_t col = col_start;
4915 791175 : for (int32_t x = 0; x < b8_w; x += b4_w) {
4916 453402 : ModeInfo *miPtr = *xd->mi;
4917 453402 : const MbModeInfo *this_mbmi = &miPtr[row * xd->mi_stride + col].mbmi;
4918 :
4919 453402 : int32_t tmp_dst_stride = 8;
4920 : UNUSED(tmp_dst_stride);
4921 453402 : assert(bw < 8 || bh < 8);
4922 :
// Cb: the reference picture is picked from the mode-info entry's own ref_frame[0] (REF_LIST_0 for LAST/LAST2/LAST3/GOLDEN, REF_LIST_1 otherwise).
4923 453402 : conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstCb, BLOCK_SIZE_64, is_compound, bit_depth);
4924 453398 : conv_params.use_jnt_comp_avg = 0;
4925 453398 : uint8_t ref_idx = get_ref_frame_idx(this_mbmi->block_mi.ref_frame[0]);
4926 453402 : assert(ref_idx < REF_LIST_MAX_DEPTH);
4927 906804 : EbPictureBufferDesc *ref_pic = this_mbmi->block_mi.ref_frame[0] ==
4928 187745 : LAST_FRAME || this_mbmi->block_mi.ref_frame[0] == LAST2_FRAME || this_mbmi->block_mi.ref_frame[0] == LAST3_FRAME || this_mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME ?
4929 454314 : ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[REF_LIST_0][ref_idx]->object_ptr)->reference_picture :
4930 186833 : ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[REF_LIST_1][ref_idx]->object_ptr)->reference_picture;
4931 453402 : assert(ref_pic != NULL);
4932 453402 : src_ptr = ref_pic->buffer_cb + (ref_pic->origin_x + ((pu_origin_x >> 3) << 3)) / 2 + (ref_pic->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic->stride_cb;
4933 453402 : dst_ptr = prediction_ptr->buffer_cb + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cb;
4934 453402 : src_stride = ref_pic->stride_cb;
4935 453402 : dst_stride = prediction_ptr->stride_cb;
4936 453402 : src_ptr = src_ptr + x + y * ref_pic->stride_cb;
4937 453402 : dst_ptr = dst_ptr + x + y * prediction_ptr->stride_cb;
4938 :
4939 453402 : const MV mv = this_mbmi->block_mi.mv[0].as_mv;
4940 453402 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv, blk_geom->bwidth_uv, blk_geom->bheight_uv, 1, 1);
4941 453402 : subpel_x = mv_q4.col & SUBPEL_MASK;
4942 453402 : subpel_y = mv_q4.row & SUBPEL_MASK;
4943 453402 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
4944 :
4945 453402 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
4946 453402 : &filter_params_y, blk_geom->bwidth_uv, blk_geom->bheight_uv);
4947 :
4948 453397 : convolve[subpel_x != 0][subpel_y != 0][is_compound](
4949 : src_ptr,
4950 : src_stride,
4951 : dst_ptr,
4952 : dst_stride,
4953 : b4_w,
4954 : b4_h,
4955 : &filter_params_x,
4956 : &filter_params_y,
4957 : subpel_x,
4958 : subpel_y,
4959 : &conv_params);
4960 :
4961 : //Cr (same reference picture and MV as Cb; the clamp and filter params are recomputed with identical arguments)
4962 453394 : conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstCr, BLOCK_SIZE_64, is_compound, bit_depth);
4963 453396 : conv_params.use_jnt_comp_avg = 0;
4964 :
4965 453396 : src_ptr = ref_pic->buffer_cr + (ref_pic->origin_x + ((pu_origin_x >> 3) << 3)) / 2 + (ref_pic->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic->stride_cr;
4966 453396 : dst_ptr = prediction_ptr->buffer_cr + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cr;
4967 :
4968 453396 : src_stride = ref_pic->stride_cr;
4969 453396 : dst_stride = prediction_ptr->stride_cr;
4970 453396 : src_ptr = src_ptr + x + y * ref_pic->stride_cr;
4971 453396 : dst_ptr = dst_ptr + x + y * prediction_ptr->stride_cr;
4972 :
4973 453396 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv, blk_geom->bwidth_uv, blk_geom->bheight_uv, 1, 1);
4974 453401 : subpel_x = mv_q4.col & SUBPEL_MASK;
4975 453401 : subpel_y = mv_q4.row & SUBPEL_MASK;
4976 453401 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
4977 :
4978 453401 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
4979 453401 : &filter_params_y, blk_geom->bwidth_uv, blk_geom->bheight_uv);
4980 :
4981 453398 : convolve[subpel_x != 0][subpel_y != 0][is_compound](
4982 : src_ptr,
4983 : src_stride,
4984 : dst_ptr,
4985 : dst_stride,
4986 : b4_w,
4987 : b4_h,
4988 : &filter_params_x,
4989 : &filter_params_y,
4990 : subpel_x,
4991 : subpel_y,
4992 : &conv_params);
4993 :
4994 453401 : ++col;
4995 : }
4996 337773 : ++row;
4997 : }
4998 : }
4999 : }
5000 :
5001 : MvReferenceFrame rf[2];
5002 327811000 : av1_set_ref_frame(rf, ref_frame_type);
5003 327490000 : if (mv_unit->pred_direction == UNI_PRED_LIST_0 || mv_unit->pred_direction == BI_PRED) {
5004 : //List0-Y
5005 235061000 : mv.col = mv_unit->mv[REF_LIST_0].x;
5006 235061000 : mv.row = mv_unit->mv[REF_LIST_0].y;
5007 235061000 : assert(ref_pic_list0 != NULL);
5008 235061000 : src_ptr = ref_pic_list0->buffer_y + ref_pic_list0->origin_x + pu_origin_x + (ref_pic_list0->origin_y + pu_origin_y) * ref_pic_list0->stride_y;
5009 235061000 : dst_ptr = prediction_ptr->buffer_y + prediction_ptr->origin_x + dst_origin_x + (prediction_ptr->origin_y + dst_origin_y) * prediction_ptr->stride_y;
5010 235061000 : src_stride = ref_pic_list0->stride_y;
5011 235061000 : dst_stride = prediction_ptr->stride_y;
5012 :
5013 235061000 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv, bwidth, bheight, 0, 0);//mv_q4 has 1 extra fractional bit to accommodate chroma when accessing filter coeffs.
5014 236556000 : subpel_x = mv_q4.col & SUBPEL_MASK;
5015 236556000 : subpel_y = mv_q4.row & SUBPEL_MASK;
5016 236556000 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
5017 236556000 : conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstY, 128, is_compound, bit_depth);
5018 236656000 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
5019 : &filter_params_y, bwidth, bheight);
5020 :
5021 235348000 : convolve[subpel_x != 0][subpel_y != 0][is_compound](
5022 : src_ptr,
5023 : src_stride,
5024 : dst_ptr,
5025 : dst_stride,
5026 : bwidth,
5027 : bheight,
5028 : &filter_params_x,
5029 : &filter_params_y,
5030 : subpel_x,
5031 : subpel_y,
5032 : &conv_params);
// Chroma is skipped here when the sub8x8_inter path above already produced it.
5033 235718000 : if (perform_chroma && blk_geom->has_uv && sub8x8_inter == 0) {
5034 : //List0-Cb
5035 2097010 : src_ptr = ref_pic_list0->buffer_cb + (ref_pic_list0->origin_x + ((pu_origin_x >> 3) << 3)) / 2 + (ref_pic_list0->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic_list0->stride_cb;
5036 2097010 : dst_ptr = prediction_ptr->buffer_cb + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cb;
5037 2097010 : src_stride = ref_pic_list0->stride_cb;
5038 2097010 : dst_stride = prediction_ptr->stride_cb;
5039 :
5040 2097010 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv, blk_geom->bwidth_uv, blk_geom->bheight_uv, 1, 1);
5041 2097660 : subpel_x = mv_q4.col & SUBPEL_MASK;
5042 2097660 : subpel_y = mv_q4.row & SUBPEL_MASK;
5043 2097660 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
5044 2097660 : conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstCb, 64, is_compound, bit_depth);
5045 :
5046 2097800 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
5047 2097800 : &filter_params_y, blk_geom->bwidth_uv, blk_geom->bheight_uv);
5048 :
// intrabc with a fractional chroma MV uses the dedicated intrabc filter instead of the convolve table.
5049 2097260 : if (use_intrabc && (subpel_x != 0 || subpel_y != 0))
5050 0 : convolve_2d_for_intrabc(
5051 : (const uint8_t *)src_ptr,
5052 : src_stride,
5053 : dst_ptr,
5054 : dst_stride,
5055 0 : blk_geom->bwidth_uv,
5056 0 : blk_geom->bheight_uv,
5057 : subpel_x,
5058 : subpel_y,
5059 : &conv_params);
5060 : else
5061 2097260 : convolve[subpel_x != 0][subpel_y != 0][is_compound](
5062 : src_ptr,
5063 : src_stride,
5064 : dst_ptr,
5065 : dst_stride,
5066 2097260 : blk_geom->bwidth_uv,
5067 2097260 : blk_geom->bheight_uv,
5068 : &filter_params_x,
5069 : &filter_params_y,
5070 : subpel_x,
5071 : subpel_y,
5072 : &conv_params);
5073 :
5074 : //List0-Cr
5075 2097050 : src_ptr = ref_pic_list0->buffer_cr + (ref_pic_list0->origin_x + ((pu_origin_x >> 3) << 3)) / 2 + (ref_pic_list0->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic_list0->stride_cr;
5076 2097050 : dst_ptr = prediction_ptr->buffer_cr + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cr;
5077 2097050 : src_stride = ref_pic_list0->stride_cr;
5078 2097050 : dst_stride = prediction_ptr->stride_cr;
5079 :
5080 2097050 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv, blk_geom->bwidth_uv, blk_geom->bheight_uv, 1, 1);
5081 2097720 : subpel_x = mv_q4.col & SUBPEL_MASK;
5082 2097720 : subpel_y = mv_q4.row & SUBPEL_MASK;
5083 2097720 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
5084 2097720 : conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstCr, 64, is_compound, bit_depth);
5085 :
5086 2097750 : if (use_intrabc && (subpel_x != 0 || subpel_y != 0))
5087 0 : convolve_2d_for_intrabc(
5088 : (const uint8_t *)src_ptr,
5089 : src_stride,
5090 : dst_ptr,
5091 : dst_stride,
5092 0 : blk_geom->bwidth_uv,
5093 0 : blk_geom->bheight_uv,
5094 : subpel_x,
5095 : subpel_y,
5096 : &conv_params);
5097 : else
5098 2097750 : convolve[subpel_x != 0][subpel_y != 0][is_compound](
5099 : src_ptr,
5100 : src_stride,
5101 : dst_ptr,
5102 : dst_stride,
5103 2097750 : blk_geom->bwidth_uv,
5104 2097750 : blk_geom->bheight_uv,
5105 : &filter_params_x,
5106 : &filter_params_y,
5107 : subpel_x,
5108 : subpel_y,
5109 : &conv_params);
5110 : }
5111 : }
5112 :
5113 328147000 : if (mv_unit->pred_direction == UNI_PRED_LIST_1 || mv_unit->pred_direction == BI_PRED) {
5114 : //List1-Y
5115 202852000 : mv.col = mv_unit->mv[REF_LIST_1].x;
5116 202852000 : mv.row = mv_unit->mv[REF_LIST_1].y;
5117 202852000 : assert(ref_pic_list1 != NULL);
5118 202852000 : src_ptr = ref_pic_list1->buffer_y + ref_pic_list1->origin_x + pu_origin_x + (ref_pic_list1->origin_y + pu_origin_y) * ref_pic_list1->stride_y;
5119 202852000 : dst_ptr = prediction_ptr->buffer_y + prediction_ptr->origin_x + dst_origin_x + (prediction_ptr->origin_y + dst_origin_y) * prediction_ptr->stride_y;
5120 202852000 : src_stride = ref_pic_list1->stride_y;
5121 202852000 : dst_stride = prediction_ptr->stride_y;
5122 :
5123 202852000 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv, bwidth, bheight, 0, 0);//mv_q4 has 1 extra fractional bit to accommodate chroma when accessing filter coeffs.
5124 203993000 : subpel_x = mv_q4.col & SUBPEL_MASK;
5125 203993000 : subpel_y = mv_q4.row & SUBPEL_MASK;
5126 :
5127 203993000 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
5128 203993000 : conv_params = get_conv_params_no_round(0, (mv_unit->pred_direction == BI_PRED) ? 1 : 0, 0, tmp_dstY, 128, is_compound, bit_depth);
5129 :
5130 204348000 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
5131 : &filter_params_y, bwidth, bheight);
5132 :
5133 : //the luma data is applied to chroma below
// NOTE(review): this call is also made for UNI_PRED_LIST_1, where rf[1] holds whatever av1_set_ref_frame stores for a
// single reference; if that value is <= 0, ref_order_hint[rf[1] - 1] is read with a negative index before the callee
// can ignore it - confirm against av1_set_ref_frame / av1_dist_wtd_comp_weight_assign. Same pattern in the Cb/Cr calls below.
5134 203190000 : av1_dist_wtd_comp_weight_assign(
5135 203190000 : &picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header,
5136 203190000 : picture_control_set_ptr->parent_pcs_ptr->cur_order_hint,// cur_frame_index,
5137 203190000 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[0] - 1],// bck_frame_index,
5138 203190000 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[1] - 1],// fwd_frame_index,
5139 : compound_idx,
5140 : 0,// order_idx,
5141 : &conv_params.fwd_offset, &conv_params.bck_offset,
5142 : &conv_params.use_dist_wtd_comp_avg, is_compound);
5143 :
5144 203254000 : conv_params.use_jnt_comp_avg = conv_params.use_dist_wtd_comp_avg;
5145 :
5146 203254000 : if (is_compound && is_masked_compound_type(interinter_comp->type)) {
5147 39412400 : conv_params.do_average = 0;
5148 39412400 : av1_make_masked_inter_predictor(
5149 : src_ptr,
5150 : src_stride,
5151 : dst_ptr,
5152 : dst_stride,
5153 : blk_geom,
5154 : bwidth,
5155 : bheight,
5156 : &filter_params_x,
5157 : &filter_params_y,
5158 : subpel_x,
5159 : subpel_y,
5160 : &conv_params,
5161 : interinter_comp,
5162 : bit_depth,
5163 : 0//plane=Luma seg_mask is computed based on luma and used for chroma
5164 : );
5165 : }
5166 : else
5167 163802000 : convolve[subpel_x != 0][subpel_y != 0][is_compound](
5168 : src_ptr,
5169 : src_stride,
5170 : dst_ptr,
5171 : dst_stride,
5172 : bwidth,
5173 : bheight,
5174 : &filter_params_x,
5175 : &filter_params_y,
5176 : subpel_x,
5177 : subpel_y,
5178 : &conv_params);
5179 203858000 : if (perform_chroma && blk_geom->has_uv && sub8x8_inter == 0) {
5180 : //List1-Cb
5181 1526100 : src_ptr = ref_pic_list1->buffer_cb + (ref_pic_list1->origin_x + ((pu_origin_x >> 3) << 3)) / 2 + (ref_pic_list1->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic_list1->stride_cb;
5182 1526100 : dst_ptr = prediction_ptr->buffer_cb + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cb;
5183 1526100 : src_stride = ref_pic_list1->stride_cb;
5184 1526100 : dst_stride = prediction_ptr->stride_cb;
5185 :
5186 1526100 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv, blk_geom->bwidth_uv, blk_geom->bheight_uv, 1, 1);
5187 1526110 : subpel_x = mv_q4.col & SUBPEL_MASK;
5188 1526110 : subpel_y = mv_q4.row & SUBPEL_MASK;
5189 1526110 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
5190 1526110 : conv_params = get_conv_params_no_round(0, (mv_unit->pred_direction == BI_PRED) ? 1 : 0, 0, tmp_dstCb, 64, is_compound, bit_depth);
5191 :
5192 1526110 : av1_dist_wtd_comp_weight_assign(
5193 1526110 : &picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header,
5194 1526110 : picture_control_set_ptr->parent_pcs_ptr->cur_order_hint,// cur_frame_index,
5195 1526110 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[0] - 1],// bck_frame_index,
5196 1526110 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[1] - 1],// fwd_frame_index,
5197 : compound_idx,
5198 : 0,// order_idx,
5199 : &conv_params.fwd_offset, &conv_params.bck_offset,
5200 : &conv_params.use_dist_wtd_comp_avg, is_compound);
5201 :
5202 1526090 : conv_params.use_jnt_comp_avg = conv_params.use_dist_wtd_comp_avg;
5203 :
5204 1526090 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
5205 1526090 : &filter_params_y, blk_geom->bwidth_uv, blk_geom->bheight_uv);
5206 :
5207 1526050 : if (is_compound && is_masked_compound_type(interinter_comp->type)) {
5208 210174 : conv_params.do_average = 0;
5209 210174 : av1_make_masked_inter_predictor(
5210 : src_ptr,
5211 : src_stride,
5212 : dst_ptr,
5213 : dst_stride,
5214 : blk_geom,
5215 210174 : blk_geom->bwidth_uv,
5216 210174 : blk_geom->bheight_uv,
5217 : &filter_params_x,
5218 : &filter_params_y,
5219 : subpel_x,
5220 : subpel_y,
5221 : &conv_params,
5222 : interinter_comp,
5223 : bit_depth,
5224 : 1//plane=cb seg_mask is computed based on luma and used for chroma
5225 : );
5226 : }
5227 : else
5228 1315870 : convolve[subpel_x != 0][subpel_y != 0][is_compound](
5229 : src_ptr,
5230 : src_stride,
5231 : dst_ptr,
5232 : dst_stride,
5233 1315870 : blk_geom->bwidth_uv,
5234 1315870 : blk_geom->bheight_uv,
5235 : &filter_params_x,
5236 : &filter_params_y,
5237 : subpel_x,
5238 : subpel_y,
5239 : &conv_params);
5240 :
5241 : //List1-Cr (filter_params_x/y are reused from the Cb pass above)
5242 1526110 : src_ptr = ref_pic_list1->buffer_cr + (ref_pic_list1->origin_x + ((pu_origin_x >> 3) << 3)) / 2 + (ref_pic_list1->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic_list1->stride_cr;
5243 1526110 : dst_ptr = prediction_ptr->buffer_cr + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cr;
5244 1526110 : src_stride = ref_pic_list1->stride_cr;
5245 1526110 : dst_stride = prediction_ptr->stride_cr;
5246 :
5247 1526110 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv, blk_geom->bwidth_uv, blk_geom->bheight_uv, 1, 1);
5248 1526110 : subpel_x = mv_q4.col & SUBPEL_MASK;
5249 1526110 : subpel_y = mv_q4.row & SUBPEL_MASK;
5250 1526110 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
5251 1526110 : conv_params = get_conv_params_no_round(0, (mv_unit->pred_direction == BI_PRED) ? 1 : 0, 0, tmp_dstCr, 64, is_compound, bit_depth);
5252 1526120 : av1_dist_wtd_comp_weight_assign(
5253 1526120 : &picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header,
5254 1526120 : picture_control_set_ptr->parent_pcs_ptr->cur_order_hint,// cur_frame_index,
5255 1526120 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[0] - 1],// bck_frame_index,
5256 1526120 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[1] - 1],// fwd_frame_index,
5257 : compound_idx,
5258 : 0,// order_idx,
5259 : &conv_params.fwd_offset, &conv_params.bck_offset,
5260 : &conv_params.use_dist_wtd_comp_avg, is_compound);
5261 :
5262 1526090 : conv_params.use_jnt_comp_avg = conv_params.use_dist_wtd_comp_avg;
5263 :
5264 1526090 : if (is_compound && is_masked_compound_type(interinter_comp->type)) {
5265 210173 : conv_params.do_average = 0;
5266 210173 : av1_make_masked_inter_predictor(
5267 : src_ptr,
5268 : src_stride,
5269 : dst_ptr,
5270 : dst_stride,
5271 : blk_geom,
5272 210173 : blk_geom->bwidth_uv,
5273 210173 : blk_geom->bheight_uv,
5274 : &filter_params_x,
5275 : &filter_params_y,
5276 : subpel_x,
5277 : subpel_y,
5278 : &conv_params,
5279 : interinter_comp,
5280 : bit_depth,
5281 : 1//plane argument for Cr is 1, the same value as for Cb (NOTE(review): confirm the callee only distinguishes luma from chroma); seg_mask is computed based on luma and used for chroma
5282 : );
5283 : }
5284 : else
5285 1315910 : convolve[subpel_x != 0][subpel_y != 0][is_compound](
5286 : src_ptr,
5287 : src_stride,
5288 : dst_ptr,
5289 : dst_stride,
5290 1315910 : blk_geom->bwidth_uv,
5291 1315910 : blk_geom->bheight_uv,
5292 : &filter_params_x,
5293 : &filter_params_y,
5294 : subpel_x,
5295 : subpel_y,
5296 : &conv_params);
5297 : }
5298 : }
5299 : #if II_COMP_FLAG
// Inter-intra: build an intra prediction per plane and blend it over the inter prediction already in prediction_ptr.
5300 328722000 : if ( is_interintra_used ) {
5301 11711000 : int32_t start_plane = 0;
5302 11711000 : int32_t end_plane = perform_chroma && blk_geom->has_uv ? MAX_MB_PLANE: 1;
5303 : // temp buffers for intra pred (one per plane)
5304 : DECLARE_ALIGNED(16, uint8_t, intra_pred[MAX_SB_SQUARE]);
5305 : DECLARE_ALIGNED(16, uint8_t, intra_pred_cb[MAX_SB_SQUARE]);
5306 : DECLARE_ALIGNED(16, uint8_t, intra_pred_cr[MAX_SB_SQUARE]);
5307 :
5308 : int32_t intra_stride;
5309 :
5310 23585000 : for (int32_t plane = start_plane; plane < end_plane; ++plane) {
5311 :
// Local descriptor wrapping the temp buffers; only the origin, stride and buffer fields assigned below are set.
5312 : EbPictureBufferDesc intra_pred_desc;
5313 11871900 : intra_pred_desc.origin_x = intra_pred_desc.origin_y = 0;
5314 11871900 : intra_pred_desc.stride_y = bwidth;
5315 11871900 : intra_pred_desc.stride_cb = bwidth/2;
5316 11871900 : intra_pred_desc.stride_cr = bwidth/2;
5317 11871900 : intra_pred_desc.buffer_y = intra_pred;
5318 11871900 : intra_pred_desc.buffer_cb = intra_pred_cb;
5319 11871900 : intra_pred_desc.buffer_cr = intra_pred_cr;
5320 :
5321 11871900 : const int ssx = plane ? 1 : 0;
5322 11871900 : const int ssy = plane ? 1 : 0;
5323 11871900 : const BlockSize plane_bsize = get_plane_block_size(blk_geom->bsize, ssx, ssy);
5324 : //av1_build_interintra_predictors_sbp
// The neighbour arrays are filled only when the block is not on the top/left picture edge; otherwise they are passed on unwritten.
5325 : uint8_t topNeighArray[64 * 2 + 1];
5326 : uint8_t leftNeighArray[64 * 2 + 1];
5327 :
5328 11871000 : uint32_t cu_originx_uv = (pu_origin_x >> 3 << 3) >> 1;
5329 11871000 : uint32_t cu_originy_uv = (pu_origin_y >> 3 << 3) >> 1;
5330 :
5331 11871000 : if (plane == 0) {
5332 11709800 : dst_ptr = prediction_ptr->buffer_y + prediction_ptr->origin_x + dst_origin_x + (prediction_ptr->origin_y + dst_origin_y) * prediction_ptr->stride_y;
5333 11709800 : dst_stride = prediction_ptr->stride_y;
5334 11709800 : intra_stride = intra_pred_desc.stride_y;
5335 :
5336 11709800 : if (pu_origin_y != 0)
5337 11184700 : memcpy(topNeighArray + 1, luma_recon_neighbor_array->top_array + pu_origin_x, blk_geom->bwidth * 2);
5338 :
5339 11709800 : if (pu_origin_x != 0)
5340 11255400 : memcpy(leftNeighArray + 1, luma_recon_neighbor_array->left_array + pu_origin_y, blk_geom->bheight * 2);
5341 :
5342 11709800 : if (pu_origin_y != 0 && pu_origin_x != 0)
5343 10742600 : topNeighArray[0] = leftNeighArray[0] = luma_recon_neighbor_array->top_left_array[MAX_PICTURE_HEIGHT_SIZE + pu_origin_x - pu_origin_y];
5344 :
5345 : }
5346 :
5347 161193 : else if (plane == 1) {
5348 80596 : dst_ptr = prediction_ptr->buffer_cb + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cb;
5349 80596 : dst_stride = prediction_ptr->stride_cb;
5350 80596 : intra_stride = intra_pred_desc.stride_cb;
5351 :
5352 80596 : if (cu_originy_uv != 0)
5353 75211 : memcpy(topNeighArray + 1, cb_recon_neighbor_array->top_array + cu_originx_uv, blk_geom->bwidth_uv * 2);
5354 :
5355 80596 : if (cu_originx_uv != 0)
5356 70003 : memcpy(leftNeighArray + 1, cb_recon_neighbor_array->left_array + cu_originy_uv, blk_geom->bheight_uv * 2);
5357 :
// NOTE(review): the row term here is cu_originy_uv / 2 while the luma index above uses pu_origin_y unscaled - confirm this matches how the chroma top_left_array is written. Same expression in the Cr branch.
5358 80596 : if (cu_originy_uv != 0 && cu_originx_uv != 0)
5359 64833 : topNeighArray[0] = leftNeighArray[0] = cb_recon_neighbor_array->top_left_array[MAX_PICTURE_HEIGHT_SIZE / 2 + cu_originx_uv - cu_originy_uv / 2];
5360 : }
5361 : else {
5362 80597 : dst_ptr = prediction_ptr->buffer_cr + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cr;
5363 80597 : dst_stride = prediction_ptr->stride_cr;
5364 80597 : intra_stride = intra_pred_desc.stride_cr;
5365 :
5366 80597 : if (cu_originy_uv != 0)
5367 75212 : memcpy(topNeighArray + 1, cr_recon_neighbor_array->top_array + cu_originx_uv, blk_geom->bwidth_uv * 2);
5368 :
5369 80597 : if (cu_originx_uv != 0)
5370 70003 : memcpy(leftNeighArray + 1, cr_recon_neighbor_array->left_array + cu_originy_uv, blk_geom->bheight_uv * 2);
5371 :
5372 80597 : if (cu_originy_uv != 0 && cu_originx_uv != 0)
5373 64833 : topNeighArray[0] = leftNeighArray[0] = cr_recon_neighbor_array->top_left_array[MAX_PICTURE_HEIGHT_SIZE / 2 + cu_originx_uv - cu_originy_uv / 2];
5374 : }
5375 11871000 : TxSize tx_size = blk_geom->txsize[0][0]; // Nader - Intra 128x128 not supported
5376 11871000 : TxSize tx_size_Chroma = blk_geom->txsize_uv[0][0]; //Nader - Intra 128x128 not supported
5377 :
5378 23742100 : eb_av1_predict_intra_block(
5379 : tile,
5380 : !ED_STAGE,
5381 : blk_geom,
5382 11871000 : picture_control_set_ptr->parent_pcs_ptr->av1_cm, //const Av1Common *cm,
5383 11871000 : plane ? blk_geom->bwidth_uv : blk_geom->bwidth, //int32_t wpx,
5384 11871000 : plane ? blk_geom->bheight_uv : blk_geom->bheight, //int32_t hpx,
5385 : plane ? tx_size_Chroma : tx_size, //TxSize tx_size,
5386 11871000 : interintra_to_intra_mode[interintra_mode], //PredictionMode mode,
5387 : 0,
5388 : 0, //int32_t use_palette,
5389 : #if PAL_SUP
5390 : NULL, //inter-intra
5391 : #endif
5392 : FILTER_INTRA_MODES, // FilterIntraMode filter_intra_mode,
5393 : topNeighArray + 1,
5394 : leftNeighArray + 1,
5395 : &intra_pred_desc, //uint8_t *dst,
5396 : //int32_t dst_stride,
5397 : 0, //int32_t col_off,
5398 : 0, //int32_t row_off,
5399 : plane, //int32_t plane,
5400 11871000 : blk_geom->bsize, //uint32_t puSize,
5401 : dst_origin_x,
5402 : dst_origin_y,
5403 : pu_origin_x,
5404 : pu_origin_y,
5405 : 0, //uint32_t cuOrgX used only for prediction Ptr
5406 : 0 //uint32_t cuOrgY used only for prediction Ptr
5407 : );
5408 : //combine_interintra: dst_ptr is passed both as the output and as the inter prediction input, so the blend is done in place
5409 11873600 : combine_interintra(
5410 : interintra_mode,
5411 : use_wedge_interintra,
5412 : interintra_wedge_index,
5413 : INTERINTRA_WEDGE_SIGN,
5414 11873600 : blk_geom->bsize,
5415 : plane_bsize,
5416 : dst_ptr,
5417 : dst_stride,
5418 : dst_ptr, // Inter pred buff
5419 : dst_stride, // Inter pred stride
5420 : (plane == 0) ? intra_pred : (plane == 1) ? intra_pred_cb : intra_pred_cr, // Intra pred buff
5421 : intra_stride); // Intra pred stride
5422 :
5423 : }
5424 : }
5425 : #endif
5426 : #if OBMC_FLAG
5427 328724000 : if (motion_mode == OBMC_CAUSAL)
5428 : {
5429 :
5430 : uint8_t * tmp_obmc_bufs[2];
5431 :
5432 : DECLARE_ALIGNED(16, uint8_t, obmc_buff_0[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
5433 : DECLARE_ALIGNED(16, uint8_t, obmc_buff_1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
5434 13101400 : tmp_obmc_bufs[0] = obmc_buff_0;
5435 13101400 : tmp_obmc_bufs[1] = obmc_buff_1;
5436 :
5437 : uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
5438 13101400 : int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
5439 13101400 : int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
5440 :
// Default: point the per-plane pointers at the local buffers, one MAX_SB_SQUARE slice per plane.
5441 : {
5442 13101400 : dst_buf1[0] = tmp_obmc_bufs[0];
5443 13101400 : dst_buf1[1] = tmp_obmc_bufs[0] + MAX_SB_SQUARE;
5444 13101400 : dst_buf1[2] = tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2;
5445 13101400 : dst_buf2[0] = tmp_obmc_bufs[1];
5446 13101400 : dst_buf2[1] = tmp_obmc_bufs[1] + MAX_SB_SQUARE;
5447 13101400 : dst_buf2[2] = tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2;
5448 : }
5449 :
5450 13101400 : int mi_row = pu_origin_y >> 2;
5451 13101400 : int mi_col = pu_origin_x >> 2;
5452 :
// Precomputed case: redirect the pointers to the buffers held in md_context; otherwise build the above/left predictions into the local buffers now.
5453 13101400 : if (use_precomputed_obmc)
5454 : {
5455 13098700 : dst_buf1[0] = md_context->obmc_buff_0;
5456 13098700 : dst_buf1[1] = md_context->obmc_buff_0 + MAX_SB_SQUARE;
5457 13098700 : dst_buf1[2] = md_context->obmc_buff_0 + MAX_SB_SQUARE*2;
5458 13098700 : dst_buf2[0] = md_context->obmc_buff_1;
5459 13098700 : dst_buf2[1] = md_context->obmc_buff_1 + MAX_SB_SQUARE;
5460 13098700 : dst_buf2[2] = md_context->obmc_buff_1 + MAX_SB_SQUARE*2;
5461 : }
5462 : else
5463 : {
5464 2717 : build_prediction_by_above_preds(
5465 : perform_chroma,
5466 2717 : blk_geom->bsize, picture_control_set_ptr, cu_ptr->av1xd, mi_row, mi_col, dst_buf1,
5467 : dst_stride1);
5468 :
5469 1794 : build_prediction_by_left_preds(
5470 : perform_chroma,
5471 1794 : blk_geom->bsize, picture_control_set_ptr, cu_ptr->av1xd, mi_row, mi_col, dst_buf2,
5472 : dst_stride2);
5473 : }
5474 :
// Destination plane pointers use the same addressing as the prediction passes above.
5475 13100500 : uint8_t * final_dst_ptr_y = prediction_ptr->buffer_y + prediction_ptr->origin_x + dst_origin_x + (prediction_ptr->origin_y + dst_origin_y) * prediction_ptr->stride_y;
5476 13100500 : uint16_t final_dst_stride_y = prediction_ptr->stride_y;
5477 :
5478 13100500 : uint8_t * final_dst_ptr_u = prediction_ptr->buffer_cb + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cb;
5479 13100500 : uint16_t final_dst_stride_u = prediction_ptr->stride_cb;
5480 :
5481 13100500 : uint8_t * final_dst_ptr_v = prediction_ptr->buffer_cr + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cr;
5482 13100500 : uint16_t final_dst_stride_v = prediction_ptr->stride_cr;
5483 :
5484 13100500 : av1_build_obmc_inter_prediction(
5485 : final_dst_ptr_y,
5486 : final_dst_stride_y,
5487 : final_dst_ptr_u,
5488 : final_dst_stride_u,
5489 : final_dst_ptr_v,
5490 : final_dst_stride_v,
5491 : perform_chroma,
5492 13100500 : blk_geom->bsize,
5493 : picture_control_set_ptr,
5494 : cu_ptr->av1xd,
5495 : mi_row,
5496 : mi_col,
5497 : dst_buf1,
5498 : dst_stride1,
5499 : dst_buf2,
5500 : dst_stride2);
5501 : }
5502 : #endif
5503 328725000 : return return_error;
5504 : }
5505 :
5506 :
5507 :
5508 0 : EbErrorType av1_inter_prediction_hbd(
5509 : PictureControlSet *picture_control_set_ptr,
5510 : uint32_t interp_filters,
5511 : CodingUnit *cu_ptr,
5512 : uint8_t ref_frame_type,
5513 : MvUnit *mv_unit,
5514 : uint8_t use_intrabc,
5515 : #if OBMC_FLAG
5516 : MotionMode motion_mode,
5517 : uint8_t use_precomputed_obmc,
5518 : struct ModeDecisionContext *md_context,
5519 : #endif
5520 : uint8_t compound_idx,
5521 : InterInterCompoundData *interinter_comp,
5522 : #if II_COMP_FLAG
5523 : TileInfo * tile,
5524 : NeighborArrayUnit *luma_recon_neighbor_array,
5525 : NeighborArrayUnit *cb_recon_neighbor_array ,
5526 : NeighborArrayUnit *cr_recon_neighbor_array ,
5527 : uint8_t is_interintra_used ,
5528 : INTERINTRA_MODE interintra_mode,
5529 : uint8_t use_wedge_interintra,
5530 : int32_t interintra_wedge_index,
5531 : #endif
5532 : uint16_t pu_origin_x,
5533 : uint16_t pu_origin_y,
5534 : uint8_t bwidth,
5535 : uint8_t bheight,
5536 : EbPictureBufferDesc *ref_pic_list0,
5537 : EbPictureBufferDesc *ref_pic_list1,
5538 : EbPictureBufferDesc *prediction_ptr,
5539 : uint16_t dst_origin_x,
5540 : uint16_t dst_origin_y,
5541 : EbBool perform_chroma,
5542 : uint8_t bit_depth)
5543 : {
5544 : (void)use_precomputed_obmc;
5545 : (void) md_context;
5546 :
5547 0 : EbErrorType return_error = EB_ErrorNone;
5548 0 : uint8_t is_compound = (mv_unit->pred_direction == BI_PRED) ? 1 : 0;
5549 : DECLARE_ALIGNED(32, uint16_t, tmp_dstY[128 * 128]);//move this to context if stack does not hold.
5550 : DECLARE_ALIGNED(32, uint16_t, tmp_dstCb[64 * 64]);
5551 : DECLARE_ALIGNED(32, uint16_t, tmp_dstCr[64 * 64]);
5552 : MV mv, mv_q4;
5553 :
5554 : int32_t subpel_x, subpel_y;
5555 : uint16_t * src_ptr;
5556 : uint16_t * dst_ptr;
5557 : int32_t src_stride;
5558 : int32_t dst_stride;
5559 : ConvolveParams conv_params;
5560 : InterpFilterParams filter_params_x, filter_params_y;
5561 :
5562 0 : const BlockGeom * blk_geom = get_blk_geom_mds(cu_ptr->mds_idx);
5563 :
5564 : #if OBMC_FLAG
5565 0 : if (motion_mode == OBMC_CAUSAL) {
5566 0 : assert(is_compound == 0);
5567 0 : assert(blk_geom->bwidth > 4 && blk_geom->bheight > 4);
5568 : }
5569 : #endif
5570 : // Special treatment for chroma in 4xN/Nx4 blocks:
5571 : // if one of the neighbouring blocks of the parent square is intra, the chroma prediction follows the normal path, using the luma MV of the current NSQ block (which is the latest sub8x8 block).
5572 : // In this case only uni-prediction is allowed.
5573 :
5574 0 : int32_t sub8x8_inter = 0;
5575 :
5576 0 : if(perform_chroma && (blk_geom->has_uv && (blk_geom->bwidth == 4 || blk_geom->bheight == 4)))
5577 :
5578 : {
5579 : //CHKN setup input param
5580 0 : int32_t bw = blk_geom->bwidth_uv;
5581 0 : int32_t bh = blk_geom->bheight_uv;
5582 : UNUSED(bw);
5583 : UNUSED(bh);
5584 :
5585 0 : uint32_t mi_x = pu_origin_x; //these are luma picture wise
5586 0 : uint32_t mi_y = pu_origin_y;
5587 :
5588 0 : MacroBlockD *xd = cu_ptr->av1xd;
5589 0 : xd->mi_stride = picture_control_set_ptr->mi_stride;
5590 0 : const int32_t offset = (mi_y >> MI_SIZE_LOG2) * xd->mi_stride + (mi_x >> MI_SIZE_LOG2);
5591 0 : xd->mi = picture_control_set_ptr->mi_grid_base + offset;
5592 :
5593 : //CHKN fill current mi from current block
5594 : {
5595 0 : ModeInfo *miPtr = *xd->mi;
5596 : uint8_t miX, miY;
5597 : MvReferenceFrame rf[2];
5598 0 : av1_set_ref_frame(rf, ref_frame_type);
5599 0 : for (miY = 0; miY < (blk_geom->bheight >> MI_SIZE_LOG2); miY++) {
5600 0 : for (miX = 0; miX < (blk_geom->bwidth >> MI_SIZE_LOG2); miX++) {
5601 0 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.use_intrabc = use_intrabc;
5602 0 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.ref_frame[0] = rf[0];
5603 0 : if (mv_unit->pred_direction == UNI_PRED_LIST_0) {
5604 0 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.mv[0].as_mv.col = mv_unit->mv[REF_LIST_0].x;
5605 0 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.mv[0].as_mv.row = mv_unit->mv[REF_LIST_0].y;
5606 : }
5607 0 : else if (mv_unit->pred_direction == UNI_PRED_LIST_1) {
5608 0 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.mv[0].as_mv.col = mv_unit->mv[REF_LIST_1].x;
5609 0 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.mv[0].as_mv.row = mv_unit->mv[REF_LIST_1].y;
5610 : }
5611 : else {
5612 0 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.mv[0].as_mv.col = mv_unit->mv[REF_LIST_0].x;
5613 0 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.mv[0].as_mv.row = mv_unit->mv[REF_LIST_0].y;
5614 0 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.mv[1].as_mv.col = mv_unit->mv[REF_LIST_1].x;
5615 0 : miPtr[miX + miY * xd->mi_stride].mbmi.block_mi.mv[1].as_mv.row = mv_unit->mv[REF_LIST_1].y;
5616 : }
5617 : }
5618 : }
5619 : }
5620 :
5621 0 : int32_t build_for_obmc = 0;
5622 :
5623 0 : const BlockSize bsize = blk_geom->bsize;//mi->sb_type;
5624 0 : assert(bsize < BlockSizeS_ALL);
5625 0 : const int32_t ss_x = 1;// pd->subsampling_x;
5626 0 : const int32_t ss_y = 1;//pd->subsampling_y;
5627 0 : sub8x8_inter = (block_size_wide[bsize] < 8 && ss_x) ||
5628 0 : (block_size_high[bsize] < 8 && ss_y);
5629 :
5630 0 : if (use_intrabc) sub8x8_inter = 0;
5631 : // For sub8x8 chroma blocks, we may be covering more than one luma block's
5632 : // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
5633 : // the top-left corner of the prediction source - the correct top-left corner
5634 : // is at (pre_x, pre_y).
5635 0 : const int32_t row_start =
5636 0 : (block_size_high[bsize] == 4) && ss_y && !build_for_obmc ? -1 : 0;
5637 0 : const int32_t col_start =
5638 0 : (block_size_wide[bsize] == 4) && ss_x && !build_for_obmc ? -1 : 0;
5639 :
5640 0 : const int32_t pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
5641 0 : const int32_t pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
5642 : UNUSED(pre_x);
5643 : UNUSED(pre_y);
5644 :
5645 0 : sub8x8_inter = sub8x8_inter && !build_for_obmc;
5646 0 : if (sub8x8_inter) {
5647 0 : for (int32_t row = row_start; row <= 0 && sub8x8_inter; ++row) {
5648 0 : for (int32_t col = col_start; col <= 0; ++col) {
5649 0 : ModeInfo *miPtr = *xd->mi;
5650 0 : const MbModeInfo *this_mbmi = &miPtr[row * xd->mi_stride + col].mbmi;
5651 :
5652 0 : if (!is_inter_block(&this_mbmi->block_mi)) sub8x8_inter = 0;
5653 : }
5654 : }
5655 : }
5656 :
5657 0 : if (sub8x8_inter) {
5658 : // block size
5659 0 : const int32_t b4_w = block_size_wide[bsize] >> ss_x;
5660 0 : const int32_t b4_h = block_size_high[bsize] >> ss_y;
5661 0 : const BlockSize plane_bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
5662 0 : assert(plane_bsize < BlockSizeS_ALL);
5663 0 : const int32_t b8_w = block_size_wide[plane_bsize] >> ss_x;
5664 0 : const int32_t b8_h = block_size_high[plane_bsize] >> ss_y;
5665 :
5666 0 : assert(!is_compound);
5667 :
5668 0 : int32_t row = row_start;
5669 : int32_t src_stride;
5670 0 : for (int32_t y = 0; y < b8_h; y += b4_h) {
5671 0 : int32_t col = col_start;
5672 0 : for (int32_t x = 0; x < b8_w; x += b4_w) {
5673 0 : ModeInfo *miPtr = *xd->mi;
5674 0 : const MbModeInfo *this_mbmi = &miPtr[row * xd->mi_stride + col].mbmi;
5675 :
5676 0 : int32_t tmp_dst_stride = 8;
5677 : UNUSED(tmp_dst_stride);
5678 0 : assert(bw < 8 || bh < 8);
5679 :
5680 0 : conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstCb, BLOCK_SIZE_64, is_compound, bit_depth);
5681 0 : conv_params.use_jnt_comp_avg = 0;
5682 0 : uint8_t ref_idx = get_ref_frame_idx(this_mbmi->block_mi.ref_frame[0]);
5683 0 : assert(ref_idx < REF_LIST_MAX_DEPTH);
5684 0 : EbPictureBufferDesc *ref_pic = this_mbmi->block_mi.ref_frame[0] ==
5685 0 : LAST_FRAME || this_mbmi->block_mi.ref_frame[0] == LAST2_FRAME || this_mbmi->block_mi.ref_frame[0] == LAST3_FRAME || this_mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME ?
5686 0 : ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[REF_LIST_0][ref_idx]->object_ptr)->reference_picture16bit :
5687 0 : ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[REF_LIST_1][ref_idx]->object_ptr)->reference_picture16bit;
5688 0 : src_ptr = (uint16_t*)ref_pic->buffer_cb + (ref_pic->origin_x + ((pu_origin_x >> 3) << 3)) / 2 + (ref_pic->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic->stride_cb;
5689 0 : dst_ptr = (uint16_t*)prediction_ptr->buffer_cb + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cb;
5690 0 : src_stride = ref_pic->stride_cb;
5691 0 : dst_stride = prediction_ptr->stride_cb;
5692 0 : src_ptr = src_ptr + x + y * ref_pic->stride_cb;
5693 0 : dst_ptr = dst_ptr + x + y * prediction_ptr->stride_cb;
5694 :
5695 0 : const MV mv = this_mbmi->block_mi.mv[0].as_mv;
5696 0 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv, blk_geom->bwidth_uv, blk_geom->bheight_uv, 1, 1);
5697 0 : subpel_x = mv_q4.col & SUBPEL_MASK;
5698 0 : subpel_y = mv_q4.row & SUBPEL_MASK;
5699 0 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
5700 :
5701 0 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
5702 0 : &filter_params_y, blk_geom->bwidth_uv, blk_geom->bheight_uv);
5703 :
5704 0 : convolveHbd[subpel_x != 0][subpel_y != 0][is_compound](
5705 : src_ptr,
5706 : src_stride,
5707 : dst_ptr,
5708 : dst_stride,
5709 : b4_w,
5710 : b4_h,
5711 : &filter_params_x,
5712 : &filter_params_y,
5713 : subpel_x,
5714 : subpel_y,
5715 : &conv_params,
5716 : bit_depth);
5717 :
5718 : //Cr
5719 0 : conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstCr, BLOCK_SIZE_64, is_compound, bit_depth);
5720 0 : conv_params.use_jnt_comp_avg = 0;
5721 :
5722 0 : src_ptr = (uint16_t*)ref_pic->buffer_cr + (ref_pic->origin_x + ((pu_origin_x >> 3) << 3)) / 2 + (ref_pic->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic->stride_cr;
5723 0 : dst_ptr = (uint16_t*)prediction_ptr->buffer_cr + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cr;
5724 0 : src_stride = ref_pic->stride_cr;
5725 0 : dst_stride = prediction_ptr->stride_cr;
5726 0 : src_ptr = src_ptr + x + y * ref_pic->stride_cr;
5727 0 : dst_ptr = dst_ptr + x + y * prediction_ptr->stride_cr;
5728 :
5729 0 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv, blk_geom->bwidth_uv, blk_geom->bheight_uv, 1, 1);
5730 0 : subpel_x = mv_q4.col & SUBPEL_MASK;
5731 0 : subpel_y = mv_q4.row & SUBPEL_MASK;
5732 0 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
5733 :
5734 0 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
5735 0 : &filter_params_y, blk_geom->bwidth_uv, blk_geom->bheight_uv);
5736 :
5737 0 : convolveHbd[subpel_x != 0][subpel_y != 0][is_compound](
5738 : src_ptr,
5739 : src_stride,
5740 : dst_ptr,
5741 : dst_stride,
5742 : b4_w,
5743 : b4_h,
5744 : &filter_params_x,
5745 : &filter_params_y,
5746 : subpel_x,
5747 : subpel_y,
5748 : &conv_params,
5749 : bit_depth);
5750 :
5751 0 : ++col;
5752 : }
5753 0 : ++row;
5754 : }
5755 : }
5756 : }
5757 : #if INTER_INTER_HBD
5758 : MvReferenceFrame rf[2];
5759 0 : av1_set_ref_frame(rf, ref_frame_type);
5760 : #endif
5761 0 : if (mv_unit->pred_direction == UNI_PRED_LIST_0 || mv_unit->pred_direction == BI_PRED) {
5762 : //List0-Y
5763 0 : mv.col = mv_unit->mv[REF_LIST_0].x;
5764 0 : mv.row = mv_unit->mv[REF_LIST_0].y;
5765 :
5766 0 : src_ptr = (uint16_t*)ref_pic_list0->buffer_y + ref_pic_list0->origin_x + pu_origin_x + (ref_pic_list0->origin_y + pu_origin_y) * ref_pic_list0->stride_y;
5767 0 : dst_ptr = (uint16_t*)prediction_ptr->buffer_y + prediction_ptr->origin_x + dst_origin_x + (prediction_ptr->origin_y + dst_origin_y) * prediction_ptr->stride_y;
5768 0 : src_stride = ref_pic_list0->stride_y;
5769 0 : dst_stride = prediction_ptr->stride_y;
5770 :
5771 0 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv, bwidth, bheight, 0, 0);//mv_q4 has 1 extra bit for fractionnal to accomodate chroma when accessing filter coeffs.
5772 0 : subpel_x = mv_q4.col & SUBPEL_MASK;
5773 0 : subpel_y = mv_q4.row & SUBPEL_MASK;
5774 0 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
5775 0 : conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstY, 128, is_compound, bit_depth);
5776 :
5777 0 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
5778 : &filter_params_y, bwidth, bheight);
5779 :
5780 0 : convolveHbd[subpel_x != 0][subpel_y != 0][is_compound](
5781 : src_ptr,
5782 : src_stride,
5783 : dst_ptr,
5784 : dst_stride,
5785 : bwidth,
5786 : bheight,
5787 : &filter_params_x,
5788 : &filter_params_y,
5789 : subpel_x,
5790 : subpel_y,
5791 : &conv_params,
5792 : bit_depth);
5793 :
5794 0 : if (perform_chroma && blk_geom->has_uv && sub8x8_inter == 0) {
5795 : //List0-Cb
5796 0 : src_ptr = (uint16_t*)ref_pic_list0->buffer_cb + (ref_pic_list0->origin_x + ((pu_origin_x >> 3) << 3)) / 2 + (ref_pic_list0->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic_list0->stride_cb;
5797 0 : dst_ptr = (uint16_t*)prediction_ptr->buffer_cb + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cb;
5798 0 : src_stride = ref_pic_list0->stride_cb;
5799 0 : dst_stride = prediction_ptr->stride_cb;
5800 :
5801 0 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv, blk_geom->bwidth_uv, blk_geom->bheight_uv, 1, 1);
5802 0 : subpel_x = mv_q4.col & SUBPEL_MASK;
5803 0 : subpel_y = mv_q4.row & SUBPEL_MASK;
5804 0 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
5805 0 : conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstCb, 64, is_compound, bit_depth);
5806 :
5807 0 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
5808 0 : &filter_params_y, blk_geom->bwidth_uv, blk_geom->bheight_uv);
5809 :
5810 0 : if (use_intrabc && (subpel_x != 0 || subpel_y != 0))
5811 0 : highbd_convolve_2d_for_intrabc(
5812 : (const uint16_t *)src_ptr,
5813 : src_stride,
5814 : dst_ptr,
5815 : dst_stride,
5816 0 : blk_geom->bwidth_uv,
5817 0 : blk_geom->bheight_uv,
5818 : subpel_x,
5819 : subpel_y,
5820 : &conv_params,
5821 : bit_depth);
5822 : else
5823 0 : convolveHbd[subpel_x != 0][subpel_y != 0][is_compound](
5824 : src_ptr,
5825 : src_stride,
5826 : dst_ptr,
5827 : dst_stride,
5828 0 : blk_geom->bwidth_uv,
5829 0 : blk_geom->bheight_uv,
5830 : &filter_params_x,
5831 : &filter_params_y,
5832 : subpel_x,
5833 : subpel_y,
5834 : &conv_params,
5835 : bit_depth);
5836 :
5837 : //List0-Cr
5838 0 : src_ptr = (uint16_t*)ref_pic_list0->buffer_cr + (ref_pic_list0->origin_x + ((pu_origin_x >> 3) << 3)) / 2 + (ref_pic_list0->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic_list0->stride_cr;
5839 0 : dst_ptr = (uint16_t*)prediction_ptr->buffer_cr + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cr;
5840 0 : src_stride = ref_pic_list0->stride_cr;
5841 0 : dst_stride = prediction_ptr->stride_cr;
5842 :
5843 0 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv, blk_geom->bwidth_uv, blk_geom->bheight_uv, 1, 1);
5844 0 : subpel_x = mv_q4.col & SUBPEL_MASK;
5845 0 : subpel_y = mv_q4.row & SUBPEL_MASK;
5846 0 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
5847 0 : conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstCr, 64, is_compound, bit_depth);
5848 0 : if (use_intrabc && (subpel_x != 0 || subpel_y != 0))
5849 0 : highbd_convolve_2d_for_intrabc(
5850 : (const uint16_t *)src_ptr,
5851 : src_stride,
5852 : dst_ptr,
5853 : dst_stride,
5854 0 : blk_geom->bwidth_uv,
5855 0 : blk_geom->bheight_uv,
5856 : subpel_x,
5857 : subpel_y,
5858 : &conv_params,
5859 : bit_depth);
5860 : else
5861 0 : convolveHbd[subpel_x != 0][subpel_y != 0][is_compound](
5862 : src_ptr,
5863 : src_stride,
5864 : dst_ptr,
5865 : dst_stride,
5866 0 : blk_geom->bwidth_uv,
5867 0 : blk_geom->bheight_uv,
5868 : &filter_params_x,
5869 : &filter_params_y,
5870 : subpel_x,
5871 : subpel_y,
5872 : &conv_params,
5873 : bit_depth);
5874 : }
5875 : }
5876 :
5877 0 : if (mv_unit->pred_direction == UNI_PRED_LIST_1 || mv_unit->pred_direction == BI_PRED) {
5878 : //List1-Y
5879 0 : mv.col = mv_unit->mv[REF_LIST_1].x;
5880 0 : mv.row = mv_unit->mv[REF_LIST_1].y;
5881 :
5882 0 : src_ptr = (uint16_t*)ref_pic_list1->buffer_y + ref_pic_list1->origin_x + pu_origin_x + (ref_pic_list1->origin_y + pu_origin_y) * ref_pic_list1->stride_y;
5883 0 : dst_ptr = (uint16_t*)prediction_ptr->buffer_y + prediction_ptr->origin_x + dst_origin_x + (prediction_ptr->origin_y + dst_origin_y) * prediction_ptr->stride_y;
5884 0 : src_stride = ref_pic_list1->stride_y;
5885 0 : dst_stride = prediction_ptr->stride_y;
5886 :
5887 0 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv, bwidth, bheight, 0, 0);//mv_q4 has 1 extra bit for fractionnal to accomodate chroma when accessing filter coeffs.
5888 0 : subpel_x = mv_q4.col & SUBPEL_MASK;
5889 0 : subpel_y = mv_q4.row & SUBPEL_MASK;
5890 :
5891 0 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
5892 0 : conv_params = get_conv_params_no_round(0, (mv_unit->pred_direction == BI_PRED) ? 1 : 0, 0, tmp_dstY, 128, is_compound, bit_depth);
5893 : #if INTER_INTER_HBD
5894 0 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
5895 : &filter_params_y, bwidth, bheight);
5896 :
5897 : //the luma data is applied to chroma below
5898 0 : av1_dist_wtd_comp_weight_assign(
5899 0 : &picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header,
5900 0 : picture_control_set_ptr->parent_pcs_ptr->cur_order_hint,// cur_frame_index,
5901 0 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[0] - 1],// bck_frame_index,
5902 0 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[1] - 1],// fwd_frame_index,
5903 : compound_idx,
5904 : 0,// order_idx,
5905 : &conv_params.fwd_offset, &conv_params.bck_offset,
5906 : &conv_params.use_dist_wtd_comp_avg, is_compound);
5907 :
5908 0 : conv_params.use_jnt_comp_avg = conv_params.use_dist_wtd_comp_avg;
5909 : #endif
5910 :
5911 :
5912 : #if INTER_INTER_HBD
5913 0 : if (is_compound && is_masked_compound_type(interinter_comp->type)) {
5914 0 : conv_params.do_average = 0;
5915 0 : av1_make_masked_inter_predictor_hbd(
5916 : src_ptr,
5917 : src_stride,
5918 : dst_ptr,
5919 : dst_stride,
5920 : blk_geom,
5921 : bwidth,
5922 : bheight,
5923 : &filter_params_x,
5924 : &filter_params_y,
5925 : subpel_x,
5926 : subpel_y,
5927 : &conv_params,
5928 : interinter_comp,
5929 : bit_depth,
5930 : 0//plane=Luma seg_mask is computed based on luma and used for chroma
5931 : );
5932 : }
5933 : else
5934 : #endif
5935 0 : convolveHbd[subpel_x != 0][subpel_y != 0][is_compound](
5936 : src_ptr,
5937 : src_stride,
5938 : dst_ptr,
5939 : dst_stride,
5940 : bwidth,
5941 : bheight,
5942 : &filter_params_x,
5943 : &filter_params_y,
5944 : subpel_x,
5945 : subpel_y,
5946 : &conv_params,
5947 : bit_depth);
5948 :
5949 0 : if (perform_chroma && blk_geom->has_uv && sub8x8_inter == 0) {
5950 : //List1-Cb
5951 0 : src_ptr = (uint16_t*)ref_pic_list1->buffer_cb + (ref_pic_list1->origin_x + ((pu_origin_x >> 3) << 3)) / 2 + (ref_pic_list1->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic_list1->stride_cb;
5952 0 : dst_ptr = (uint16_t*)prediction_ptr->buffer_cb + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cb;
5953 0 : src_stride = ref_pic_list1->stride_cb;
5954 0 : dst_stride = prediction_ptr->stride_cb;
5955 :
5956 0 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv, blk_geom->bwidth_uv, blk_geom->bheight_uv, 1, 1);
5957 0 : subpel_x = mv_q4.col & SUBPEL_MASK;
5958 0 : subpel_y = mv_q4.row & SUBPEL_MASK;
5959 0 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
5960 0 : conv_params = get_conv_params_no_round(0, (mv_unit->pred_direction == BI_PRED) ? 1 : 0, 0, tmp_dstCb, 64, is_compound, bit_depth);
5961 : #if INTER_INTER_HBD
5962 0 : av1_dist_wtd_comp_weight_assign(
5963 0 : &picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header,
5964 0 : picture_control_set_ptr->parent_pcs_ptr->cur_order_hint,// cur_frame_index,
5965 0 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[0] - 1],// bck_frame_index,
5966 0 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[1] - 1],// fwd_frame_index,
5967 : compound_idx,
5968 : 0,// order_idx,
5969 : &conv_params.fwd_offset, &conv_params.bck_offset,
5970 : &conv_params.use_dist_wtd_comp_avg, is_compound);
5971 :
5972 0 : conv_params.use_jnt_comp_avg = conv_params.use_dist_wtd_comp_avg;
5973 : #endif
5974 0 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
5975 0 : &filter_params_y, blk_geom->bwidth_uv, blk_geom->bheight_uv);
5976 : #if INTER_INTER_HBD
5977 0 : if (is_compound && is_masked_compound_type(interinter_comp->type)) {
5978 0 : conv_params.do_average = 0;
5979 0 : av1_make_masked_inter_predictor_hbd(
5980 : src_ptr,
5981 : src_stride,
5982 : dst_ptr,
5983 : dst_stride,
5984 : blk_geom,
5985 0 : blk_geom->bwidth_uv,
5986 0 : blk_geom->bheight_uv,
5987 : &filter_params_x,
5988 : &filter_params_y,
5989 : subpel_x,
5990 : subpel_y,
5991 : &conv_params,
5992 : interinter_comp,
5993 : bit_depth,
5994 : 1//plane=cb seg_mask is computed based on luma and used for chroma
5995 : );
5996 : }
5997 : else
5998 : #endif
5999 0 : convolveHbd[subpel_x != 0][subpel_y != 0][is_compound](
6000 : src_ptr,
6001 : src_stride,
6002 : dst_ptr,
6003 : dst_stride,
6004 0 : blk_geom->bwidth_uv,
6005 0 : blk_geom->bheight_uv,
6006 : &filter_params_x,
6007 : &filter_params_y,
6008 : subpel_x,
6009 : subpel_y,
6010 : &conv_params,
6011 : bit_depth);
6012 :
6013 : //List1-Cr
6014 0 : src_ptr = (uint16_t*)ref_pic_list1->buffer_cr + (ref_pic_list1->origin_x + ((pu_origin_x >> 3) << 3)) / 2 + (ref_pic_list1->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic_list1->stride_cr;
6015 0 : dst_ptr = (uint16_t*)prediction_ptr->buffer_cr + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cr;
6016 0 : src_stride = ref_pic_list1->stride_cr;
6017 0 : dst_stride = prediction_ptr->stride_cr;
6018 :
6019 0 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv, blk_geom->bwidth_uv, blk_geom->bheight_uv, 1, 1);
6020 0 : subpel_x = mv_q4.col & SUBPEL_MASK;
6021 0 : subpel_y = mv_q4.row & SUBPEL_MASK;
6022 0 : src_ptr = src_ptr + (mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS);
6023 0 : conv_params = get_conv_params_no_round(0, (mv_unit->pred_direction == BI_PRED) ? 1 : 0, 0, tmp_dstCr, 64, is_compound, bit_depth);
6024 :
6025 : #if INTER_INTER_HBD
6026 0 : av1_dist_wtd_comp_weight_assign(
6027 0 : &picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header,
6028 0 : picture_control_set_ptr->parent_pcs_ptr->cur_order_hint,// cur_frame_index,
6029 0 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[0] - 1],// bck_frame_index,
6030 0 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[1] - 1],// fwd_frame_index,
6031 : compound_idx,
6032 : 0,// order_idx,
6033 : &conv_params.fwd_offset, &conv_params.bck_offset,
6034 : &conv_params.use_dist_wtd_comp_avg, is_compound);
6035 :
6036 0 : conv_params.use_jnt_comp_avg = conv_params.use_dist_wtd_comp_avg;
6037 :
6038 0 : if (is_compound && is_masked_compound_type(interinter_comp->type)) {
6039 0 : conv_params.do_average = 0;
6040 0 : av1_make_masked_inter_predictor_hbd(
6041 : src_ptr,
6042 : src_stride,
6043 : dst_ptr,
6044 : dst_stride,
6045 : blk_geom,
6046 0 : blk_geom->bwidth_uv,
6047 0 : blk_geom->bheight_uv,
6048 : &filter_params_x,
6049 : &filter_params_y,
6050 : subpel_x,
6051 : subpel_y,
6052 : &conv_params,
6053 : interinter_comp,
6054 : bit_depth,
6055 : 1//plane=Cr seg_mask is computed based on luma and used for chroma
6056 : );
6057 : }
6058 : else
6059 : #endif
6060 0 : convolveHbd[subpel_x != 0][subpel_y != 0][is_compound](
6061 : src_ptr,
6062 : src_stride,
6063 : dst_ptr,
6064 : dst_stride,
6065 0 : blk_geom->bwidth_uv,
6066 0 : blk_geom->bheight_uv,
6067 : &filter_params_x,
6068 : &filter_params_y,
6069 : subpel_x,
6070 : subpel_y,
6071 : &conv_params,
6072 : bit_depth);
6073 : }
6074 : }
6075 :
6076 : #if INTER_INTRA_HBD
6077 0 : if ( is_interintra_used ) {
6078 0 : int32_t start_plane = 0;
6079 0 : int32_t end_plane = perform_chroma && blk_geom->has_uv ? MAX_MB_PLANE: 1;
6080 : // temp buffer for intra pred
6081 : DECLARE_ALIGNED(16, uint8_t, intra_pred[MAX_SB_SQUARE]);
6082 : DECLARE_ALIGNED(16, uint8_t, intra_pred_cb[MAX_SB_SQUARE]);
6083 : DECLARE_ALIGNED(16, uint8_t, intra_pred_cr[MAX_SB_SQUARE]);
6084 :
6085 : int32_t intra_stride;
6086 :
6087 0 : for (int32_t plane = start_plane; plane < end_plane; ++plane) {
6088 :
6089 : EbPictureBufferDesc intra_pred_desc;
6090 0 : intra_pred_desc.origin_x = intra_pred_desc.origin_y = 0;
6091 0 : intra_pred_desc.stride_y = bwidth;
6092 0 : intra_pred_desc.stride_cb = bwidth/2;
6093 0 : intra_pred_desc.stride_cr = bwidth/2;
6094 0 : intra_pred_desc.buffer_y = intra_pred;
6095 0 : intra_pred_desc.buffer_cb = intra_pred_cb;
6096 0 : intra_pred_desc.buffer_cr = intra_pred_cr;
6097 :
6098 0 : const int ssx = plane ? 1 : 0;
6099 0 : const int ssy = plane ? 1 : 0;
6100 0 : const BlockSize plane_bsize = get_plane_block_size(blk_geom->bsize, ssx, ssy);
6101 : //av1_build_interintra_predictors_sbp
6102 : uint16_t topNeighArray[64 * 2 + 1];
6103 : uint16_t leftNeighArray[64 * 2 + 1];
6104 :
6105 0 : uint32_t cu_originx_uv = (pu_origin_x >> 3 << 3) >> 1;
6106 0 : uint32_t cu_originy_uv = (pu_origin_y >> 3 << 3) >> 1;
6107 :
6108 0 : if (plane == 0) {
6109 0 : dst_ptr = (uint16_t*)prediction_ptr->buffer_y + prediction_ptr->origin_x + dst_origin_x + (prediction_ptr->origin_y + dst_origin_y) * prediction_ptr->stride_y;
6110 0 : dst_stride = prediction_ptr->stride_y;
6111 0 : intra_stride = intra_pred_desc.stride_y;
6112 :
6113 0 : if (pu_origin_y != 0)
6114 0 : memcpy(topNeighArray + 1, (uint16_t*)luma_recon_neighbor_array->top_array + pu_origin_x, blk_geom->bwidth * 2 * sizeof(uint16_t));
6115 :
6116 0 : if (pu_origin_x != 0)
6117 0 : memcpy(leftNeighArray + 1, (uint16_t*)luma_recon_neighbor_array->left_array + pu_origin_y, blk_geom->bheight * 2 * sizeof(uint16_t));
6118 :
6119 0 : if (pu_origin_y != 0 && pu_origin_x != 0)
6120 0 : topNeighArray[0] = leftNeighArray[0] = ((uint16_t*)luma_recon_neighbor_array->top_left_array)[MAX_PICTURE_HEIGHT_SIZE + pu_origin_x - pu_origin_y];
6121 : }
6122 :
6123 0 : else if (plane == 1) {
6124 0 : dst_ptr = (uint16_t*)prediction_ptr->buffer_cb + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cb;
6125 0 : dst_stride = prediction_ptr->stride_cb;
6126 0 : intra_stride = intra_pred_desc.stride_cb;
6127 :
6128 0 : if (cu_originy_uv != 0)
6129 0 : memcpy(topNeighArray + 1, (uint16_t*)cb_recon_neighbor_array->top_array + cu_originx_uv, blk_geom->bwidth_uv * 2 * sizeof(uint16_t));
6130 :
6131 0 : if (cu_originx_uv != 0)
6132 0 : memcpy(leftNeighArray + 1, (uint16_t*)cb_recon_neighbor_array->left_array + cu_originy_uv, blk_geom->bheight_uv * 2 * sizeof(uint16_t));
6133 :
6134 0 : if (cu_originy_uv != 0 && cu_originx_uv != 0)
6135 0 : topNeighArray[0] = leftNeighArray[0] = ((uint16_t*)cb_recon_neighbor_array->top_left_array)[MAX_PICTURE_HEIGHT_SIZE / 2 + cu_originx_uv - cu_originy_uv / 2];
6136 : }
6137 : else {
6138 0 : dst_ptr = (uint16_t*)prediction_ptr->buffer_cr + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cr;
6139 0 : dst_stride = prediction_ptr->stride_cr;
6140 0 : intra_stride = intra_pred_desc.stride_cr;
6141 :
6142 0 : if (cu_originy_uv != 0)
6143 0 : memcpy(topNeighArray + 1, (uint16_t*)cr_recon_neighbor_array->top_array + cu_originx_uv, blk_geom->bwidth_uv * 2 * sizeof(uint16_t));
6144 :
6145 0 : if (cu_originx_uv != 0)
6146 0 : memcpy(leftNeighArray + 1, (uint16_t*)cr_recon_neighbor_array->left_array + cu_originy_uv, blk_geom->bheight_uv * 2 * sizeof(uint16_t));
6147 :
6148 0 : if (cu_originy_uv != 0 && cu_originx_uv != 0)
6149 0 : topNeighArray[0] = leftNeighArray[0] = ((uint16_t*)cr_recon_neighbor_array->top_left_array)[MAX_PICTURE_HEIGHT_SIZE / 2 + cu_originx_uv - cu_originy_uv / 2];
6150 : }
6151 0 : TxSize tx_size = blk_geom->txsize[0][0]; // Nader - Intra 128x128 not supported
6152 0 : TxSize tx_size_Chroma = blk_geom->txsize_uv[0][0]; //Nader - Intra 128x128 not supported
6153 :
6154 0 : eb_av1_predict_intra_block_16bit(
6155 : tile,
6156 : !ED_STAGE,
6157 : blk_geom,
6158 0 : picture_control_set_ptr->parent_pcs_ptr->av1_cm, //const Av1Common *cm,
6159 0 : plane ? blk_geom->bwidth_uv : blk_geom->bwidth, //int32_t wpx,
6160 0 : plane ? blk_geom->bheight_uv : blk_geom->bheight, //int32_t hpx,
6161 : plane ? tx_size_Chroma : tx_size, //TxSize tx_size,
6162 0 : interintra_to_intra_mode[interintra_mode], //PredictionMode mode,
6163 : 0,
6164 : 0, //int32_t use_palette,
6165 : #if PAL_SUP
6166 : NULL,
6167 : #endif
6168 : FILTER_INTRA_MODES, // FilterIntraMode filter_intra_mode,
6169 : topNeighArray + 1,
6170 : leftNeighArray + 1,
6171 : &intra_pred_desc, //uint8_t *dst,
6172 : //int32_t dst_stride,
6173 : 0, //int32_t col_off,
6174 : 0, //int32_t row_off,
6175 : plane, //int32_t plane,
6176 0 : blk_geom->bsize, //uint32_t puSize,
6177 : dst_origin_x,
6178 : dst_origin_y,
6179 : pu_origin_x,
6180 : pu_origin_y,
6181 : 0, //uint32_t cuOrgX used only for prediction Ptr
6182 : 0 //uint32_t cuOrgY used only for prediction Ptr
6183 : );
6184 :
6185 : //combine_interintra_highbd
6186 0 : combine_interintra_highbd(
6187 : interintra_mode,
6188 : use_wedge_interintra,
6189 : interintra_wedge_index,
6190 : INTERINTRA_WEDGE_SIGN,
6191 0 : blk_geom->bsize,
6192 : plane_bsize,
6193 : (uint8_t*)dst_ptr,
6194 : dst_stride,
6195 : (uint8_t*)dst_ptr, // Inter pred buff
6196 : dst_stride, // Inter pred stride
6197 : (plane == 0) ? intra_pred : (plane == 1) ? intra_pred_cb : intra_pred_cr, // Intra pred buff
6198 : intra_stride, // Intra pred stride
6199 : bit_depth);
6200 : }
6201 : }
6202 : #endif
6203 : #if OBMC_FLAG
6204 0 : if (motion_mode == OBMC_CAUSAL)
6205 : {
6206 :
6207 : uint16_t * tmp_obmc_bufs[2];
6208 :
6209 : DECLARE_ALIGNED(16, uint16_t, obmc_buff_0[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
6210 : DECLARE_ALIGNED(16, uint16_t, obmc_buff_1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
6211 0 : tmp_obmc_bufs[0] = (uint16_t*)obmc_buff_0;
6212 0 : tmp_obmc_bufs[1] = (uint16_t*)obmc_buff_1;
6213 :
6214 : uint16_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
6215 0 : int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
6216 0 : int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
6217 :
6218 : {
6219 0 : dst_buf1[0] = (uint16_t*)tmp_obmc_bufs[0];
6220 0 : dst_buf1[1] = (uint16_t*)tmp_obmc_bufs[0] + MAX_SB_SQUARE;
6221 0 : dst_buf1[2] = (uint16_t*)tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2;
6222 0 : dst_buf2[0] = (uint16_t*)tmp_obmc_bufs[1];
6223 0 : dst_buf2[1] = (uint16_t*)tmp_obmc_bufs[1] + MAX_SB_SQUARE;
6224 0 : dst_buf2[2] = (uint16_t*)tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2;
6225 : }
6226 :
6227 0 : int mi_row = pu_origin_y >> 2;
6228 0 : int mi_col = pu_origin_x >> 2;
6229 :
6230 0 : build_prediction_by_above_preds_hbd(
6231 : perform_chroma,
6232 0 : blk_geom->bsize, picture_control_set_ptr, cu_ptr->av1xd, mi_row, mi_col, dst_buf1,
6233 : dst_stride1);
6234 :
6235 0 : build_prediction_by_left_preds_hbd(
6236 : perform_chroma,
6237 0 : blk_geom->bsize, picture_control_set_ptr, cu_ptr->av1xd, mi_row, mi_col, dst_buf2,
6238 : dst_stride2);
6239 :
6240 0 : uint16_t * final_dst_ptr_y = (uint16_t*) prediction_ptr->buffer_y + prediction_ptr->origin_x + dst_origin_x + (prediction_ptr->origin_y + dst_origin_y) * prediction_ptr->stride_y;
6241 0 : uint16_t final_dst_stride_y = prediction_ptr->stride_y;
6242 :
6243 0 : uint16_t * final_dst_ptr_u = (uint16_t*)prediction_ptr->buffer_cb + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cb;
6244 0 : uint16_t final_dst_stride_u = prediction_ptr->stride_cb;
6245 :
6246 0 : uint16_t * final_dst_ptr_v = (uint16_t*)prediction_ptr->buffer_cr + (prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2 + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cr;
6247 0 : uint16_t final_dst_stride_v = prediction_ptr->stride_cr;
6248 :
6249 0 : av1_build_obmc_inter_prediction_hbd(
6250 : final_dst_ptr_y,
6251 : final_dst_stride_y,
6252 : final_dst_ptr_u,
6253 : final_dst_stride_u,
6254 : final_dst_ptr_v,
6255 : final_dst_stride_v,
6256 : perform_chroma,
6257 0 : blk_geom->bsize,
6258 : picture_control_set_ptr,
6259 : cu_ptr->av1xd,
6260 : mi_row,
6261 : mi_col,
6262 : dst_buf1,
6263 : dst_stride1,
6264 : dst_buf2,
6265 : dst_stride2);
6266 :
6267 : }
6268 : #endif
6269 0 : return return_error;
6270 : }
6271 :
6272 :
6273 621713 : static void chroma_plane_warped_motion_prediction_sub8x8(
6274 : PictureControlSet *picture_control_set_ptr,
6275 : uint8_t compound_idx,
6276 : CodingUnit *cu_ptr,
6277 : const BlockGeom *blk_geom,
6278 : uint8_t bwidth,
6279 : uint8_t bheight,
6280 : uint8_t is_compound,
6281 : uint8_t bit_depth,
6282 : int32_t src_stride,
6283 : int32_t dst_stride,
6284 : uint8_t *src_ptr_l0,
6285 : uint8_t *src_ptr_l1,
6286 : uint8_t *dst_ptr,
6287 : MvReferenceFrame rf[2],
6288 : MvUnit *mv_unit) {
6289 621713 : EbBool is16bit = (EbBool)(bit_depth > EB_8BIT);
6290 : DECLARE_ALIGNED(32, uint16_t, tmp_dst[64 * 64]);
6291 621713 : const uint32_t interp_filters = 0;
6292 : InterpFilterParams filter_params_x, filter_params_y;
6293 :
6294 : MV mv_l0;
6295 621713 : mv_l0.col = mv_unit->mv[REF_LIST_0].x;
6296 621713 : mv_l0.row = mv_unit->mv[REF_LIST_0].y;
6297 :
6298 621713 : MV mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv_l0, blk_geom->bwidth_uv, blk_geom->bheight_uv, 1, 1);
6299 621718 : int32_t subpel_x = mv_q4.col & SUBPEL_MASK;
6300 621718 : int32_t subpel_y = mv_q4.row & SUBPEL_MASK;
6301 621718 : src_ptr_l0 = src_ptr_l0 + (is16bit ? 2 : 1) * ((mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS));
6302 621718 : ConvolveParams conv_params = get_conv_params_no_round(0, 0, 0, tmp_dst, 64, is_compound, bit_depth);
6303 :
6304 621717 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
6305 621717 : &filter_params_y, blk_geom->bwidth_uv, blk_geom->bheight_uv);
6306 :
6307 621712 : if (bit_depth == EB_8BIT)
6308 621712 : convolve[subpel_x != 0][subpel_y != 0][is_compound](
6309 : src_ptr_l0,
6310 : src_stride,
6311 : dst_ptr,
6312 : dst_stride,
6313 : bwidth,
6314 : bheight,
6315 : &filter_params_x,
6316 : &filter_params_y,
6317 : subpel_x,
6318 : subpel_y,
6319 : &conv_params);
6320 : else
6321 0 : convolveHbd[subpel_x != 0][subpel_y != 0][is_compound](
6322 : (uint16_t *)src_ptr_l0,
6323 : src_stride,
6324 : (uint16_t *)dst_ptr,
6325 : dst_stride,
6326 : bwidth,
6327 : bheight,
6328 : &filter_params_x,
6329 : &filter_params_y,
6330 : subpel_x,
6331 : subpel_y,
6332 : &conv_params,
6333 : bit_depth);
6334 :
6335 : //List1-Cb
6336 621705 : if (is_compound) {
6337 : MV mv_l1;
6338 419486 : mv_l1.col = mv_unit->mv[REF_LIST_1].x;
6339 419486 : mv_l1.row = mv_unit->mv[REF_LIST_1].y;
6340 :
6341 419486 : mv_q4 = clamp_mv_to_umv_border_sb(cu_ptr->av1xd, &mv_l1, blk_geom->bwidth_uv, blk_geom->bheight_uv, 1, 1);
6342 419490 : subpel_x = mv_q4.col & SUBPEL_MASK;
6343 419490 : subpel_y = mv_q4.row & SUBPEL_MASK;
6344 419490 : src_ptr_l1 = src_ptr_l1 + (is16bit ? 2 : 1) * ((mv_q4.row >> SUBPEL_BITS) * src_stride + (mv_q4.col >> SUBPEL_BITS));
6345 :
6346 419490 : av1_dist_wtd_comp_weight_assign(
6347 419490 : &picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header,
6348 419490 : picture_control_set_ptr->parent_pcs_ptr->cur_order_hint,// cur_frame_index,
6349 419490 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[0] - 1],// bck_frame_index,
6350 419490 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[1] - 1],// fwd_frame_index,
6351 : compound_idx,
6352 : 0,// order_idx,
6353 : &conv_params.fwd_offset, &conv_params.bck_offset,
6354 : &conv_params.use_dist_wtd_comp_avg, is_compound);
6355 419490 : conv_params.use_jnt_comp_avg = conv_params.use_dist_wtd_comp_avg;
6356 419490 : av1_get_convolve_filter_params(interp_filters, &filter_params_x,
6357 419490 : &filter_params_y, blk_geom->bwidth_uv, blk_geom->bheight_uv);
6358 :
6359 419486 : conv_params.do_average = 1;
6360 419486 : if (bit_depth == EB_8BIT)
6361 419486 : convolve[subpel_x != 0][subpel_y != 0][is_compound](
6362 : src_ptr_l1,
6363 : src_stride,
6364 : dst_ptr,
6365 : dst_stride,
6366 : bwidth,
6367 : bheight,
6368 : &filter_params_x,//puSize > 8 ? &av1RegularFilter : &av1RegularFilterW4,
6369 : &filter_params_y,//puSize > 8 ? &av1RegularFilter : &av1RegularFilterW4,
6370 : subpel_x,
6371 : subpel_y,
6372 : &conv_params);
6373 : else
6374 0 : convolveHbd[subpel_x != 0][subpel_y != 0][is_compound](
6375 : (uint16_t *)src_ptr_l1,
6376 : src_stride,
6377 : (uint16_t *)dst_ptr,
6378 : dst_stride,
6379 : bwidth,
6380 : bheight,
6381 : &filter_params_x,//puSize > 8 ? &av1RegularFilter : &av1RegularFilterW4,
6382 : &filter_params_y,//puSize > 8 ? &av1RegularFilter : &av1RegularFilterW4,
6383 : subpel_x,
6384 : subpel_y,
6385 : &conv_params,
6386 : bit_depth);
6387 : }
6388 621708 : }
6389 :
6390 :
6391 5437110 : static void plane_warped_motion_prediction(
6392 : PictureControlSet *picture_control_set_ptr,
6393 : uint8_t compound_idx,
6394 : InterInterCompoundData *interinter_comp,
6395 : uint16_t pu_origin_x,
6396 : uint16_t pu_origin_y,
6397 : const BlockGeom *blk_geom,
6398 : uint8_t bwidth,
6399 : uint8_t bheight,
6400 : EbWarpedMotionParams *wm_params_l0,
6401 : EbWarpedMotionParams *wm_params_l1,
6402 : uint8_t is_compound,
6403 : uint8_t bit_depth,
6404 : int32_t src_stride,
6405 : int32_t dst_stride,
6406 : uint16_t buf_width,
6407 : uint16_t buf_height,
6408 : uint8_t ss_x,
6409 : uint8_t ss_y,
6410 : uint8_t *src_ptr_l0,
6411 : uint8_t *src_ptr_l1,
6412 : uint8_t *dst_ptr,
6413 : uint8_t plane,
6414 : MvReferenceFrame rf[2])
6415 : {
6416 5437110 : EbBool is16bit = (EbBool)(bit_depth > EB_8BIT);
6417 :
6418 5437110 : if (!is_compound) {
6419 3306670 : ConvolveParams conv_params = get_conv_params_no_round(0, 0, 0, NULL, 128, is_compound, bit_depth);
6420 :
6421 3306760 : eb_av1_warp_plane(
6422 : wm_params_l0,
6423 : (int) is16bit,
6424 : bit_depth,
6425 : src_ptr_l0,
6426 : (int) buf_width,
6427 : (int) buf_height,
6428 : src_stride,
6429 : dst_ptr,
6430 : pu_origin_x,
6431 : pu_origin_y,
6432 : bwidth,
6433 : bheight,
6434 : dst_stride,
6435 : ss_x,
6436 : ss_y,
6437 : &conv_params);
6438 : } else {
6439 : DECLARE_ALIGNED(32, uint16_t, tmp_dstY[128 * 128]);//move this to context if stack does not hold.
6440 :
6441 2130440 : ConvolveParams conv_params = get_conv_params_no_round(0, 0, 0, tmp_dstY, 128, is_compound, bit_depth);
6442 2130920 : av1_dist_wtd_comp_weight_assign(
6443 2130920 : &picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header,
6444 2130920 : picture_control_set_ptr->parent_pcs_ptr->cur_order_hint,// cur_frame_index,
6445 2130920 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[0] - 1],// bck_frame_index,
6446 2130920 : picture_control_set_ptr->parent_pcs_ptr->ref_order_hint[rf[1] - 1],// fwd_frame_index,
6447 : compound_idx,
6448 : 0,// order_idx,
6449 : &conv_params.fwd_offset, &conv_params.bck_offset,
6450 : &conv_params.use_dist_wtd_comp_avg, is_compound);
6451 2130880 : conv_params.use_jnt_comp_avg = conv_params.use_dist_wtd_comp_avg;
6452 :
6453 2130880 : conv_params.do_average = 0;
6454 2130880 : eb_av1_warp_plane(
6455 : wm_params_l0,
6456 : (int) is16bit,
6457 : bit_depth,
6458 : src_ptr_l0,
6459 : (int) buf_width,
6460 : (int) buf_height,
6461 : src_stride,
6462 : dst_ptr,
6463 : pu_origin_x,
6464 : pu_origin_y,
6465 : bwidth,
6466 : bheight,
6467 : dst_stride,
6468 : ss_x,
6469 : ss_y,
6470 : &conv_params);
6471 :
6472 2130990 : if (is_masked_compound_type(interinter_comp->type)) {
6473 0 : av1_make_masked_warp_inter_predictor(
6474 : src_ptr_l1,
6475 : src_stride,
6476 : buf_width,
6477 : buf_height,
6478 : dst_ptr,
6479 : dst_stride,
6480 : blk_geom,
6481 : bwidth,
6482 : bheight,
6483 : &conv_params,
6484 : interinter_comp,
6485 : bit_depth,
6486 : plane,
6487 : pu_origin_x,
6488 : pu_origin_y,
6489 : wm_params_l1
6490 : );
6491 : } else {
6492 2130960 : conv_params.do_average = 1;
6493 2130960 : eb_av1_warp_plane(
6494 : wm_params_l1,
6495 : (int) is16bit,
6496 : bit_depth,
6497 : src_ptr_l1,
6498 : (int) buf_width,
6499 : (int) buf_height,
6500 : src_stride,
6501 : dst_ptr,
6502 : pu_origin_x,
6503 : pu_origin_y,
6504 : bwidth,
6505 : bheight,
6506 : dst_stride,
6507 : ss_x,
6508 : ss_y,
6509 : &conv_params);
6510 : }
6511 : }
6512 5437840 : }
6513 :
6514 :
6515 5057210 : EbErrorType warped_motion_prediction(
6516 : PictureControlSet *picture_control_set_ptr,
6517 : MvUnit *mv_unit,
6518 : uint8_t ref_frame_type,
6519 : uint8_t compound_idx,
6520 : InterInterCompoundData *interinter_comp,
6521 : uint16_t pu_origin_x,
6522 : uint16_t pu_origin_y,
6523 : CodingUnit *cu_ptr,
6524 : const BlockGeom *blk_geom,
6525 : EbPictureBufferDesc *ref_pic_list0,
6526 : EbPictureBufferDesc *ref_pic_list1,
6527 : EbPictureBufferDesc *prediction_ptr,
6528 : uint16_t dst_origin_x,
6529 : uint16_t dst_origin_y,
6530 : EbWarpedMotionParams *wm_params_l0,
6531 : EbWarpedMotionParams *wm_params_l1,
6532 : uint8_t bit_depth,
6533 : EbBool perform_chroma)
6534 : {
6535 5057210 : EbErrorType return_error = EB_ErrorNone;
6536 5057210 : uint8_t is_compound = (mv_unit->pred_direction == BI_PRED) ? 1 : 0;
6537 5057210 : EbBool is16bit = (EbBool)(bit_depth > EB_8BIT);
6538 :
6539 : int32_t src_stride;
6540 : int32_t dst_stride;
6541 : uint16_t buf_width;
6542 : uint16_t buf_height;
6543 5057210 : uint8_t ss_x = 1; // subsamplings
6544 5057210 : uint8_t ss_y = 1;
6545 :
6546 : MvReferenceFrame rf[2];
6547 5057210 : av1_set_ref_frame(rf, ref_frame_type);
6548 :
6549 : uint8_t *src_ptr_l0, *src_ptr_l1;
6550 : uint8_t *dst_ptr;
6551 5056920 : assert(ref_pic_list0 != NULL);
6552 :
6553 : // Y
6554 5056990 : src_ptr_l0 = ref_pic_list0->buffer_y + (is16bit ? 2 : 1)
6555 5056990 : * (ref_pic_list0->origin_x + ref_pic_list0->origin_y * ref_pic_list0->stride_y);
6556 1867670 : src_ptr_l1 = is_compound ? ref_pic_list1->buffer_y + (is16bit ? 2 : 1)
6557 1867670 : * (ref_pic_list1->origin_x + ref_pic_list1->origin_y * ref_pic_list1->stride_y)
6558 6924660 : : NULL;
6559 5056990 : src_stride = ref_pic_list0->stride_y;
6560 5056990 : buf_width = ref_pic_list0->width;
6561 5056990 : buf_height = ref_pic_list0->height;
6562 :
6563 5056990 : dst_ptr = prediction_ptr->buffer_y + (is16bit ? 2 : 1)
6564 5056990 : * (prediction_ptr->origin_x + dst_origin_x
6565 5056990 : + (prediction_ptr->origin_y + dst_origin_y) * prediction_ptr->stride_y);
6566 5056990 : dst_stride = prediction_ptr->stride_y;
6567 :
6568 : // Warp plane
6569 5056990 : plane_warped_motion_prediction(
6570 : picture_control_set_ptr,
6571 : compound_idx,
6572 : interinter_comp,
6573 : pu_origin_x,
6574 : pu_origin_y,
6575 : blk_geom,
6576 5056990 : blk_geom->bwidth,
6577 5056990 : blk_geom->bheight,
6578 : wm_params_l0,
6579 : wm_params_l1,
6580 : is_compound,
6581 : bit_depth,
6582 : src_stride,
6583 : dst_stride,
6584 : buf_width,
6585 : buf_height,
6586 : 0,
6587 : 0,
6588 : src_ptr_l0,
6589 : src_ptr_l1,
6590 : dst_ptr,
6591 : 0, // plane
6592 : rf);
6593 :
6594 5057460 : if (!blk_geom->has_uv)
6595 0 : return return_error;
6596 :
6597 5057460 : if (perform_chroma) {
6598 500978 : if (blk_geom->bwidth >= 16 && blk_geom->bheight >= 16) {
6599 : // Cb
6600 190118 : src_ptr_l0 = ref_pic_list0->buffer_cb + (is16bit ? 2 : 1)
6601 190118 : * (ref_pic_list0->origin_x / 2
6602 190118 : + (ref_pic_list0->origin_y / 2) * ref_pic_list0->stride_cb);
6603 131638 : src_ptr_l1 = is_compound ? ref_pic_list1->buffer_cb + (is16bit ? 2 : 1)
6604 131638 : * (ref_pic_list1->origin_x / 2
6605 131638 : + (ref_pic_list1->origin_y / 2 ) * ref_pic_list1->stride_cb)
6606 321756 : : NULL;
6607 190118 : src_stride = ref_pic_list0->stride_cb;
6608 :
6609 190118 : dst_ptr = prediction_ptr->buffer_cb + (is16bit ? 2 : 1)
6610 190118 : * ((prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2
6611 190118 : + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cb);
6612 190118 : dst_stride = prediction_ptr->stride_cb;
6613 :
6614 190118 : plane_warped_motion_prediction(
6615 : picture_control_set_ptr,
6616 : compound_idx,
6617 : interinter_comp,
6618 190118 : pu_origin_x >> ss_x,
6619 190118 : pu_origin_y >> ss_y,
6620 : blk_geom,
6621 190118 : blk_geom->bwidth_uv,
6622 190118 : blk_geom->bheight_uv,
6623 : wm_params_l0,
6624 : wm_params_l1,
6625 : is_compound,
6626 : bit_depth,
6627 : src_stride,
6628 : dst_stride,
6629 190118 : buf_width >> ss_x,
6630 190118 : buf_height >> ss_y,
6631 : ss_x,
6632 : ss_y,
6633 : src_ptr_l0,
6634 : src_ptr_l1,
6635 : dst_ptr,
6636 : 1, // plane
6637 : rf);
6638 :
6639 : // Cr
6640 190117 : src_ptr_l0 = ref_pic_list0->buffer_cr + (is16bit ? 2 : 1)
6641 190117 : * (ref_pic_list0->origin_x / 2
6642 190117 : + (ref_pic_list0->origin_y / 2 ) * ref_pic_list0->stride_cr);
6643 131637 : src_ptr_l1 = is_compound ? ref_pic_list1->buffer_cr + (is16bit ? 2 : 1)
6644 131637 : * (ref_pic_list1->origin_x / 2
6645 131637 : + (ref_pic_list1->origin_y / 2 ) * ref_pic_list1->stride_cr)
6646 321754 : : NULL;
6647 190117 : src_stride = ref_pic_list0->stride_cr;
6648 :
6649 190117 : dst_ptr = prediction_ptr->buffer_cr + (is16bit ? 2 : 1)
6650 190117 : * ((prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2
6651 190117 : + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cr);
6652 190117 : dst_stride = prediction_ptr->stride_cr;
6653 :
6654 190117 : plane_warped_motion_prediction(
6655 : picture_control_set_ptr,
6656 : compound_idx,
6657 : interinter_comp,
6658 190117 : pu_origin_x >> ss_x,
6659 190117 : pu_origin_y >> ss_y,
6660 : blk_geom,
6661 190117 : blk_geom->bwidth_uv,
6662 190117 : blk_geom->bheight_uv,
6663 : wm_params_l0,
6664 : wm_params_l1,
6665 : is_compound,
6666 : bit_depth,
6667 : src_stride,
6668 : dst_stride,
6669 190117 : buf_width >> ss_x,
6670 190117 : buf_height >> ss_y,
6671 : ss_x,
6672 : ss_y,
6673 : src_ptr_l0,
6674 : src_ptr_l1,
6675 : dst_ptr,
6676 : 2, // plane
6677 : rf);
6678 :
6679 : } else { // Translation prediction when chroma block is smaller than 8x8
6680 :
6681 : // Cb
6682 310860 : src_ptr_l0 = ref_pic_list0->buffer_cb + (is16bit ? 2 : 1)
6683 310860 : * ((ref_pic_list0->origin_x + ((pu_origin_x >> 3) << 3)) / 2
6684 310860 : + (ref_pic_list0->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic_list0->stride_cb);
6685 209746 : src_ptr_l1 = is_compound ? ref_pic_list1->buffer_cb + (is16bit ? 2 : 1)
6686 209746 : * ((ref_pic_list1->origin_x + ((pu_origin_x >> 3) << 3)) / 2
6687 209746 : + (ref_pic_list1->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic_list1->stride_cb)
6688 520606 : : NULL;
6689 310860 : dst_ptr = prediction_ptr->buffer_cb + (is16bit ? 2 : 1)
6690 310860 : * ((prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2
6691 310860 : + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cb);
6692 310860 : src_stride = ref_pic_list0->stride_cb;
6693 310860 : dst_stride = prediction_ptr->stride_cb;
6694 :
6695 310860 : chroma_plane_warped_motion_prediction_sub8x8(
6696 : picture_control_set_ptr,
6697 : compound_idx,
6698 : cu_ptr,
6699 : blk_geom,
6700 310860 : blk_geom->bwidth_uv,
6701 310860 : blk_geom->bheight_uv,
6702 : is_compound,
6703 : bit_depth,
6704 : src_stride,
6705 : dst_stride,
6706 : src_ptr_l0,
6707 : src_ptr_l1,
6708 : dst_ptr,
6709 : rf,
6710 : mv_unit);
6711 :
6712 : // Cr
6713 310860 : src_ptr_l0 = ref_pic_list0->buffer_cr + (is16bit ? 2 : 1)
6714 310860 : * ((ref_pic_list0->origin_x + ((pu_origin_x >> 3) << 3)) / 2
6715 310860 : + (ref_pic_list0->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic_list0->stride_cr);
6716 209746 : src_ptr_l1 = is_compound ? ref_pic_list1->buffer_cr + (is16bit ? 2 : 1)
6717 209746 : * ((ref_pic_list1->origin_x + ((pu_origin_x >> 3) << 3)) / 2
6718 209746 : + (ref_pic_list1->origin_y + ((pu_origin_y >> 3) << 3)) / 2 * ref_pic_list1->stride_cr)
6719 520606 : : NULL;
6720 310860 : dst_ptr = prediction_ptr->buffer_cr + (is16bit ? 2 : 1)
6721 310860 : * ((prediction_ptr->origin_x + ((dst_origin_x >> 3) << 3)) / 2
6722 310860 : + (prediction_ptr->origin_y + ((dst_origin_y >> 3) << 3)) / 2 * prediction_ptr->stride_cb);
6723 310860 : src_stride = ref_pic_list0->stride_cr;
6724 310860 : dst_stride = prediction_ptr->stride_cr;
6725 :
6726 310860 : chroma_plane_warped_motion_prediction_sub8x8(
6727 : picture_control_set_ptr,
6728 : compound_idx,
6729 : cu_ptr,
6730 : blk_geom,
6731 310860 : blk_geom->bwidth_uv,
6732 310860 : blk_geom->bheight_uv,
6733 : is_compound,
6734 : bit_depth,
6735 : src_stride,
6736 : dst_stride,
6737 : src_ptr_l0,
6738 : src_ptr_l1,
6739 : dst_ptr,
6740 : rf,
6741 : mv_unit);
6742 : }
6743 : }
6744 :
6745 5057460 : return return_error;
6746 : }
6747 :
6748 :
6749 : #define SWITCHABLE_INTERP_RATE_FACTOR 1
6750 : extern int32_t eb_av1_get_pred_context_switchable_interp(
6751 : NeighborArrayUnit *ref_frame_type_neighbor_array,
6752 : MvReferenceFrame rf0,
6753 : MvReferenceFrame rf1,
6754 : NeighborArrayUnit32 *interpolation_type_neighbor_array,
6755 : uint32_t cu_origin_x,
6756 : uint32_t cu_origin_y,
6757 : int32_t dir
6758 : );
6759 :
6760 66250300 : int32_t eb_av1_get_switchable_rate(
6761 : ModeDecisionCandidateBuffer *candidate_buffer_ptr,
6762 : const Av1Common *const cm,
6763 : ModeDecisionContext *md_context_ptr)
6764 : {
6765 66250300 : if (cm->interp_filter == SWITCHABLE) {
6766 66254000 : int32_t inter_filter_cost = 0;
6767 : int32_t dir;
6768 :
6769 198361000 : for (dir = 0; dir < 2; ++dir) {
6770 : MvReferenceFrame rf[2];
6771 132096000 : av1_set_ref_frame(rf, candidate_buffer_ptr->candidate_ptr->ref_frame_type);
6772 132081000 : const int32_t ctx = eb_av1_get_pred_context_switchable_interp(
6773 : md_context_ptr->ref_frame_type_neighbor_array,
6774 132081000 : rf[0],
6775 132081000 : rf[1],
6776 : md_context_ptr->interpolation_type_neighbor_array,
6777 132081000 : md_context_ptr->cu_origin_x,
6778 132081000 : md_context_ptr->cu_origin_y,
6779 : dir
6780 : );
6781 :
6782 132112000 : const InterpFilter filter = av1_extract_interp_filter(/*mbmi*/candidate_buffer_ptr->candidate_ptr->interp_filters, dir);
6783 132107000 : assert(ctx < SWITCHABLE_FILTER_CONTEXTS);
6784 132107000 : assert(filter < SWITCHABLE_FILTERS);
6785 132107000 : inter_filter_cost += /*x->switchable_interp_costs*/md_context_ptr->md_rate_estimation_ptr->switchable_interp_fac_bitss[ctx][filter];
6786 : }
6787 66265100 : return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
6788 : }
6789 : else
6790 0 : return 0;
6791 : }
6792 :
6793 : //void model_rd_norm(int32_t xsq_q10, int32_t *r_q10, int32_t *d_q10) {
6794 : // NOTE: The tables below must be of the same size.
6795 :
6796 : // The functions described below are sampled at the four most significant
6797 : // bits of x^2 + 8 / 256.
6798 :
/*
 * Writes to *sse the sum of squared differences between two w x h sample
 * blocks. a8 and b8 are read as 8-bit samples (the short-pointer conversion
 * is disabled); a_stride and b_stride are the row pitches in samples.
 * A non-positive w or h yields *sse = 0.
 */
void highbd_variance64_c(const uint8_t *a8, int32_t a_stride,
    const uint8_t *b8, int32_t b_stride, int32_t w, int32_t h,
    uint64_t *sse) {
    const uint8_t *row_a = a8; // CONVERT_TO_SHORTPTR(a8) intentionally not applied
    const uint8_t *row_b = b8; // CONVERT_TO_SHORTPTR(b8) intentionally not applied
    uint64_t sum_sq = 0;

    for (int32_t rows_left = h; rows_left > 0; --rows_left) {
        for (int32_t col = 0; col < w; ++col) {
            const int32_t delta = row_a[col] - row_b[col];
            sum_sq += (uint32_t)(delta * delta);
        }
        row_a += a_stride;
        row_b += b_stride;
    }

    *sse = sum_sq;
}
6815 :
#define RDDIV_BITS 7
// Combined cost: rate R weighted by multiplier RM (rounded down by
// AV1_PROB_COST_SHIFT bits) plus distortion D scaled by 2^RDDIV_BITS.
#define RDCOST(RM, R, D)                                            \
  (ROUND_POWER_OF_TWO(((uint64_t)(R)) * (RM), AV1_PROB_COST_SHIFT) + \
   ((D) * (1 << RDDIV_BITS)))

/*
 * Looks up the normalized rate (*r_q10) and normalized distortion (*d_q10)
 * for xsq_q10, all in Q10 fixed point, by linearly interpolating between two
 * adjacent entries of the tables below.
 *
 * The largest index read is xq + 1, so xsq_q10 must be small enough that
 * xq + 1 stays inside the 104-entry tables.
 */
static void model_rd_norm(int32_t xsq_q10, int32_t *r_q10, int32_t *d_q10) {
    // NOTE: The tables below must be of the same size.

    // The functions described below are sampled at the four most significant
    // bits of x^2 + 8 / 256.

    // Normalized rate:
    // This table models the rate for a Laplacian source with given variance
    // when quantized with a uniform quantizer with given stepsize. The
    // closed form expression is:
    // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
    // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
    // and H(x) is the binary entropy function.
    static const int32_t rate_tab_q10[] = {
        65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
        4044,  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
        3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
        2290,  2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
        1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
        911,   864,  821,  781,  745,  680,  623,  574,  530,  490,  455,  424,
        395,   345,  304,  269,  239,  213,  190,  171,  154,  126,  104,  87,
        73,    61,   52,   44,   38,   28,   21,   16,   12,   10,   8,    6,
        5,     3,    2,    1,    1,    1,    0,    0,
    };
    // Normalized distortion:
    // This table models the normalized distortion for a Laplacian source
    // with given variance when quantized with a uniform quantizer
    // with given stepsize. The closed form expression is:
    // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
    // where x = qpstep / sqrt(variance).
    // Note the actual distortion is Dn * variance.
    static const int32_t dist_tab_q10[] = {
        0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,
        5,    6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,
        18,   21,   24,   26,   29,   31,   34,   36,   39,   44,   49,   54,
        59,   64,   69,   73,   78,   88,   97,   106,  115,  124,  133,  142,
        151,  167,  184,  200,  215,  231,  245,  260,  274,  301,  327,  351,
        375,  397,  418,  439,  458,  495,  528,  559,  587,  613,  637,  659,
        680,  717,  749,  777,  801,  823,  842,  859,  874,  899,  919,  936,
        949,  960,  969,  977,  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,
        1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
    };
    // Sample positions (Q10) corresponding to each table index.
    static const int32_t xsq_iq_q10[] = {
        0,      4,      8,      12,     16,     20,     24,     28,     32,
        40,     48,     56,     64,     72,     80,     88,     96,     112,
        128,    144,    160,    176,    192,    208,    224,    256,    288,
        320,    352,    384,    416,    448,    480,    544,    608,    672,
        736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
        1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
        3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
        7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
        16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
        36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
        81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
        180192, 196576, 212960, 229344, 245728,
    };

    // Table index: eight entries per power-of-two interval of the biased input.
    const int32_t biased = (xsq_q10 >> 2) + 8;
    const int32_t octave = get_msb(biased) - 3;
    const int32_t xq = (octave << 3) + ((biased >> octave) & 0x7);

    // Interpolation weights in Q10: w_hi for entry xq + 1, w_lo for entry xq.
    const int32_t one_q10 = 1 << 10;
    const int32_t w_hi = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + octave);
    const int32_t w_lo = one_q10 - w_hi;

    *r_q10 = (rate_tab_q10[xq] * w_lo + rate_tab_q10[xq + 1] * w_hi) >> 10;
    *d_q10 = (dist_tab_q10[xq] * w_lo + dist_tab_q10[xq + 1] * w_hi) >> 10;
}
6886 :
6887 66283600 : void eb_av1_model_rd_from_var_lapndz(int64_t var, uint32_t n_log2,
6888 : uint32_t qstep, int32_t *rate,
6889 : int64_t *dist) {
6890 : // This function models the rate and distortion for a Laplacian
6891 : // source with given variance when quantized with a uniform quantizer
6892 : // with given stepsize. The closed form expressions are in:
6893 : // Hang and Chen, "Source Model for transform video coder and its
6894 : // application - Part I: Fundamental Theory", IEEE Trans. Circ.
6895 : // Sys. for Video Tech., April 1997.
6896 66283600 : if (var == 0) {
6897 596 : *rate = 0;
6898 596 : *dist = 0;
6899 : }
6900 : else {
6901 : int32_t d_q10, r_q10;
6902 : static const uint32_t MAX_XSQ_Q10 = 245727;
6903 66283000 : const uint64_t xsq_q10_64 =
6904 66283000 : (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
6905 66283000 : const int32_t xsq_q10 = (int32_t)MIN(xsq_q10_64, MAX_XSQ_Q10);
6906 66283000 : model_rd_norm(xsq_q10, &r_q10, &d_q10);
6907 66342200 : *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
6908 66342200 : *dist = (var * (int64_t)d_q10 + 512) >> 10;
6909 : }
6910 66342700 : }
6911 :
6912 66285500 : void model_rd_from_sse(
6913 : BlockSize bsize,
6914 : int16_t quantizer,
6915 : uint8_t bit_depth,
6916 : uint64_t sse,
6917 : uint32_t *rate,
6918 : uint64_t *dist)
6919 : {
6920 : /* OMK (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :3;*/
6921 66285500 : int32_t dequant_shift = bit_depth - 5;
6922 :
6923 : // Fast approximate the modelling function.
6924 : if (0/*cpi->sf.simple_model_rd_from_var*/)
6925 : {
6926 : int64_t square_error = (uint64_t)sse;
6927 : quantizer = quantizer >> dequant_shift;
6928 :
6929 : if (quantizer < 120)
6930 : *rate = (int32_t)((square_error * (280 - quantizer)) >>
6931 : (16 - AV1_PROB_COST_SHIFT));
6932 : else
6933 : *rate = 0;
6934 : *dist = (uint64_t)(square_error * quantizer) >> 8;
6935 : } else {
6936 66285500 : eb_av1_model_rd_from_var_lapndz((uint64_t)sse, num_pels_log2_lookup[bsize],
6937 66285500 : quantizer >> dequant_shift, (int32_t*)rate,
6938 : (int64_t*)dist);
6939 : }
6940 :
6941 66342400 : *dist <<= 4;
6942 66342400 : }
6943 :
6944 66236800 : extern void model_rd_for_sb(
6945 : PictureControlSet *picture_control_set_ptr,
6946 : EbPictureBufferDesc *prediction_ptr,
6947 : ModeDecisionContext *md_context_ptr,
6948 : int32_t plane_from,
6949 : int32_t plane_to,
6950 : int32_t *out_rate_sum,
6951 : int64_t *out_dist_sum,
6952 : uint8_t bit_depth)
6953 : {
6954 : // Note our transform coeffs are 8 times an orthogonal transform.
6955 : // Hence quantizer step is also 8 times. To get effective quantizer
6956 : // we need to divide by 8 before sending to modeling function.
6957 : int32_t plane;
6958 :
6959 66236800 : uint64_t rate_sum = 0;
6960 66236800 : uint64_t dist_sum = 0;
6961 66236800 : uint64_t total_sse = 0;
6962 :
6963 66236800 : EbPictureBufferDesc *input_picture_ptr = bit_depth > 8 ? picture_control_set_ptr->input_frame16bit : picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
6964 66236800 : const uint32_t input_offset = (md_context_ptr->cu_origin_y + input_picture_ptr->origin_y) * input_picture_ptr->stride_y + (md_context_ptr->cu_origin_x + input_picture_ptr->origin_x);
6965 66236800 : const uint32_t input_chroma_offset = ((md_context_ptr->cu_origin_y + input_picture_ptr->origin_y) * input_picture_ptr->stride_cb + (md_context_ptr->cu_origin_x + input_picture_ptr->origin_x)) / 2;
6966 66236800 : const uint32_t prediction_offset = prediction_ptr->origin_x + md_context_ptr->blk_geom->origin_x + (prediction_ptr->origin_y + md_context_ptr->blk_geom->origin_y) * prediction_ptr->stride_y;
6967 66236800 : const uint32_t prediction_chroma_offset = (prediction_ptr->origin_x + md_context_ptr->blk_geom->origin_x + (prediction_ptr->origin_y + md_context_ptr->blk_geom->origin_y) * prediction_ptr->stride_cb) / 2;
6968 :
6969 66236800 : EbSpatialFullDistType spatial_full_dist_type_fun = bit_depth > 8 ?
6970 66236800 : full_distortion_kernel16_bits : spatial_full_distortion_kernel;
6971 :
6972 132580000 : for (plane = plane_from; plane <= plane_to; ++plane) {
6973 : uint64_t sse;
6974 : uint32_t rate;
6975 : uint64_t dist;
6976 :
6977 66301200 : if (plane == 0) {
6978 66301200 : sse = spatial_full_dist_type_fun(
6979 : input_picture_ptr->buffer_y,
6980 : input_offset,
6981 66301200 : input_picture_ptr->stride_y,
6982 : prediction_ptr->buffer_y,
6983 : prediction_offset,
6984 66301200 : prediction_ptr->stride_y,
6985 66301200 : md_context_ptr->blk_geom->bwidth,
6986 66301200 : md_context_ptr->blk_geom->bheight);
6987 : }
6988 0 : else if (plane == 1) {
6989 0 : sse = spatial_full_dist_type_fun(
6990 : input_picture_ptr->buffer_cb,
6991 : input_chroma_offset,
6992 0 : input_picture_ptr->stride_cb,
6993 : prediction_ptr->buffer_cb,
6994 : prediction_chroma_offset,
6995 0 : prediction_ptr->stride_cb,
6996 0 : md_context_ptr->blk_geom->bwidth_uv,
6997 0 : md_context_ptr->blk_geom->bheight_uv);
6998 : } else {
6999 0 : sse = spatial_full_dist_type_fun(
7000 : input_picture_ptr->buffer_cr,
7001 : input_chroma_offset,
7002 0 : input_picture_ptr->stride_cr,
7003 : prediction_ptr->buffer_cr,
7004 : prediction_chroma_offset,
7005 0 : prediction_ptr->stride_cr,
7006 0 : md_context_ptr->blk_geom->bwidth_uv,
7007 0 : md_context_ptr->blk_geom->bheight_uv);
7008 : }
7009 :
7010 66295600 : sse = ROUND_POWER_OF_TWO(sse, 2 * (bit_depth - 8));
7011 66295600 : total_sse += sse;
7012 :
7013 66295600 : int32_t current_q_index = picture_control_set_ptr->
7014 66295600 : parent_pcs_ptr->frm_hdr.quantization_params.base_q_idx;
7015 66295600 : Dequants *const dequants = &picture_control_set_ptr->parent_pcs_ptr->deq;
7016 :
7017 66295600 : int16_t quantizer = dequants->y_dequant_Q3[current_q_index][1];
7018 132591000 : model_rd_from_sse(
7019 66295600 : plane == 0 ? md_context_ptr->blk_geom->bsize : md_context_ptr->blk_geom->bsize_uv,
7020 : quantizer,
7021 : bit_depth,
7022 : sse,
7023 : &rate,
7024 : &dist);
7025 :
7026 66343100 : rate_sum += rate;
7027 66343100 : dist_sum += dist;
7028 : }
7029 :
7030 : //*skip_txfm_sb = total_sse == 0;
7031 : //*skip_sse_sb = total_sse << 4;
7032 66278700 : *out_rate_sum = (int32_t)rate_sum;
7033 66278700 : *out_dist_sum = dist_sum;
7034 66278700 : }
7035 :
7036 :
7037 22012900 : int32_t is_nontrans_global_motion(
7038 : BlockSize sb_type,
7039 : ModeDecisionCandidateBuffer *candidate_buffer_ptr,
7040 : PictureControlSet *picture_control_set_ptr)
7041 : {
7042 : int32_t ref;
7043 :
7044 : // First check if all modes are GLOBALMV
7045 22012900 : if (candidate_buffer_ptr->candidate_ptr->pred_mode != GLOBALMV && candidate_buffer_ptr->candidate_ptr->pred_mode != GLOBAL_GLOBALMV)
7046 22013700 : return 0;
7047 :
7048 0 : if (MIN(mi_size_wide[sb_type], mi_size_high[sb_type]) < 2)
7049 0 : return 0;
7050 : MvReferenceFrame rf[2];
7051 0 : av1_set_ref_frame(rf, candidate_buffer_ptr->candidate_ptr->ref_frame_type);
7052 : // Now check if all global motion is non translational
7053 0 : for (ref = 0; ref < 1 + candidate_buffer_ptr->candidate_ptr->is_compound/*has_second_ref(mbmi)*/; ++ref) {
7054 0 : if (picture_control_set_ptr->parent_pcs_ptr->global_motion[ref ? rf[1] : rf[0]].wmtype == TRANSLATION)
7055 : //if (xd->global_motion[mbmi->ref_frame[ref]].wmtype == TRANSLATION)
7056 0 : return 0;
7057 : }
7058 0 : return 1;
7059 : }
7060 22361200 : static INLINE int32_t av1_is_interp_needed(
7061 : ModeDecisionCandidateBuffer *candidate_buffer_ptr,
7062 : PictureControlSet *picture_control_set_ptr,
7063 : BlockSize bsize)
7064 : {
7065 22361200 : if (candidate_buffer_ptr->candidate_ptr->merge_flag)
7066 348252 : return 0;
7067 :
7068 22013000 : if (candidate_buffer_ptr->candidate_ptr->motion_mode == WARPED_CAUSAL)
7069 0 : return 0;
7070 :
7071 22013000 : if (is_nontrans_global_motion( bsize,
7072 : candidate_buffer_ptr, picture_control_set_ptr))
7073 0 : return 0;
7074 :
7075 22014100 : return 1;
7076 : }
7077 :
7078 : #define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
7079 : static const int32_t filter_sets[DUAL_FILTER_SET_SIZE][2] = {
7080 : { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 },
7081 : { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 },
7082 : };
7083 :
7084 22355900 : void interpolation_filter_search(
7085 : PictureControlSet *picture_control_set_ptr,
7086 : EbPictureBufferDesc *prediction_ptr,
7087 : ModeDecisionContext *md_context_ptr,
7088 : ModeDecisionCandidateBuffer *candidate_buffer_ptr,
7089 : MvUnit mv_unit,
7090 : EbPictureBufferDesc *ref_pic_list0,
7091 : EbPictureBufferDesc *ref_pic_list1,
7092 : uint8_t hbd_mode_decision,
7093 : uint8_t bit_depth)
7094 : {
7095 22355900 : const Av1Common *cm = picture_control_set_ptr->parent_pcs_ptr->av1_cm;//&cpi->common;
7096 22358300 : EbBool use_uv = (md_context_ptr->blk_geom->has_uv && md_context_ptr->chroma_level <= CHROMA_MODE_1 &&
7097 44714300 : picture_control_set_ptr->parent_pcs_ptr->interpolation_search_level != IT_SEARCH_FAST_LOOP_UV_BLIND) ? EB_TRUE : EB_FALSE;
7098 22355900 : const int32_t num_planes = use_uv ? MAX_MB_PLANE : 1;
7099 :
7100 22355900 : int64_t rd = INT64_MAX;
7101 22355900 : int32_t switchable_rate = 0;
7102 :
7103 : int32_t i;
7104 : int32_t tmp_rate;
7105 : int64_t tmp_dist;
7106 :
7107 22355900 : uint32_t full_lambda_8b = md_context_ptr->full_lambda >> (2 * (bit_depth - 8));
7108 :
7109 22355900 : InterpFilter assign_filter = SWITCHABLE;
7110 :
7111 22355900 : if (cm->interp_filter != SWITCHABLE)
7112 0 : assign_filter = cm->interp_filter;
7113 :
7114 : //set_default_interp_filters(mbmi, assign_filter);
7115 44709500 : /*mbmi*/candidate_buffer_ptr->candidate_ptr->interp_filters =//EIGHTTAP_REGULAR ;
7116 22355900 : av1_broadcast_interp_filter(av1_unswitchable_filter(assign_filter));
7117 :
7118 22357300 : switchable_rate = eb_av1_get_switchable_rate(
7119 : candidate_buffer_ptr,
7120 : cm,
7121 : md_context_ptr
7122 : );
7123 :
7124 22342900 : av1_inter_prediction_function_table[hbd_mode_decision](
7125 : picture_control_set_ptr,
7126 22342900 : candidate_buffer_ptr->candidate_ptr->interp_filters,
7127 : md_context_ptr->cu_ptr,
7128 22342900 : candidate_buffer_ptr->candidate_ptr->ref_frame_type,
7129 : &mv_unit,
7130 : 0,
7131 : #if OBMC_FLAG
7132 : SIMPLE_TRANSLATION,
7133 : 0,
7134 : 0,
7135 : #endif
7136 22342900 : candidate_buffer_ptr->candidate_ptr->compound_idx,
7137 22342900 : &candidate_buffer_ptr->candidate_ptr->interinter_comp,
7138 : #if II_COMP_FLAG
7139 22342900 : &md_context_ptr->sb_ptr->tile_info,
7140 : md_context_ptr->luma_recon_neighbor_array,
7141 : md_context_ptr->cb_recon_neighbor_array,
7142 : md_context_ptr->cr_recon_neighbor_array,
7143 : 0, //No inter-intra for IFSearch
7144 22342900 : candidate_buffer_ptr->candidate_ptr->interintra_mode,
7145 22342900 : candidate_buffer_ptr->candidate_ptr->use_wedge_interintra,
7146 22342900 : candidate_buffer_ptr->candidate_ptr->interintra_wedge_index,
7147 : #endif
7148 22342900 : md_context_ptr->cu_origin_x,
7149 22342900 : md_context_ptr->cu_origin_y,
7150 22342900 : md_context_ptr->blk_geom->bwidth,
7151 22342900 : md_context_ptr->blk_geom->bheight,
7152 : ref_pic_list0,
7153 : ref_pic_list1,
7154 : prediction_ptr,
7155 22342900 : md_context_ptr->blk_geom->origin_x,
7156 22342900 : md_context_ptr->blk_geom->origin_y,
7157 : use_uv,
7158 : #if IFS_8BIT_MD
7159 : hbd_mode_decision ? EB_10BIT : EB_8BIT);
7160 : #else
7161 : (uint8_t)picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->static_config.encoder_bit_depth);
7162 : #endif
7163 :
7164 22352000 : model_rd_for_sb(
7165 : picture_control_set_ptr,
7166 : prediction_ptr,
7167 : md_context_ptr,
7168 : 0,
7169 : num_planes - 1,
7170 : &tmp_rate,
7171 : &tmp_dist,
7172 : hbd_mode_decision ? EB_10BIT : EB_8BIT);
7173 :
7174 22361200 : rd = RDCOST(full_lambda_8b, switchable_rate + tmp_rate, tmp_dist);
7175 :
7176 22361200 : if (assign_filter == SWITCHABLE) {
7177 : // do interp_filter search
7178 22361800 : if (av1_is_interp_needed(candidate_buffer_ptr, picture_control_set_ptr, md_context_ptr->blk_geom->bsize) /*&& av1_is_interp_search_needed(xd)*/) {
7179 22013800 : const int32_t filter_set_size = DUAL_FILTER_SET_SIZE;
7180 22013800 : int32_t best_in_temp = 0;
7181 22013800 : uint32_t best_filters = 0;// mbmi->interp_filters;
7182 :
7183 22013800 : if (picture_control_set_ptr->parent_pcs_ptr->interpolation_search_level &&
7184 22014500 : picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.enable_dual_filter) {
7185 : int32_t tmp_rs;
7186 : int64_t tmp_rd;
7187 :
7188 : // default to (R,R): EIGHTTAP_REGULARxEIGHTTAP_REGULAR
7189 0 : int32_t best_dual_mode = 0;
7190 : // Find best of {R}x{R,Sm,Sh}
7191 : // EIGHTTAP_REGULAR mode is calculated beforehand
7192 0 : for (i = 1; i < SWITCHABLE_FILTERS; ++i) {
7193 :
7194 0 : /*mbmi*/candidate_buffer_ptr->candidate_ptr->interp_filters = (InterpFilter)
7195 0 : av1_make_interp_filters((InterpFilter)filter_sets[i][0], (InterpFilter)filter_sets[i][1]);
7196 :
7197 0 : tmp_rs = eb_av1_get_switchable_rate(
7198 : candidate_buffer_ptr,
7199 : cm,
7200 : md_context_ptr
7201 : );
7202 :
7203 0 : av1_inter_prediction_function_table[hbd_mode_decision](
7204 : picture_control_set_ptr,
7205 0 : candidate_buffer_ptr->candidate_ptr->interp_filters,
7206 : md_context_ptr->cu_ptr,
7207 0 : candidate_buffer_ptr->candidate_ptr->ref_frame_type,
7208 : &mv_unit,
7209 : 0,
7210 : #if OBMC_FLAG
7211 : SIMPLE_TRANSLATION,
7212 : 0,
7213 : 0,
7214 : #endif
7215 0 : candidate_buffer_ptr->candidate_ptr->compound_idx,
7216 0 : &candidate_buffer_ptr->candidate_ptr->interinter_comp,
7217 : #if II_COMP_FLAG
7218 0 : &md_context_ptr->sb_ptr->tile_info,
7219 : md_context_ptr->luma_recon_neighbor_array,
7220 : md_context_ptr->cb_recon_neighbor_array,
7221 : md_context_ptr->cr_recon_neighbor_array,
7222 : 0, //No inter-intra for IFSearch
7223 0 : candidate_buffer_ptr->candidate_ptr->interintra_mode,
7224 0 : candidate_buffer_ptr->candidate_ptr->use_wedge_interintra,
7225 0 : candidate_buffer_ptr->candidate_ptr->interintra_wedge_index,
7226 : #endif
7227 0 : md_context_ptr->cu_origin_x,
7228 0 : md_context_ptr->cu_origin_y,
7229 0 : md_context_ptr->blk_geom->bwidth,
7230 0 : md_context_ptr->blk_geom->bheight,
7231 : ref_pic_list0,
7232 : ref_pic_list1,
7233 : prediction_ptr,
7234 0 : md_context_ptr->blk_geom->origin_x,
7235 0 : md_context_ptr->blk_geom->origin_y,
7236 : use_uv,
7237 : #if IFS_8BIT_MD
7238 : hbd_mode_decision ? EB_10BIT : EB_8BIT);
7239 : #else
7240 : (uint8_t)picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->static_config.encoder_bit_depth);
7241 : #endif
7242 :
7243 0 : model_rd_for_sb(
7244 : picture_control_set_ptr,
7245 : prediction_ptr,
7246 : md_context_ptr,
7247 : 0,
7248 : num_planes - 1,
7249 : &tmp_rate,
7250 : &tmp_dist,
7251 : hbd_mode_decision ? EB_10BIT : EB_8BIT);
7252 0 : tmp_rd = RDCOST(full_lambda_8b, tmp_rs + tmp_rate, tmp_dist);
7253 :
7254 0 : if (tmp_rd < rd) {
7255 0 : best_dual_mode = i;
7256 0 : rd = tmp_rd;
7257 0 : switchable_rate = tmp_rs;
7258 0 : best_filters = /*mbmi*/candidate_buffer_ptr->candidate_ptr->interp_filters;
7259 0 : best_in_temp = !best_in_temp;
7260 : }
7261 : }
7262 :
7263 : // From best of horizontal EIGHTTAP_REGULAR modes, check vertical modes
7264 0 : for (i = best_dual_mode + SWITCHABLE_FILTERS; i < filter_set_size;
7265 0 : i += SWITCHABLE_FILTERS) {
7266 :
7267 0 : /*mbmi*/candidate_buffer_ptr->candidate_ptr->interp_filters =
7268 0 : av1_make_interp_filters((InterpFilter)filter_sets[i][0], (InterpFilter)filter_sets[i][1]);
7269 :
7270 0 : tmp_rs = eb_av1_get_switchable_rate(
7271 : candidate_buffer_ptr,
7272 : cm,
7273 : md_context_ptr
7274 : );
7275 :
7276 0 : av1_inter_prediction_function_table[hbd_mode_decision](
7277 : picture_control_set_ptr,
7278 0 : candidate_buffer_ptr->candidate_ptr->interp_filters,
7279 : md_context_ptr->cu_ptr,
7280 0 : candidate_buffer_ptr->candidate_ptr->ref_frame_type,
7281 : &mv_unit,
7282 : 0,
7283 : #if OBMC_FLAG
7284 : SIMPLE_TRANSLATION,
7285 : 0,
7286 : 0,
7287 : #endif
7288 0 : candidate_buffer_ptr->candidate_ptr->compound_idx,
7289 0 : &candidate_buffer_ptr->candidate_ptr->interinter_comp,
7290 : #if II_COMP_FLAG
7291 0 : &md_context_ptr->sb_ptr->tile_info,
7292 : md_context_ptr->luma_recon_neighbor_array,
7293 : md_context_ptr->cb_recon_neighbor_array,
7294 : md_context_ptr->cr_recon_neighbor_array,
7295 : 0, //No inter-intra for IFSearch
7296 0 : candidate_buffer_ptr->candidate_ptr->interintra_mode,
7297 0 : candidate_buffer_ptr->candidate_ptr->use_wedge_interintra,
7298 0 : candidate_buffer_ptr->candidate_ptr->interintra_wedge_index,
7299 : #endif
7300 0 : md_context_ptr->cu_origin_x,
7301 0 : md_context_ptr->cu_origin_y,
7302 0 : md_context_ptr->blk_geom->bwidth,
7303 0 : md_context_ptr->blk_geom->bheight,
7304 : ref_pic_list0,
7305 : ref_pic_list1,
7306 : prediction_ptr,
7307 0 : md_context_ptr->blk_geom->origin_x,
7308 0 : md_context_ptr->blk_geom->origin_y,
7309 : use_uv,
7310 : #if IFS_8BIT_MD
7311 : hbd_mode_decision ? EB_10BIT : EB_8BIT);
7312 : #else
7313 : (uint8_t)picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->static_config.encoder_bit_depth);
7314 : #endif
7315 :
7316 0 : model_rd_for_sb(
7317 : picture_control_set_ptr,
7318 : prediction_ptr,
7319 : md_context_ptr,
7320 : 0,
7321 : num_planes - 1,
7322 : &tmp_rate,
7323 : &tmp_dist,
7324 : hbd_mode_decision ? EB_10BIT : EB_8BIT);
7325 0 : tmp_rd = RDCOST(full_lambda_8b, tmp_rs + tmp_rate, tmp_dist);
7326 :
7327 0 : if (tmp_rd < rd) {
7328 0 : rd = tmp_rd;
7329 0 : switchable_rate = tmp_rs;
7330 0 : best_filters = /*mbmi*/candidate_buffer_ptr->candidate_ptr->interp_filters;
7331 0 : best_in_temp = !best_in_temp;
7332 : }
7333 : }
7334 : }
7335 : else {
7336 : // EIGHTTAP_REGULAR mode is calculated beforehand
7337 198001000 : for (i = 1; i < filter_set_size; ++i) {
7338 : int32_t tmp_rs;
7339 : int64_t tmp_rd;
7340 :
7341 175934000 : if (/*cm->seq_params.enable_dual_filter*/picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.enable_dual_filter == 0)
7342 175947000 : if (filter_sets[i][0] != filter_sets[i][1]) continue;
7343 :
7344 43959200 : /*mbmi*/candidate_buffer_ptr->candidate_ptr->interp_filters = av1_make_interp_filters((InterpFilter)filter_sets[i][0], (InterpFilter)filter_sets[i][1]);
7345 :
7346 43983200 : tmp_rs = eb_av1_get_switchable_rate(
7347 : candidate_buffer_ptr,
7348 : cm,
7349 : md_context_ptr
7350 : );
7351 :
7352 43958700 : av1_inter_prediction_function_table[hbd_mode_decision](
7353 : picture_control_set_ptr,
7354 43958700 : candidate_buffer_ptr->candidate_ptr->interp_filters,
7355 : md_context_ptr->cu_ptr,
7356 43958700 : candidate_buffer_ptr->candidate_ptr->ref_frame_type,
7357 : &mv_unit,
7358 : 0,
7359 : #if OBMC_FLAG
7360 : SIMPLE_TRANSLATION,
7361 : 0,
7362 : 0,
7363 : #endif
7364 43958700 : candidate_buffer_ptr->candidate_ptr->compound_idx,
7365 43958700 : &candidate_buffer_ptr->candidate_ptr->interinter_comp,
7366 : #if II_COMP_FLAG
7367 43958700 : &md_context_ptr->sb_ptr->tile_info,
7368 : md_context_ptr->luma_recon_neighbor_array,
7369 : md_context_ptr->cb_recon_neighbor_array,
7370 : md_context_ptr->cr_recon_neighbor_array,
7371 : 0, //No inter-intra for IFSearch
7372 43958700 : candidate_buffer_ptr->candidate_ptr->interintra_mode,
7373 43958700 : candidate_buffer_ptr->candidate_ptr->use_wedge_interintra,
7374 43958700 : candidate_buffer_ptr->candidate_ptr->interintra_wedge_index,
7375 : #endif
7376 43958700 : md_context_ptr->cu_origin_x,
7377 43958700 : md_context_ptr->cu_origin_y,
7378 43958700 : md_context_ptr->blk_geom->bwidth,
7379 43958700 : md_context_ptr->blk_geom->bheight,
7380 : ref_pic_list0,
7381 : ref_pic_list1,
7382 : prediction_ptr,
7383 43958700 : md_context_ptr->blk_geom->origin_x,
7384 43958700 : md_context_ptr->blk_geom->origin_y,
7385 : use_uv,
7386 : #if IFS_8BIT_MD
7387 : hbd_mode_decision ? EB_10BIT : EB_8BIT);
7388 : #else
7389 : (uint8_t)picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr->static_config.encoder_bit_depth);
7390 : #endif
7391 :
7392 43975600 : model_rd_for_sb(
7393 : picture_control_set_ptr,
7394 : prediction_ptr,
7395 : md_context_ptr,
7396 : 0,
7397 : num_planes - 1,
7398 : &tmp_rate,
7399 : &tmp_dist,
7400 : hbd_mode_decision ? EB_10BIT : EB_8BIT);
7401 44012200 : tmp_rd = RDCOST(full_lambda_8b, tmp_rs + tmp_rate, tmp_dist);
7402 :
7403 44012200 : if (tmp_rd < rd) {
7404 4501350 : rd = tmp_rd;
7405 4501350 : switchable_rate = tmp_rs;
7406 4501350 : best_filters = /*mbmi*/candidate_buffer_ptr->candidate_ptr->interp_filters;
7407 4501350 : best_in_temp = !best_in_temp;
7408 : }
7409 : }
7410 : }
7411 :
7412 22066800 : /*mbmi*/candidate_buffer_ptr->candidate_ptr->interp_filters = best_filters;
7413 : }
7414 : else {
7415 348406 : candidate_buffer_ptr->candidate_ptr->interp_filters = 0;
7416 : }
7417 : }
7418 22414600 : }
7419 :
7420 198599000 : EbErrorType inter_pu_prediction_av1(
7421 : ModeDecisionContext *md_context_ptr,
7422 : PictureControlSet *picture_control_set_ptr,
7423 : ModeDecisionCandidateBuffer *candidate_buffer_ptr)
7424 : {
7425 198599000 : EbErrorType return_error = EB_ErrorNone;
7426 198599000 : EbPictureBufferDesc *ref_pic_list0 = (EbPictureBufferDesc*)EB_NULL;
7427 198599000 : EbPictureBufferDesc *ref_pic_list1 = (EbPictureBufferDesc*)EB_NULL;
7428 198599000 : ModeDecisionCandidate *const candidate_ptr = candidate_buffer_ptr->candidate_ptr;
7429 198599000 : SequenceControlSet* sequence_control_set_ptr = ((SequenceControlSet*)(picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr));
7430 :
7431 : Mv mv_0;
7432 : Mv mv_1;
7433 198599000 : mv_0.x = candidate_buffer_ptr->candidate_ptr->motion_vector_xl0;
7434 198599000 : mv_0.y = candidate_buffer_ptr->candidate_ptr->motion_vector_yl0;
7435 198599000 : mv_1.x = candidate_buffer_ptr->candidate_ptr->motion_vector_xl1;
7436 198599000 : mv_1.y = candidate_buffer_ptr->candidate_ptr->motion_vector_yl1;
7437 : MvUnit mv_unit;
7438 198599000 : mv_unit.pred_direction = candidate_buffer_ptr->candidate_ptr->prediction_direction[md_context_ptr->pu_itr];
7439 198599000 : mv_unit.mv[0] = mv_0;
7440 198599000 : mv_unit.mv[1] = mv_1;
7441 :
7442 198599000 : if (candidate_buffer_ptr->candidate_ptr->use_intrabc) {
7443 0 : if (!md_context_ptr->hbd_mode_decision)
7444 0 : ref_pic_list0 = ((EbReferenceObject*)picture_control_set_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture;
7445 : else
7446 0 : ref_pic_list0 = ((EbReferenceObject*)picture_control_set_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture16bit;
7447 :
7448 0 : av1_inter_prediction_function_table[md_context_ptr->hbd_mode_decision > EB_8_BIT_MD](
7449 : picture_control_set_ptr,
7450 0 : candidate_buffer_ptr->candidate_ptr->interp_filters,
7451 : md_context_ptr->cu_ptr,
7452 0 : candidate_buffer_ptr->candidate_ptr->ref_frame_type,
7453 : &mv_unit,
7454 : 1,//use_intrabc
7455 : #if OBMC_FLAG
7456 : SIMPLE_TRANSLATION,
7457 : 0,
7458 : 0,
7459 : #endif
7460 : 1,//1 for avg
7461 0 : &candidate_buffer_ptr->candidate_ptr->interinter_comp,
7462 : #if II_COMP_FLAG
7463 : NULL,
7464 : NULL,
7465 : NULL,
7466 : NULL,
7467 : 0,
7468 : 0,
7469 : 0,
7470 : 0,
7471 : #endif
7472 0 : md_context_ptr->cu_origin_x,
7473 0 : md_context_ptr->cu_origin_y,
7474 0 : md_context_ptr->blk_geom->bwidth,
7475 0 : md_context_ptr->blk_geom->bheight,
7476 : ref_pic_list0,
7477 : 0,//ref_pic_list1,
7478 : candidate_buffer_ptr->prediction_ptr,
7479 0 : md_context_ptr->blk_geom->origin_x,
7480 0 : md_context_ptr->blk_geom->origin_y,
7481 0 : md_context_ptr->chroma_level <= CHROMA_MODE_1 && md_context_ptr->md_staging_skip_inter_chroma_pred == EB_FALSE,
7482 0 : sequence_control_set_ptr->static_config.encoder_bit_depth);
7483 :
7484 0 : return return_error;
7485 : }
7486 :
7487 198599000 : int8_t ref_idx_l0 = candidate_buffer_ptr->candidate_ptr->ref_frame_index_l0;
7488 198599000 : int8_t ref_idx_l1 = candidate_buffer_ptr->candidate_ptr->ref_frame_index_l1;
7489 : MvReferenceFrame rf[2];
7490 198599000 : av1_set_ref_frame(rf, candidate_buffer_ptr->candidate_ptr->ref_frame_type);
7491 :
7492 : uint8_t list_idx0, list_idx1;
7493 199363000 : list_idx0 = get_list_idx(rf[0]);
7494 199541000 : if (rf[1] == NONE_FRAME)
7495 115795000 : list_idx1 = get_list_idx(rf[0]);
7496 : else
7497 83746000 : list_idx1 = get_list_idx(rf[1]);
7498 199058000 : assert(list_idx0 < MAX_NUM_OF_REF_PIC_LIST);
7499 199058000 : assert(list_idx1 < MAX_NUM_OF_REF_PIC_LIST);
7500 :
7501 199058000 : if (ref_idx_l0 >= 0) {
7502 153871000 : ref_pic_list0 = md_context_ptr->hbd_mode_decision ?
7503 0 : ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[list_idx0][ref_idx_l0]->object_ptr)->reference_picture16bit
7504 153871000 : : ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[list_idx0][ref_idx_l0]->object_ptr)->reference_picture;
7505 : }
7506 :
7507 199058000 : if (ref_idx_l1 >= 0) {
7508 129146000 : ref_pic_list1 = md_context_ptr->hbd_mode_decision ?
7509 0 : ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[list_idx1][ref_idx_l1]->object_ptr)->reference_picture16bit
7510 129146000 : : ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[list_idx1][ref_idx_l1]->object_ptr)->reference_picture;
7511 : }
7512 :
7513 199058000 : if (picture_control_set_ptr->parent_pcs_ptr->frm_hdr.allow_warped_motion
7514 134364000 : && candidate_ptr->motion_mode != WARPED_CAUSAL)
7515 : {
7516 130559000 : wm_count_samples(
7517 : md_context_ptr->cu_ptr,
7518 : md_context_ptr->blk_geom,
7519 130559000 : md_context_ptr->cu_origin_x,
7520 130559000 : md_context_ptr->cu_origin_y,
7521 130559000 : candidate_ptr->ref_frame_type,
7522 : picture_control_set_ptr,
7523 : &candidate_ptr->num_proj_ref);
7524 : }
7525 :
7526 198724000 : uint8_t bit_depth = EB_8BIT;
7527 198724000 : if (sequence_control_set_ptr->static_config.encoder_bit_depth > EB_8BIT && md_context_ptr->hbd_mode_decision)
7528 0 : bit_depth = sequence_control_set_ptr->static_config.encoder_bit_depth;
7529 :
7530 :
7531 198724000 : if (candidate_ptr->motion_mode == WARPED_CAUSAL) {
7532 5053160 : assert(ref_pic_list0 != NULL);
7533 :
7534 5053160 : warped_motion_prediction(
7535 : picture_control_set_ptr,
7536 : &mv_unit,
7537 5053160 : candidate_ptr->ref_frame_type,
7538 5053160 : candidate_ptr->compound_idx,
7539 : &candidate_ptr->interinter_comp,
7540 5053160 : md_context_ptr->cu_origin_x,
7541 5053160 : md_context_ptr->cu_origin_y,
7542 : md_context_ptr->cu_ptr,
7543 : md_context_ptr->blk_geom,
7544 : ref_pic_list0,
7545 : ref_pic_list1,
7546 : candidate_buffer_ptr->prediction_ptr,
7547 5053160 : md_context_ptr->blk_geom->origin_x,
7548 5053160 : md_context_ptr->blk_geom->origin_y,
7549 : &candidate_ptr->wm_params_l0,
7550 : &candidate_ptr->wm_params_l1,
7551 : bit_depth,
7552 5053160 : md_context_ptr->chroma_level <= CHROMA_MODE_1 && md_context_ptr->md_staging_skip_inter_chroma_pred == EB_FALSE);
7553 :
7554 5053420 : return return_error;
7555 : }
7556 :
7557 :
7558 193671000 : if (picture_control_set_ptr->parent_pcs_ptr->interpolation_search_level == IT_SEARCH_OFF)
7559 619811 : candidate_buffer_ptr->candidate_ptr->interp_filters = 0;
7560 : else {
7561 :
7562 193051000 : if (md_context_ptr->md_staging_skip_interpolation_search == EB_FALSE) {
7563 26466200 : uint16_t capped_size = md_context_ptr->interpolation_filter_search_blk_size == 0 ? 4 :
7564 0 : md_context_ptr->interpolation_filter_search_blk_size == 1 ? 8 : 16 ;
7565 :
7566 26466200 : if (md_context_ptr->blk_geom->bwidth > capped_size && md_context_ptr->blk_geom->bheight > capped_size)
7567 : #if IFS_8BIT_MD
7568 : {
7569 22354600 : if (md_context_ptr->hbd_mode_decision == EB_DUAL_BIT_MD) {
7570 :
7571 0 : if (ref_idx_l0 >= 0)
7572 0 : ref_pic_list0 = ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[list_idx0][ref_idx_l0]->object_ptr)->reference_picture;
7573 :
7574 0 : if (ref_idx_l1 >= 0)
7575 0 : ref_pic_list1 = ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[list_idx1][ref_idx_l1]->object_ptr)->reference_picture;
7576 : }
7577 : #endif
7578 22354600 : interpolation_filter_search(
7579 : picture_control_set_ptr,
7580 : candidate_buffer_ptr->prediction_ptr_temp,
7581 : md_context_ptr,
7582 : candidate_buffer_ptr,
7583 : mv_unit,
7584 : ref_pic_list0,
7585 : ref_pic_list1,
7586 22354600 : md_context_ptr->hbd_mode_decision == EB_DUAL_BIT_MD ? EB_8_BIT_MD: md_context_ptr->hbd_mode_decision,
7587 : bit_depth);
7588 : #if IFS_8BIT_MD
7589 22359900 : if (md_context_ptr->hbd_mode_decision == EB_DUAL_BIT_MD) {
7590 0 : if (ref_idx_l0 >= 0)
7591 0 : ref_pic_list0 = ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[list_idx0][ref_idx_l0]->object_ptr)->reference_picture16bit;
7592 :
7593 0 : if (ref_idx_l1 >= 0)
7594 0 : ref_pic_list1 = ((EbReferenceObject*)picture_control_set_ptr->ref_pic_ptr_array[list_idx1][ref_idx_l1]->object_ptr)->reference_picture16bit;
7595 : }
7596 : }
7597 : #endif
7598 : }
7599 : }
7600 :
7601 : NeighborArrayUnit *luma_recon_neighbor_array;
7602 : NeighborArrayUnit *cb_recon_neighbor_array;
7603 : NeighborArrayUnit *cr_recon_neighbor_array;
7604 :
7605 193676000 : if (!md_context_ptr->hbd_mode_decision) {
7606 193713000 : luma_recon_neighbor_array = md_context_ptr->luma_recon_neighbor_array;
7607 193713000 : cb_recon_neighbor_array = md_context_ptr->cb_recon_neighbor_array;
7608 193713000 : cr_recon_neighbor_array = md_context_ptr->cr_recon_neighbor_array;
7609 : }
7610 : else {
7611 0 : luma_recon_neighbor_array = md_context_ptr->luma_recon_neighbor_array16bit;
7612 0 : cb_recon_neighbor_array = md_context_ptr->cb_recon_neighbor_array16bit;
7613 0 : cr_recon_neighbor_array = md_context_ptr->cr_recon_neighbor_array16bit;
7614 :
7615 : }
7616 :
7617 387352000 : av1_inter_prediction_function_table[md_context_ptr->hbd_mode_decision > EB_8_BIT_MD](
7618 : picture_control_set_ptr,
7619 193676000 : candidate_buffer_ptr->candidate_ptr->interp_filters,
7620 : md_context_ptr->cu_ptr,
7621 193676000 : candidate_buffer_ptr->candidate_ptr->ref_frame_type,
7622 : &mv_unit,
7623 193676000 : candidate_buffer_ptr->candidate_ptr->use_intrabc,
7624 : #if OBMC_FLAG
7625 193676000 : candidate_buffer_ptr->candidate_ptr->motion_mode,//MD
7626 : 1,
7627 : md_context_ptr,
7628 : #endif
7629 193676000 : candidate_buffer_ptr->candidate_ptr->compound_idx,
7630 193676000 : &candidate_buffer_ptr->candidate_ptr->interinter_comp,
7631 : #if II_COMP_FLAG
7632 193676000 : &md_context_ptr->sb_ptr->tile_info,
7633 : luma_recon_neighbor_array,
7634 : cb_recon_neighbor_array,
7635 : cr_recon_neighbor_array,
7636 193676000 : candidate_ptr->is_interintra_used,
7637 193676000 : candidate_ptr->interintra_mode,
7638 193676000 : candidate_ptr->use_wedge_interintra,
7639 : candidate_ptr->interintra_wedge_index,
7640 : #endif
7641 193676000 : md_context_ptr->cu_origin_x,
7642 193676000 : md_context_ptr->cu_origin_y,
7643 193676000 : md_context_ptr->blk_geom->bwidth,
7644 193676000 : md_context_ptr->blk_geom->bheight,
7645 : ref_pic_list0,
7646 : ref_pic_list1,
7647 : candidate_buffer_ptr->prediction_ptr,
7648 193676000 : md_context_ptr->blk_geom->origin_x,
7649 193676000 : md_context_ptr->blk_geom->origin_y,
7650 193676000 : md_context_ptr->chroma_level <= CHROMA_MODE_1 && md_context_ptr->md_staging_skip_inter_chroma_pred == EB_FALSE,
7651 193676000 : sequence_control_set_ptr->static_config.encoder_bit_depth);
7652 :
7653 193619000 : return return_error;
7654 : }
7655 :
7656 : /***************************************************
7657 : * PreLoad Reference Block for 16bit mode
7658 : ***************************************************/
7659 0 : void UnPackReferenceLumaBlock(
7660 : EbPictureBufferDesc *refFramePic,
7661 : uint32_t pos_x,
7662 : uint32_t pos_y,
7663 : uint32_t pu_width,
7664 : uint32_t pu_height,
7665 : EbPictureBufferDesc *dst,
7666 : EbBool sub_pred)
7667 : {
7668 0 : pu_width += 4;
7669 0 : pu_height += 4;
7670 0 : uint32_t inPosx = (pos_x >> 2) - 2;
7671 0 : uint32_t inPosy = (pos_y >> 2) - 2;
7672 0 : uint16_t *ptr16 = (uint16_t *)refFramePic->buffer_y + inPosx + inPosy * refFramePic->stride_y;
7673 :
7674 0 : extract8_bitdata_safe_sub(
7675 : ptr16,
7676 0 : refFramePic->stride_y << sub_pred,
7677 : dst->buffer_y,
7678 0 : dst->stride_y << sub_pred,
7679 : pu_width,
7680 : pu_height >> sub_pred,
7681 : sub_pred
7682 : );
7683 0 : }
7684 :
7685 : /** choose_mvp_idx_v2 function is used to choose the best AMVP candidate.
7686 : @param *candidate_ptr(output)
7687 : candidate_ptr points to the prediction result.
7688 : @param cu_ptr(input)
7689 : pointer to the CU where the target PU belongs to.
7690 : @param *pu_index(input)
7691 : the index of the PU inside a CU
7692 : @param ref0AMVPCandArray(input)
7693 : @param ref0_num_available_amvp_cand(input)
7694 : @param ref1AMVPCandArray(input)
7695 : @param ref1NumAvailableAMVPCand(input)
7696 : */
7697 0 : EbErrorType choose_mvp_idx_v2(
7698 : ModeDecisionCandidate *candidate_ptr,
7699 : uint32_t cu_origin_x,
7700 : uint32_t cu_origin_y,
7701 : uint32_t pu_index,
7702 : uint32_t tb_size,
7703 : int16_t *ref0_amvp_cand_array_x,
7704 : int16_t *ref0_amvp_cand_array_y,
7705 : uint32_t ref0_num_available_amvp_cand,
7706 : int16_t *ref1_amvp_cand_array_x,
7707 : int16_t *ref1_amvp_cand_array_y,
7708 : uint32_t ref1NumAvailableAMVPCand,
7709 : PictureControlSet *picture_control_set_ptr)
7710 : {
7711 0 : EbErrorType return_error = EB_ErrorNone;
7712 : uint8_t mvpRef0Idx;
7713 : uint8_t mvpRef1Idx;
7714 :
7715 0 : uint32_t picture_width = ((SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr)->seq_header.max_frame_width;
7716 0 : uint32_t picture_height = ((SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr)->seq_header.max_frame_height;
7717 :
7718 : uint32_t mvd0, mvd1;
7719 :
7720 0 : switch (candidate_ptr->prediction_direction[pu_index]) {
7721 0 : case UNI_PRED_LIST_0:
7722 : // Clip the input MV
7723 0 : clip_mv(
7724 : cu_origin_x,
7725 : cu_origin_y,
7726 : &candidate_ptr->motion_vector_xl0,
7727 : &candidate_ptr->motion_vector_yl0,
7728 : picture_width,
7729 : picture_height,
7730 : tb_size);
7731 :
7732 : // Choose the AMVP candidate
7733 : switch (ref0_num_available_amvp_cand) {
7734 0 : case 0:
7735 : case 1:
7736 : //mvpRef0Idx = 0;
7737 0 : candidate_ptr->motion_vector_pred_idx[REF_LIST_0] = 0;
7738 0 : candidate_ptr->motion_vector_pred_x[REF_LIST_0] = ref0_amvp_cand_array_x[0];
7739 0 : candidate_ptr->motion_vector_pred_y[REF_LIST_0] = ref0_amvp_cand_array_y[0];
7740 0 : break;
7741 0 : case 2:
7742 :
7743 0 : mvd0 = EB_ABS_DIFF(ref0_amvp_cand_array_x[0], candidate_ptr->motion_vector_xl0) +
7744 0 : EB_ABS_DIFF(ref0_amvp_cand_array_y[0], candidate_ptr->motion_vector_yl0);
7745 :
7746 0 : mvd1 = EB_ABS_DIFF(ref0_amvp_cand_array_x[1], candidate_ptr->motion_vector_xl0) +
7747 0 : EB_ABS_DIFF(ref0_amvp_cand_array_y[1], candidate_ptr->motion_vector_yl0);
7748 :
7749 0 : mvpRef0Idx = ((mvd0) <= (mvd1)) ? 0 : 1;
7750 :
7751 0 : candidate_ptr->motion_vector_pred_idx[REF_LIST_0] = mvpRef0Idx;
7752 0 : candidate_ptr->motion_vector_pred_x[REF_LIST_0] = ref0_amvp_cand_array_x[mvpRef0Idx];
7753 0 : candidate_ptr->motion_vector_pred_y[REF_LIST_0] = ref0_amvp_cand_array_y[mvpRef0Idx];
7754 0 : break;
7755 0 : default:
7756 0 : break;
7757 : }
7758 :
7759 0 : break;
7760 :
7761 0 : case UNI_PRED_LIST_1:
7762 :
7763 : // Clip the input MV
7764 0 : clip_mv(
7765 : cu_origin_x,
7766 : cu_origin_y,
7767 : &candidate_ptr->motion_vector_xl1,
7768 : &candidate_ptr->motion_vector_yl1,
7769 : picture_width,
7770 : picture_height,
7771 : tb_size);
7772 :
7773 : // Choose the AMVP candidate
7774 : switch (ref1NumAvailableAMVPCand) {
7775 0 : case 0:
7776 : case 1:
7777 : //mvpRef1Idx = 0;
7778 0 : candidate_ptr->motion_vector_pred_idx[REF_LIST_1] = 0;
7779 0 : candidate_ptr->motion_vector_pred_x[REF_LIST_1] = ref1_amvp_cand_array_x[0];
7780 0 : candidate_ptr->motion_vector_pred_y[REF_LIST_1] = ref1_amvp_cand_array_y[0];
7781 0 : break;
7782 0 : case 2:
7783 :
7784 0 : mvd0 = EB_ABS_DIFF(ref1_amvp_cand_array_x[0], candidate_ptr->motion_vector_xl1) +
7785 0 : EB_ABS_DIFF(ref1_amvp_cand_array_y[0], candidate_ptr->motion_vector_yl1);
7786 :
7787 0 : mvd1 = EB_ABS_DIFF(ref1_amvp_cand_array_x[1], candidate_ptr->motion_vector_xl1) +
7788 0 : EB_ABS_DIFF(ref1_amvp_cand_array_y[1], candidate_ptr->motion_vector_yl1);
7789 :
7790 0 : mvpRef1Idx = ((mvd0) <= (mvd1)) ? 0 : 1;
7791 :
7792 0 : candidate_ptr->motion_vector_pred_idx[REF_LIST_1] = mvpRef1Idx;
7793 0 : candidate_ptr->motion_vector_pred_x[REF_LIST_1] = ref1_amvp_cand_array_x[mvpRef1Idx];
7794 0 : candidate_ptr->motion_vector_pred_y[REF_LIST_1] = ref1_amvp_cand_array_y[mvpRef1Idx];
7795 0 : break;
7796 0 : default:
7797 0 : break;
7798 : }
7799 :
7800 : // MVP in ref_pic_list0
7801 : //mvpRef0Idx = 0;
7802 : //candidate_ptr->motion_vector_pred_idx[REF_LIST_0][pu_index] = mvpRef0Idx;
7803 : //candidate_ptr->motion_vector_pred_x[REF_LIST_0][pu_index] = 0;
7804 : //candidate_ptr->motion_vector_pred_y[REF_LIST_0][pu_index] = 0;
7805 :
7806 0 : break;
7807 :
7808 0 : case BI_PRED:
7809 :
7810 : // Choose the MVP in list0
7811 : // Clip the input MV
7812 0 : clip_mv(
7813 : cu_origin_x,
7814 : cu_origin_y,
7815 : &candidate_ptr->motion_vector_xl0,
7816 : &candidate_ptr->motion_vector_yl0,
7817 : picture_width,
7818 : picture_height,
7819 : tb_size);
7820 :
7821 : // Choose the AMVP candidate
7822 : switch (ref0_num_available_amvp_cand) {
7823 0 : case 0:
7824 : case 1:
7825 : //mvpRef0Idx = 0;
7826 0 : candidate_ptr->motion_vector_pred_idx[REF_LIST_0] = 0;
7827 0 : candidate_ptr->motion_vector_pred_x[REF_LIST_0] = ref0_amvp_cand_array_x[0];
7828 0 : candidate_ptr->motion_vector_pred_y[REF_LIST_0] = ref0_amvp_cand_array_y[0];
7829 0 : break;
7830 0 : case 2:
7831 :
7832 0 : mvd0 = EB_ABS_DIFF(ref0_amvp_cand_array_x[0], candidate_ptr->motion_vector_xl0) +
7833 0 : EB_ABS_DIFF(ref0_amvp_cand_array_y[0], candidate_ptr->motion_vector_yl0);
7834 :
7835 0 : mvd1 = EB_ABS_DIFF(ref0_amvp_cand_array_x[1], candidate_ptr->motion_vector_xl0) +
7836 0 : EB_ABS_DIFF(ref0_amvp_cand_array_y[1], candidate_ptr->motion_vector_yl0);
7837 :
7838 0 : mvpRef0Idx = ((mvd0) <= (mvd1)) ? 0 : 1;
7839 :
7840 0 : candidate_ptr->motion_vector_pred_idx[REF_LIST_0] = mvpRef0Idx;
7841 0 : candidate_ptr->motion_vector_pred_x[REF_LIST_0] = ref0_amvp_cand_array_x[mvpRef0Idx];
7842 0 : candidate_ptr->motion_vector_pred_y[REF_LIST_0] = ref0_amvp_cand_array_y[mvpRef0Idx];
7843 0 : break;
7844 0 : default:
7845 0 : break;
7846 : }
7847 :
7848 : // Choose the MVP in list1
7849 : // Clip the input MV
7850 0 : clip_mv(
7851 : cu_origin_x,
7852 : cu_origin_y,
7853 : &candidate_ptr->motion_vector_xl1,
7854 : &candidate_ptr->motion_vector_yl1,
7855 : picture_width,
7856 : picture_height,
7857 : tb_size);
7858 :
7859 : // Choose the AMVP candidate
7860 : switch (ref1NumAvailableAMVPCand) {
7861 0 : case 0:
7862 : case 1:
7863 : //mvpRef1Idx = 0;
7864 0 : candidate_ptr->motion_vector_pred_idx[REF_LIST_1] = 0;
7865 0 : candidate_ptr->motion_vector_pred_x[REF_LIST_1] = ref1_amvp_cand_array_x[0];
7866 0 : candidate_ptr->motion_vector_pred_y[REF_LIST_1] = ref1_amvp_cand_array_y[0];
7867 0 : break;
7868 0 : case 2:
7869 :
7870 0 : mvd0 = EB_ABS_DIFF(ref1_amvp_cand_array_x[0], candidate_ptr->motion_vector_xl1) +
7871 0 : EB_ABS_DIFF(ref1_amvp_cand_array_y[0], candidate_ptr->motion_vector_yl1);
7872 :
7873 0 : mvd1 = EB_ABS_DIFF(ref1_amvp_cand_array_x[1], candidate_ptr->motion_vector_xl1) +
7874 0 : EB_ABS_DIFF(ref1_amvp_cand_array_y[1], candidate_ptr->motion_vector_yl1);
7875 :
7876 0 : mvpRef1Idx = ((mvd0) <= (mvd1)) ? 0 : 1;
7877 :
7878 0 : candidate_ptr->motion_vector_pred_idx[REF_LIST_1] = mvpRef1Idx;
7879 0 : candidate_ptr->motion_vector_pred_x[REF_LIST_1] = ref1_amvp_cand_array_x[mvpRef1Idx];
7880 0 : candidate_ptr->motion_vector_pred_y[REF_LIST_1] = ref1_amvp_cand_array_y[mvpRef1Idx];
7881 0 : break;
7882 0 : default:
7883 0 : break;
7884 : }
7885 :
7886 0 : break;
7887 :
7888 0 : default:
7889 0 : break;
7890 : }
7891 :
7892 0 : return return_error;
7893 : }
|