Line data Source code
1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include <limits.h>
13 : #include <math.h>
14 : #include <stdio.h>
15 : #include "EbDefinitions.h"
16 : #include "EbCodingUnit.h"
17 : #include "av1me.h"
18 : #include "EbPictureControlSet.h"
19 : #include "EbSequenceControlSet.h"
20 : #include "EbComputeSAD.h"
21 : #include "aom_dsp_rtcd.h"
22 : #if OBMC_FLAG
23 : #include "EbModeDecisionProcess.h"
24 : #endif
25 :
26 : #include "EbAdaptiveMotionVectorPrediction.h"
27 :
28 : int av1_is_dv_valid(const MV dv,
29 : const MacroBlockD *xd, int mi_row, int mi_col,
30 : BlockSize bsize, int mib_size_log2);
31 :
/* Parameter bundle handed (by const pointer) to the aom_dist_wtd_* kernel
 * types declared further down.
 * NOTE(review): field meanings are not established in this file; from the
 * names they look like an on/off flag plus forward/backward weights - confirm
 * against the kernels that consume them. */
32 : typedef struct dist_wtd_comp_params {
33 : int use_dist_wtd_comp_avg;
34 : int fwd_offset;
35 : int bck_offset;
36 : } DIST_WTD_COMP_PARAMS;
37 :
/* Function-pointer types for the block-matching kernels. All of them take two
 * pixel buffers, each with its own stride, and return an unsigned int score.
 * NOTE(review): which of a/b (src/ref) is the source block and which is the
 * reference is a convention of the kernels themselves - confirm there. */

/* Kernel that additionally receives a second prediction buffer. */
38 : typedef unsigned int(*aom_sad_avg_fn_t)(const uint8_t *a, int a_stride,
39 : const uint8_t *b, int b_stride,
40 : const uint8_t *second_pred);
41 :
/* Copy-style kernel: reads from a, writes into b, with a count n. */
42 : typedef void(*aom_copy32xn_fn_t)(const uint8_t *a, int a_stride, uint8_t *b,
43 : int b_stride, int n);
44 :
/* Kernel taking an (xoffset, yoffset) pair and writing an SSE value through
 * the sse out-parameter in addition to its return value. */
45 : typedef unsigned int(*aom_subpixvariance_fn_t)(const uint8_t *a, int a_stride,
46 : int xoffset, int yoffset,
47 : const uint8_t *b, int b_stride,
48 : unsigned int *sse);
49 :
/* Same shape as aom_subpixvariance_fn_t plus a second prediction buffer. */
50 : typedef unsigned int(*aom_subp_avg_variance_fn_t)(
51 : const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
52 : int b_stride, unsigned int *sse, const uint8_t *second_pred);
53 :
/* Same shape as aom_sad_avg_fn_t plus a DIST_WTD_COMP_PARAMS pointer. */
54 : typedef unsigned int(*aom_dist_wtd_sad_avg_fn_t)(
55 : const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,
56 : const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param);
57 :
/* Same shape as aom_subp_avg_variance_fn_t plus a DIST_WTD_COMP_PARAMS
 * pointer. */
58 : typedef unsigned int(*aom_dist_wtd_subp_avg_variance_fn_t)(
59 : const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
60 : int b_stride, unsigned int *sse, const uint8_t *second_pred,
61 : const DIST_WTD_COMP_PARAMS *jcp_param);
62 :
/* Masked kernels: take a second prediction, a mask buffer with its own stride
 * and an invert_mask flag. */
63 : typedef unsigned int(*aom_masked_sad_fn_t)(const uint8_t *src, int src_stride,
64 : const uint8_t *ref, int ref_stride,
65 : const uint8_t *second_pred,
66 : const uint8_t *msk, int msk_stride,
67 : int invert_mask);
68 : typedef unsigned int(*aom_masked_subpixvariance_fn_t)(
69 : const uint8_t *src, int src_stride, int xoffset, int yoffset,
70 : const uint8_t *ref, int ref_stride, const uint8_t *second_pred,
71 : const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse);
72 :
73 : int eb_av1_refining_search_sad(IntraBcContext *x, MV *ref_mv, int error_per_bit,
74 : int search_range,
75 : const aom_variance_fn_ptr_t *fn_ptr,
76 : const MV *center_mv);
77 :
78 : aom_variance_fn_ptr_t mefn_ptr[BlockSizeS_ALL];
79 :
80 2 : void init_fn_ptr(void)
81 : {
82 : #define BFP0(BT, SDF, VF, SDX4DF) \
83 : mefn_ptr[BT].sdf = SDF; \
84 : mefn_ptr[BT].vf = VF; \
85 : mefn_ptr[BT].sdx4df = SDX4DF;
86 :
87 2 : BFP0(BLOCK_4X16, eb_aom_sad4x16, eb_aom_variance4x16, eb_aom_sad4x16x4d)
88 2 : BFP0(BLOCK_16X4, eb_aom_sad16x4, eb_aom_variance16x4, eb_aom_sad16x4x4d)
89 2 : BFP0(BLOCK_8X32, eb_aom_sad8x32, eb_aom_variance8x32, eb_aom_sad8x32x4d)
90 2 : BFP0(BLOCK_32X8, eb_aom_sad32x8, eb_aom_variance32x8, eb_aom_sad32x8x4d)
91 2 : BFP0(BLOCK_16X64, eb_aom_sad16x64, eb_aom_variance16x64, eb_aom_sad16x64x4d)
92 2 : BFP0(BLOCK_64X16, eb_aom_sad64x16, eb_aom_variance64x16, eb_aom_sad64x16x4d)
93 2 : BFP0(BLOCK_128X128, eb_aom_sad128x128, eb_aom_variance128x128, eb_aom_sad128x128x4d)
94 2 : BFP0(BLOCK_128X64, eb_aom_sad128x64, eb_aom_variance128x64, eb_aom_sad128x64x4d)
95 2 : BFP0(BLOCK_64X128, eb_aom_sad64x128, eb_aom_variance64x128, eb_aom_sad64x128x4d)
96 2 : BFP0(BLOCK_32X16, eb_aom_sad32x16, eb_aom_variance32x16, eb_aom_sad32x16x4d)
97 2 : BFP0(BLOCK_16X32, eb_aom_sad16x32, eb_aom_variance16x32, eb_aom_sad16x32x4d)
98 2 : BFP0(BLOCK_64X32, eb_aom_sad64x32, eb_aom_variance64x32, eb_aom_sad64x32x4d)
99 2 : BFP0(BLOCK_32X64, eb_aom_sad32x64, eb_aom_variance32x64, eb_aom_sad32x64x4d)
100 2 : BFP0(BLOCK_32X32, eb_aom_sad32x32, eb_aom_variance32x32, eb_aom_sad32x32x4d)
101 2 : BFP0(BLOCK_64X64, eb_aom_sad64x64, eb_aom_variance64x64, eb_aom_sad64x64x4d)
102 2 : BFP0(BLOCK_16X16, eb_aom_sad16x16, eb_aom_variance16x16, eb_aom_sad16x16x4d)
103 2 : BFP0(BLOCK_16X8, eb_aom_sad16x8, eb_aom_variance16x8, eb_aom_sad16x8x4d)
104 2 : BFP0(BLOCK_8X16, eb_aom_sad8x16, eb_aom_variance8x16, eb_aom_sad8x16x4d)
105 2 : BFP0(BLOCK_8X8, eb_aom_sad8x8, eb_aom_variance8x8, eb_aom_sad8x8x4d)
106 2 : BFP0(BLOCK_8X4, eb_aom_sad8x4, eb_aom_variance8x4, eb_aom_sad8x4x4d)
107 2 : BFP0(BLOCK_4X8, eb_aom_sad4x8, eb_aom_variance4x8, eb_aom_sad4x8x4d)
108 2 : BFP0(BLOCK_4X4, eb_aom_sad4x4, eb_aom_variance4x4, eb_aom_sad4x4x4d)
109 : #if OBMC_FLAG
110 : #define OBFP(BT, OSDF, OVF, OSVF) \
111 : mefn_ptr[BT].osdf = OSDF; \
112 : mefn_ptr[BT].ovf = OVF; \
113 : mefn_ptr[BT].osvf = OSVF;
114 2 : OBFP(BLOCK_128X128, aom_obmc_sad128x128, aom_obmc_variance128x128,
115 : aom_obmc_sub_pixel_variance128x128)
116 2 : OBFP(BLOCK_128X64, aom_obmc_sad128x64, aom_obmc_variance128x64,
117 : aom_obmc_sub_pixel_variance128x64)
118 2 : OBFP(BLOCK_64X128, aom_obmc_sad64x128, aom_obmc_variance64x128,
119 : aom_obmc_sub_pixel_variance64x128)
120 2 : OBFP(BLOCK_64X64, aom_obmc_sad64x64, aom_obmc_variance64x64,
121 : aom_obmc_sub_pixel_variance64x64)
122 2 : OBFP(BLOCK_64X32, aom_obmc_sad64x32, aom_obmc_variance64x32,
123 : aom_obmc_sub_pixel_variance64x32)
124 2 : OBFP(BLOCK_32X64, aom_obmc_sad32x64, aom_obmc_variance32x64,
125 : aom_obmc_sub_pixel_variance32x64)
126 2 : OBFP(BLOCK_32X32, aom_obmc_sad32x32, aom_obmc_variance32x32,
127 : aom_obmc_sub_pixel_variance32x32)
128 2 : OBFP(BLOCK_32X16, aom_obmc_sad32x16, aom_obmc_variance32x16,
129 : aom_obmc_sub_pixel_variance32x16)
130 2 : OBFP(BLOCK_16X32, aom_obmc_sad16x32, aom_obmc_variance16x32,
131 : aom_obmc_sub_pixel_variance16x32)
132 2 : OBFP(BLOCK_16X16, aom_obmc_sad16x16, aom_obmc_variance16x16,
133 : aom_obmc_sub_pixel_variance16x16)
134 2 : OBFP(BLOCK_16X8, aom_obmc_sad16x8, aom_obmc_variance16x8,
135 : aom_obmc_sub_pixel_variance16x8)
136 2 : OBFP(BLOCK_8X16, aom_obmc_sad8x16, aom_obmc_variance8x16,
137 : aom_obmc_sub_pixel_variance8x16)
138 2 : OBFP(BLOCK_8X8, aom_obmc_sad8x8, aom_obmc_variance8x8,
139 : aom_obmc_sub_pixel_variance8x8)
140 2 : OBFP(BLOCK_4X8, aom_obmc_sad4x8, aom_obmc_variance4x8,
141 : aom_obmc_sub_pixel_variance4x8)
142 2 : OBFP(BLOCK_8X4, aom_obmc_sad8x4, aom_obmc_variance8x4,
143 : aom_obmc_sub_pixel_variance8x4)
144 2 : OBFP(BLOCK_4X4, aom_obmc_sad4x4, aom_obmc_variance4x4,
145 : aom_obmc_sub_pixel_variance4x4)
146 2 : OBFP(BLOCK_4X16, aom_obmc_sad4x16, aom_obmc_variance4x16,
147 : aom_obmc_sub_pixel_variance4x16)
148 2 : OBFP(BLOCK_16X4, aom_obmc_sad16x4, aom_obmc_variance16x4,
149 : aom_obmc_sub_pixel_variance16x4)
150 2 : OBFP(BLOCK_8X32, aom_obmc_sad8x32, aom_obmc_variance8x32,
151 : aom_obmc_sub_pixel_variance8x32)
152 2 : OBFP(BLOCK_32X8, aom_obmc_sad32x8, aom_obmc_variance32x8,
153 : aom_obmc_sub_pixel_variance32x8)
154 2 : OBFP(BLOCK_16X64, aom_obmc_sad16x64, aom_obmc_variance16x64,
155 : aom_obmc_sub_pixel_variance16x64)
156 2 : OBFP(BLOCK_64X16, aom_obmc_sad64x16, aom_obmc_variance64x16,
157 : aom_obmc_sub_pixel_variance64x16)
158 : #endif
159 2 : }
160 :
161 : // #define NEW_DIAMOND_SEARCH
162 :
163 18143600 : static INLINE const uint8_t *get_buf_from_mv(const struct Buf2D *buf,
164 : const MV *mv) {
165 18143600 : return &buf->buf[mv->row * buf->stride + mv->col];
166 : }
167 :
168 1781160 : void eb_av1_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
169 1781160 : int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
170 1781160 : int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
171 1781160 : int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
172 1781160 : int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
173 :
174 1781160 : col_min = AOMMAX(col_min, (MV_LOW >> 3) + 1);
175 1781160 : row_min = AOMMAX(row_min, (MV_LOW >> 3) + 1);
176 1781160 : col_max = AOMMIN(col_max, (MV_UPP >> 3) - 1);
177 1781160 : row_max = AOMMIN(row_max, (MV_UPP >> 3) - 1);
178 :
179 : // Get intersection of UMV window and valid MV window to reduce # of checks
180 : // in diamond search.
181 1781160 : if (mv_limits->col_min < col_min) mv_limits->col_min = col_min;
182 1781160 : if (mv_limits->col_max > col_max) mv_limits->col_max = col_max;
183 1781160 : if (mv_limits->row_min < row_min) mv_limits->row_min = row_min;
184 1781160 : if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
185 1781160 : }
186 :
187 : MvJointType av1_get_mv_joint(const MV *mv);
188 :
189 47286800 : static INLINE int mv_cost(const MV *mv, const int *joint_cost,
190 : int *const comp_cost[2]) {
191 47286800 : return joint_cost[av1_get_mv_joint(mv)] + comp_cost[0][mv->row] +
192 47283100 : comp_cost[1][mv->col];
193 : }
194 :
195 : #define PIXEL_TRANSFORM_ERROR_SCALE 4
196 38352800 : static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
197 : int *mvcost[2], int error_per_bit) {
198 38352800 : if (mvcost) {
199 38353000 : const MV diff = { mv->row - ref->row, mv->col - ref->col };
200 38353000 : return (int)ROUND_POWER_OF_TWO_64(
201 : (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
202 : RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT +
203 : PIXEL_TRANSFORM_ERROR_SCALE);
204 : }
205 0 : return 0;
206 : }
207 :
208 8958030 : static int mvsad_err_cost(const IntraBcContext *x, const MV *mv, const MV *ref,
209 : int sad_per_bit) {
210 8958030 : const MV diff = { (mv->row - ref->row) * 8, (mv->col - ref->col) * 8 };
211 8958030 : return ROUND_POWER_OF_TWO(
212 : (unsigned)mv_cost(&diff, x->nmv_vec_cost, x->mv_cost_stack) * sad_per_bit,
213 : AV1_PROB_COST_SHIFT);
214 : }
215 :
216 0 : void eb_av1_init3smotion_compensation(SearchSiteConfig *cfg, int stride) {
217 0 : int len, ss_count = 1;
218 :
219 0 : cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
220 0 : cfg->ss[0].offset = 0;
221 :
222 0 : for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
223 : // Generate offsets for 8 search sites per step.
224 0 : const MV ss_mvs[8] = { { -len, 0 }, { len, 0 }, { 0, -len },
225 : { 0, len }, { -len, -len }, { -len, len },
226 : { len, -len }, { len, len } };
227 : int i;
228 0 : for (i = 0; i < 8; ++i) {
229 0 : search_site *const ss = &cfg->ss[ss_count++];
230 0 : ss->mv = ss_mvs[i];
231 0 : ss->offset = ss->mv.row * stride + ss->mv.col;
232 : }
233 : }
234 :
235 0 : cfg->ss_count = ss_count;
236 0 : cfg->searches_per_step = 8;
237 0 : }
238 :
239 14605100 : static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
240 14593500 : return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
241 29198600 : (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
242 : }
243 : #if !OBMC_FLAG
244 : #define CHECK_BETTER \
245 : { \
246 : if (thissad < bestsad) { \
247 : if (use_mvcost) \
248 : thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
249 : if (thissad < bestsad) { \
250 : bestsad = thissad; \
251 : best_site = i; \
252 : } \
253 : } \
254 : }
255 : #endif
256 : #define MAX_PATTERN_SCALES 11
257 : #define MAX_PATTERN_CANDIDATES 8 // max number of candidates per scale
258 : #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates
259 :
260 0 : int eb_av1_get_mvpred_var(const IntraBcContext *x, const MV *best_mv,
261 : const MV *center_mv, const aom_variance_fn_ptr_t *vfp,
262 : int use_mvcost) {
263 0 : const struct Buf2D *const what = &x->plane[0].src;
264 0 : const struct Buf2D *const in_what = &x->xdplane[0].pre[0];
265 0 : const MV mv = { best_mv->row * 8, best_mv->col * 8 };
266 : unsigned int unused;
267 :
268 0 : return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
269 0 : in_what->stride, &unused) +
270 0 : (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmv_vec_cost,
271 : x->mv_cost_stack, x->errorperbit)
272 0 : : 0);
273 : }
274 :
275 : // Exhaustive motion search around a given centre position with a given
276 : // step size.
//
// x           : search context; supplies the source plane, the reference plane
//               pre[0], the MV limits and the MV cost tables. Its
//               second_best_mv is overwritten on every improvement.
// ref_mv      : vector the MV cost (mvsad_err_cost) is measured against.
// best_mv     : out - best vector found; starts as the clamped centre.
// range       : half-width of the scanned square, before clipping to the limits.
// step        : row stride of the scan, and the column stride when > 1.
// sad_per_bit : weight applied to the MV cost.
// fn_ptr      : SAD kernels (sdf for one position, sdx4df for four).
// center_mv   : scan centre. It is copied into a local before best_mv is
//               written, so the two may point at the same object (the caller
//               in this file does exactly that).
// Returns the best cost found (SAD + MV cost).
277 0 : static int exhuastive_mesh_search(IntraBcContext *x, MV *ref_mv, MV *best_mv,
278 : int range, int step, int sad_per_bit,
279 : const aom_variance_fn_ptr_t *fn_ptr,
280 : const MV *center_mv) {
281 0 : const struct Buf2D *const what = &x->plane[0].src;
282 0 : const struct Buf2D *const in_what = &x->xdplane[0].pre[0];
283 0 : MV fcenter_mv = { center_mv->row, center_mv->col };
284 0 : unsigned int best_sad = INT_MAX;
285 : int r, c, i;
286 : int start_col, end_col, start_row, end_row;
// With step == 1 columns advance four at a time, matching the 4-wide sdx4df
// batch used below.
287 0 : int col_step = (step > 1) ? step : 4;
288 :
289 0 : assert(step >= 1);
290 :
291 0 : clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
292 : x->mv_limits.row_min, x->mv_limits.row_max);
293 0 : *best_mv = fcenter_mv;
// Cost of the (clamped) centre itself is the value to beat.
294 0 : best_sad =
295 0 : fn_ptr->sdf(what->buf, what->stride,
296 : get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
297 0 : mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
// Scan window, expressed as offsets from the centre and clipped to mv_limits.
298 0 : start_row = AOMMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
299 0 : start_col = AOMMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
300 0 : end_row = AOMMIN(range, x->mv_limits.row_max - fcenter_mv.row);
301 0 : end_col = AOMMIN(range, x->mv_limits.col_max - fcenter_mv.col);
302 :
303 0 : for (r = start_row; r <= end_row; r += step) {
304 0 : for (c = start_col; c <= end_col; c += col_step) {
305 : // Step > 1 means we are not checking every location in this pass.
306 0 : if (step > 1) {
307 0 : const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
308 : unsigned int sad =
309 0 : fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
310 : in_what->stride);
// The MV cost is only computed when the raw SAD already beats the best.
311 0 : if (sad < best_sad) {
312 0 : sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
313 0 : if (sad < best_sad) {
314 0 : best_sad = sad;
315 0 : x->second_best_mv.as_mv = *best_mv;
316 0 : *best_mv = mv;
317 : }
318 : }
319 : } else {
320 : // 4 sads in a single call if we are checking every location
321 0 : if (c + 3 <= end_col) {
322 : unsigned int sads[4];
323 : const uint8_t *addrs[4];
324 0 : for (i = 0; i < 4; ++i) {
325 0 : const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
326 0 : addrs[i] = get_buf_from_mv(in_what, &mv);
327 : }
328 0 : fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
329 :
330 0 : for (i = 0; i < 4; ++i) {
331 0 : if (sads[i] < best_sad) {
332 0 : const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
333 0 : const unsigned int sad =
334 0 : sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
335 0 : if (sad < best_sad) {
336 0 : best_sad = sad;
337 0 : x->second_best_mv.as_mv = *best_mv;
338 0 : *best_mv = mv;
339 : }
340 : }
341 : }
342 : } else {
// Fewer than four columns remain: fall back to one sdf call per position.
// NOTE(review): the bound `i < end_col - c` stops at column end_col - 1, so
// column end_col itself is never evaluated on this path even though the outer
// loop treats end_col as inclusive. Confirm whether that is intended before
// changing it (it would alter search results).
343 0 : for (i = 0; i < end_col - c; ++i) {
344 0 : const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
345 : unsigned int sad =
346 0 : fn_ptr->sdf(what->buf, what->stride,
347 : get_buf_from_mv(in_what, &mv), in_what->stride);
348 0 : if (sad < best_sad) {
349 0 : sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
350 0 : if (sad < best_sad) {
351 0 : best_sad = sad;
352 0 : x->second_best_mv.as_mv = *best_mv;
353 0 : *best_mv = mv;
354 : }
355 : }
356 : }
357 : }
358 : }
359 : }
360 : }
361 :
362 0 : return best_sad;
363 : }
364 :
// Step search over the site table in cfg, starting from ref_mv.
//
// x            : search context (source plane, reference plane pre[0], MV
//                limits, MV cost tables); second_best_mv is updated whenever
//                the best position moves.
// cfg          : search-site table. The indexing below (sites 1..4 of a step
//                being the -row, +row, -col, +col moves) matches the layout
//                written by eb_av1_init3smotion_compensation.
// ref_mv       : start position; clamped IN PLACE to x->mv_limits.
// best_mv      : out - best position found.
// search_param : number of leading (largest) steps to skip.
// sad_per_bit  : weight applied to the MV cost.
// num00        : out - reset to 0, then incremented once for every step in
//                which the best position did not move and is still the start
//                position.
// fn_ptr       : SAD kernels (sdf / sdx4df).
// center_mv    : vector the MV cost is measured against, after >> 3.
// Returns the best cost found (SAD + MV cost).
365 0 : int eb_av1_diamond_search_sad_c(IntraBcContext *x, const SearchSiteConfig *cfg,
366 : MV *ref_mv, MV *best_mv, int search_param,
367 : int sad_per_bit, int *num00,
368 : const aom_variance_fn_ptr_t *fn_ptr,
369 : const MV *center_mv) {
370 : int i, j, step;
371 :
372 0 : uint8_t *what = x->plane[0].src.buf;
373 0 : const int what_stride = x->plane[0].src.stride;
374 : const uint8_t *in_what;
375 0 : const int in_what_stride = x->xdplane[0].pre[0].stride;
376 : const uint8_t *best_address;
377 :
378 0 : unsigned int bestsad = INT_MAX;
379 0 : int best_site = 0;
380 0 : int last_site = 0;
381 :
382 : int ref_row;
383 : int ref_col;
384 :
385 : // search_param determines the length of the initial step and hence the number
386 : // of iterations.
387 : // 0 = initial step (MAX_FIRST_STEP) pel
388 : // 1 = (MAX_FIRST_STEP/2) pel,
389 : // 2 = (MAX_FIRST_STEP/4) pel...
// ss is rebased so that ss[1] is the first site of the first step to run.
390 0 : const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
391 0 : const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
392 :
393 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
394 0 : clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
395 : x->mv_limits.row_min, x->mv_limits.row_max);
396 0 : ref_row = ref_mv->row;
397 0 : ref_col = ref_mv->col;
398 0 : *num00 = 0;
399 0 : best_mv->row = ref_row;
400 0 : best_mv->col = ref_col;
401 :
402 : // Work out the start point for the search
403 0 : in_what = x->xdplane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
404 0 : best_address = in_what;
405 :
406 : // Check the starting position
407 0 : bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
408 0 : mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
409 :
// i is the running site index; it is NOT reset between steps, so best_site
// and last_site below are indices into ss[] as a whole.
410 0 : i = 1;
411 :
412 0 : for (step = 0; step < tot_steps; step++) {
413 0 : int all_in = 1, t;
414 :
415 : // All_in is true if every one of the points we are checking are within
416 : // the bounds of the image.
417 0 : all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_limits.row_min);
418 0 : all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_limits.row_max);
419 0 : all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_limits.col_min);
420 0 : all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_limits.col_max);
421 :
422 : // If all the pixels are within the bounds we don't check whether the
423 : // search point is valid in this loop, otherwise we check each point
424 : // for validity..
425 0 : if (all_in) {
426 : unsigned int sad_array[4];
427 :
// Batched path: evaluate the step's sites four at a time with sdx4df.
428 0 : for (j = 0; j < cfg->searches_per_step; j += 4) {
429 : unsigned char const *block_offset[4];
430 :
431 0 : for (t = 0; t < 4; t++)
432 0 : block_offset[t] = ss[i + t].offset + best_address;
433 :
434 0 : fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
435 : sad_array);
436 :
// i advances together with t here, so it ends the batch pointing at the next
// unvisited site.
437 0 : for (t = 0; t < 4; t++, i++) {
438 0 : if (sad_array[t] < bestsad) {
439 0 : const MV this_mv = { best_mv->row + ss[i].mv.row,
440 0 : best_mv->col + ss[i].mv.col };
441 0 : sad_array[t] +=
442 0 : mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
443 0 : if (sad_array[t] < bestsad) {
444 0 : bestsad = sad_array[t];
445 0 : best_site = i;
446 : }
447 : }
448 : }
449 : }
450 : } else {
// Checked path: one sdf call per site, skipping sites outside mv_limits.
451 0 : for (j = 0; j < cfg->searches_per_step; j++) {
452 : // Trap illegal vectors
453 0 : const MV this_mv = { best_mv->row + ss[i].mv.row,
454 0 : best_mv->col + ss[i].mv.col };
455 :
456 0 : if (is_mv_in(&x->mv_limits, &this_mv)) {
457 0 : const uint8_t *const check_here = ss[i].offset + best_address;
458 : unsigned int thissad =
459 0 : fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
460 :
461 0 : if (thissad < bestsad) {
462 0 : thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
463 0 : if (thissad < bestsad) {
464 0 : bestsad = thissad;
465 0 : best_site = i;
466 : }
467 : }
468 : }
469 0 : i++;
470 : }
471 : }
// A new best site was found during this step: move the search centre there.
472 0 : if (best_site != last_site) {
473 0 : x->second_best_mv.as_mv = *best_mv;
474 0 : best_mv->row += ss[best_site].mv.row;
475 0 : best_mv->col += ss[best_site].mv.col;
476 0 : best_address += ss[best_site].offset;
477 0 : last_site = best_site;
// Optional extension (compiled out unless NEW_DIAMOND_SEARCH is defined):
// keep stepping in the winning direction while the cost keeps improving.
478 : #if defined(NEW_DIAMOND_SEARCH)
479 : while (1) {
480 : const MV this_mv = { best_mv->row + ss[best_site].mv.row,
481 : best_mv->col + ss[best_site].mv.col };
482 : if (is_mv_in(&x->mv_limits, &this_mv)) {
483 : const uint8_t *const check_here = ss[best_site].offset + best_address;
484 : unsigned int thissad =
485 : fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
486 : if (thissad < bestsad) {
487 : thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
488 : if (thissad < bestsad) {
489 : bestsad = thissad;
490 : best_mv->row += ss[best_site].mv.row;
491 : best_mv->col += ss[best_site].mv.col;
492 : best_address += ss[best_site].offset;
493 : continue;
494 : }
495 : }
496 : }
497 : break;
498 : }
499 : #endif
// No move this step and still at the start position: count it for the caller.
500 0 : } else if (best_address == in_what)
501 0 : (*num00)++;
502 : }
503 0 : return bestsad;
504 : }
505 :
// Runs eb_av1_diamond_search_sad_c from mvp_full, optionally repeats it with
// progressively larger step_param values, and optionally finishes with
// eb_av1_refining_search_sad. Every candidate is re-scored with
// eb_av1_get_mvpred_var (MV cost included) and the best vector is stored in
// x->best_mv.as_mv.
//
// pcs           : supplies the search-site table (pcs->ss_cfg).
// step_param    : search_param passed to the first diamond search.
// sadpb         : SAD-per-bit weight forwarded to the searches.
// further_steps : upper bound on the number of additional diamond searches.
// do_refine     : request the final refining search; may be cleared below.
// cost_list     : unused here.
// Returns the best re-scored cost.
506 : /* do_refine: If last step (1-away) of n-step search doesn't pick the center
507 : point as the best match, we will do a final 1-away diamond
508 : refining search */
509 0 : static int full_pixel_diamond(PictureControlSet *pcs, IntraBcContext /*MACROBLOCK*/ *x,
510 : MV *mvp_full, int step_param, int sadpb,
511 : int further_steps, int do_refine, int *cost_list,
512 : const aom_variance_fn_ptr_t *fn_ptr,
513 : const MV *ref_mv) {
514 : MV temp_mv;
515 0 : int thissme, n, num00 = 0;
516 : (void)cost_list;
517 : /*int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
518 : step_param, sadpb, &n, fn_ptr, ref_mv);*/
// n is passed as the num00 out-parameter here: the callee sets it to 0 and
// then counts steps that stayed on the start position, so n is initialised by
// this call.
519 0 : int bestsme = eb_av1_diamond_search_sad_c(x, &pcs->ss_cfg, mvp_full, &temp_mv,
520 : step_param, sadpb, &n, fn_ptr, ref_mv);
521 :
522 0 : if (bestsme < INT_MAX)
523 0 : bestsme = eb_av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
524 0 : x->best_mv.as_mv = temp_mv;
525 :
526 : // If there won't be more n-step search, check to see if refining search is
527 : // needed.
528 0 : if (n > further_steps) do_refine = 0;
529 :
530 0 : while (n < further_steps) {
531 0 : ++n;
532 :
// num00 counts upcoming iterations to skip; it is refilled by each diamond
// search below.
533 0 : if (num00) {
534 0 : num00--;
535 : } else {
536 : /*thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
537 : step_param + n, sadpb, &num00, fn_ptr,
538 : ref_mv);*/
539 0 : thissme = eb_av1_diamond_search_sad_c(x, &pcs->ss_cfg, mvp_full, &temp_mv,
540 : step_param + n, sadpb, &num00, fn_ptr,
541 : ref_mv);
542 :
543 0 : if (thissme < INT_MAX)
544 0 : thissme = eb_av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
545 :
546 : // check to see if refining search is needed.
547 0 : if (num00 > further_steps - n) do_refine = 0;
548 :
549 0 : if (thissme < bestsme) {
550 0 : bestsme = thissme;
551 0 : x->best_mv.as_mv = temp_mv;
552 : }
553 : }
554 : }
555 :
556 : // final 1-away diamond refining search
557 0 : if (do_refine) {
558 0 : const int search_range = 8;
// Refine a copy so x->best_mv is only replaced when the refined result wins.
559 0 : MV best_mv = x->best_mv.as_mv;
560 0 : thissme = eb_av1_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr,
561 : ref_mv);
562 0 : if (thissme < INT_MAX)
563 0 : thissme = eb_av1_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
564 0 : if (thissme < bestsme) {
565 0 : bestsme = thissme;
566 0 : x->best_mv.as_mv = best_mv;
567 : }
568 : }
569 :
570 : // Return cost list.
571 : /* if (cost_list) {
572 : calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, &x->best_mv.as_mv, cost_list);
573 : }*/
574 0 : return bestsme;
575 : }
576 :
577 : #define MIN_RANGE 7
578 : #define MAX_RANGE 256
579 : #define MIN_INTERVAL 1
580 : // Runs a limited-range exhaustive mesh search using a pattern set
581 : // according to the encode speed profile.
//
// pcs            : supplies the mesh patterns (pcs->sf.mesh_patterns).
// centre_mv_full : starting centre of the first mesh pass.
// sadpb          : SAD-per-bit weight forwarded to the mesh search.
// cost_list      : unused here.
// fn_ptr         : SAD / variance kernels.
// ref_mv         : vector the MV cost is measured against (>> 3 for the mesh
//                  passes, as-is for the final variance re-score).
// dst_mv         : out - best vector found.
// Returns INT_MAX, without writing dst_mv, when the first pattern's
// range/interval are outside the accepted bounds; otherwise the cost of the
// best vector, re-scored with eb_av1_get_mvpred_var.
582 0 : static int full_pixel_exhaustive(PictureControlSet *pcs, IntraBcContext *x,
583 : const MV *centre_mv_full, int sadpb,
584 : int *cost_list,
585 : const aom_variance_fn_ptr_t *fn_ptr,
586 : const MV *ref_mv, MV *dst_mv) {
587 : UNUSED(cost_list);
588 0 : const SpeedFeatures *const sf = &pcs->sf;// cpi->sf;
589 0 : MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
590 0 : MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
591 : int bestsme;
592 : int i;
593 0 : int interval = sf->mesh_patterns[0].interval;
594 0 : int range = sf->mesh_patterns[0].range;
595 : int baseline_interval_divisor;
596 :
597 : // Keep track of number of exhaustive calls (this frame in this thread).
598 : //CHKN if (x->ex_search_count_ptr != NULL) ++(*x->ex_search_count_ptr);
599 :
600 : // Trap illegal values for interval and range for this function.
// This check also guarantees interval >= 1, so the division below is safe.
601 0 : if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
602 : (interval > range))
603 0 : return INT_MAX;
604 :
605 0 : baseline_interval_divisor = range / interval;
606 :
607 : // Check size of proposed first range against magnitude of the centre
608 : // value used as a starting point.
609 0 : range = AOMMAX(range, (5 * AOMMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
610 0 : range = AOMMIN(range, MAX_RANGE);
// Grow the interval with the range so the range/interval ratio is preserved.
611 0 : interval = AOMMAX(interval, range / baseline_interval_divisor);
612 :
613 : // initial search
// temp_mv is both the centre (input) and the best-vector output of each pass.
614 0 : bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval,
615 : sadpb, fn_ptr, &temp_mv);
616 :
617 0 : if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
618 : // Progressive searches with range and step size decreasing each time
619 : // till we reach a step size of 1. Then break out.
620 0 : for (i = 1; i < MAX_MESH_STEP; ++i) {
621 : // First pass with coarser step and longer range
622 0 : bestsme = exhuastive_mesh_search(
623 : x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range,
624 : sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv);
625 :
626 0 : if (sf->mesh_patterns[i].interval == 1) break;
627 : }
628 : }
629 :
630 0 : if (bestsme < INT_MAX)
631 0 : bestsme = eb_av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
632 0 : *dst_mv = temp_mv;
633 :
634 : // Return cost list.
635 : /* if (cost_list) {
636 : calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
637 : }*/
638 0 : return bestsme;
639 : }
640 :
// Iterative 4-neighbour refinement of ref_mv.
//
// Each iteration evaluates the four positions one row/column away from
// ref_mv and moves ref_mv to the cheapest one if it beats the current cost.
// Stops after search_range iterations or as soon as no neighbour improves.
//
// x             : search context (source plane, reference plane pre[0], MV
//                 limits, MV cost tables); second_best_mv is updated on moves.
// ref_mv        : in/out - start position, updated IN PLACE to the result.
// error_per_bit : weight applied to the MV cost.
// search_range  : maximum number of iterations.
// fn_ptr        : SAD kernels (sdf / sdx4df).
// center_mv     : vector the MV cost is measured against, after >> 3.
// Returns the best cost found (SAD + MV cost).
641 0 : int eb_av1_refining_search_sad(IntraBcContext *x, MV *ref_mv, int error_per_bit,
642 : int search_range,
643 : const aom_variance_fn_ptr_t *fn_ptr,
644 : const MV *center_mv) {
// Neighbour order must match the `positions` array built below.
645 0 : const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
646 0 : const struct Buf2D *const what = &x->plane[0].src;
647 0 : const struct Buf2D *const in_what = &x->xdplane[0].pre[0];
648 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
649 0 : const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
650 0 : unsigned int best_sad =
651 0 : fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
652 0 : mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
653 : int i, j;
654 :
655 0 : for (i = 0; i < search_range; i++) {
656 0 : int best_site = -1;
// all_in: every neighbour is strictly inside the limits, so the batched
// kernel can be used without per-point checks.
657 0 : const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) &
658 0 : ((ref_mv->row + 1) < x->mv_limits.row_max) &
659 0 : ((ref_mv->col - 1) > x->mv_limits.col_min) &
660 0 : ((ref_mv->col + 1) < x->mv_limits.col_max);
661 :
662 0 : if (all_in) {
663 : unsigned int sads[4];
664 0 : const uint8_t *const positions[4] = { best_address - in_what->stride,
665 0 : best_address - 1, best_address + 1,
666 0 : best_address + in_what->stride };
667 :
668 0 : fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
669 :
670 0 : for (j = 0; j < 4; ++j) {
671 0 : if (sads[j] < best_sad) {
672 0 : const MV mv = { ref_mv->row + neighbors[j].row,
673 0 : ref_mv->col + neighbors[j].col };
674 0 : sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
675 0 : if (sads[j] < best_sad) {
676 0 : best_sad = sads[j];
677 0 : best_site = j;
678 : }
679 : }
680 : }
681 : } else {
// Near a limit: evaluate neighbours one by one and skip those outside.
682 0 : for (j = 0; j < 4; ++j) {
683 0 : const MV mv = { ref_mv->row + neighbors[j].row,
684 0 : ref_mv->col + neighbors[j].col };
685 :
686 0 : if (is_mv_in(&x->mv_limits, &mv)) {
687 : unsigned int sad =
688 0 : fn_ptr->sdf(what->buf, what->stride,
689 : get_buf_from_mv(in_what, &mv), in_what->stride);
690 0 : if (sad < best_sad) {
691 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
692 0 : if (sad < best_sad) {
693 0 : best_sad = sad;
694 0 : best_site = j;
695 : }
696 : }
697 : }
698 : }
699 : }
700 :
// No neighbour improved on the current position: stop early.
701 0 : if (best_site == -1) {
702 0 : break;
703 : } else {
704 0 : x->second_best_mv.as_mv = *ref_mv;
705 0 : ref_mv->row += neighbors[best_site].row;
706 0 : ref_mv->col += neighbors[best_site].col;
707 0 : best_address = get_buf_from_mv(in_what, ref_mv);
708 : }
709 : }
710 :
711 0 : return best_sad;
712 : }
713 : #if OBMC_FLAG
714 1781130 : static int get_obmc_mvpred_var(const IntraBcContext *x, const int32_t *wsrc,
715 : const int32_t *mask, const MV *best_mv,
716 : const MV *center_mv,
717 : const aom_variance_fn_ptr_t *vfp, int use_mvcost,
718 : int is_second) {
719 :
720 1781130 : const struct Buf2d * in_what = (const struct Buf2d *) (&x->xdplane[0].pre[is_second]);
721 1781130 : const MV mv = { best_mv->row * 8, best_mv->col * 8 };
722 : unsigned int unused;
723 :
724 1781130 : return vfp->ovf(get_buf_from_mv((const struct Buf2D *)in_what, best_mv), in_what->stride, wsrc,
725 1781160 : mask, &unused) +
726 1781170 : (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmv_vec_cost,
727 : x->mv_cost_stack, x->errorperbit)
728 3562330 : : 0);
729 : }
730 1781110 : static int obmc_refining_search_sad(const IntraBcContext *x, const int32_t *wsrc,
731 : const int32_t *mask, MV *ref_mv,
732 : int error_per_bit, int search_range,
733 : const aom_variance_fn_ptr_t *fn_ptr,
734 : const MV *center_mv, int is_second) {
735 1781110 : const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
736 :
737 1781110 : const struct Buf2d *in_what = (const struct Buf2d *)(&x->xdplane[0].pre[is_second]);
738 1781110 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
739 1781110 : unsigned int best_sad = fn_ptr->osdf(get_buf_from_mv((const struct Buf2D *)in_what, ref_mv),
740 : in_what->stride, wsrc, mask) +
741 1781170 : mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
742 : int i, j;
743 :
744 3660000 : for (i = 0; i < search_range; i++) {
745 3652260 : int best_site = -1;
746 :
747 18257300 : for (j = 0; j < 4; j++) {
748 14605100 : const MV mv = { ref_mv->row + neighbors[j].row,
749 14605100 : ref_mv->col + neighbors[j].col };
750 14605100 : if (is_mv_in(&x->mv_limits, &mv)) {
751 14585000 : unsigned int sad = fn_ptr->osdf(get_buf_from_mv((const struct Buf2D *)in_what, &mv),
752 : in_what->stride, wsrc, mask);
753 14584600 : if (sad < best_sad) {
754 7177370 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
755 7177380 : if (sad < best_sad) {
756 2235740 : best_sad = sad;
757 2235740 : best_site = j;
758 : }
759 : }
760 : }
761 : }
762 :
763 3652260 : if (best_site == -1) {
764 1773380 : break;
765 : } else {
766 1878880 : ref_mv->row += neighbors[best_site].row;
767 1878880 : ref_mv->col += neighbors[best_site].col;
768 : }
769 : }
770 1781130 : return best_sad;
771 : }
772 :
773 :
774 1781130 : int av1_obmc_full_pixel_search(
775 : ModeDecisionContext *context_ptr,
776 : IntraBcContext *x,
777 : MV *mvp_full,
778 : int sadpb,
779 : const aom_variance_fn_ptr_t *fn_ptr,
780 : const MV *ref_mv,
781 : MV *dst_mv,
782 : int is_second) {
783 :
784 : // obmc_full_pixel_diamond does not provide BDR gain on 360p
785 1781130 : const int32_t *wsrc = context_ptr->wsrc_buf;
786 1781130 : const int32_t *mask = context_ptr->mask_buf;
787 1781130 : const int search_range = 8;
788 1781130 : *dst_mv = *mvp_full;
789 1781130 : clamp_mv(dst_mv, x->mv_limits.col_min, x->mv_limits.col_max,
790 : x->mv_limits.row_min, x->mv_limits.row_max);
791 1781140 : int thissme = obmc_refining_search_sad(
792 : x, wsrc, mask, dst_mv, sadpb, search_range, fn_ptr, ref_mv, is_second);
793 1781130 : if (thissme < INT_MAX)
794 1781130 : thissme = get_obmc_mvpred_var(x, wsrc, mask, dst_mv, ref_mv, fn_ptr, 1, is_second);
795 :
796 1781150 : return thissme;
797 :
798 : }
799 :
800 1781180 : static INLINE void set_subpel_mv_search_range(const MvLimits *mv_limits,
801 : int *col_min, int *col_max,
802 : int *row_min, int *row_max,
803 : const MV *ref_mv) {
804 1781180 : const int max_mv = MAX_FULL_PEL_VAL * 8;
805 1781180 : const int minc = AOMMAX(mv_limits->col_min * 8, ref_mv->col - max_mv);
806 1781180 : const int maxc = AOMMIN(mv_limits->col_max * 8, ref_mv->col + max_mv);
807 1781180 : const int minr = AOMMAX(mv_limits->row_min * 8, ref_mv->row - max_mv);
808 1781180 : const int maxr = AOMMIN(mv_limits->row_max * 8, ref_mv->row + max_mv);
809 :
810 1781180 : *col_min = AOMMAX(MV_LOW + 1, minc);
811 1781180 : *col_max = AOMMIN(MV_UPP - 1, maxc);
812 1781180 : *row_min = AOMMAX(MV_LOW + 1, minr);
813 1781180 : *row_max = AOMMIN(MV_UPP - 1, maxr);
814 1781180 : }
815 : static const MV search_step_table[12] = {
816 : // left, right, up, down
817 : { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
818 : { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
819 : { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
820 : };
821 :
822 :
823 0 : static unsigned int setup_obmc_center_error(
824 : const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
825 : const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
826 : const uint8_t *const y, int y_stride, int offset, int *mvjcost,
827 : int *mvcost[2], unsigned int *sse1, int *distortion) {
828 : unsigned int besterr;
829 0 : besterr = vfp->ovf(y + offset, y_stride, wsrc, mask, sse1);
830 0 : *distortion = besterr;
831 0 : besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
832 0 : return besterr;
833 : }
834 :
835 :
/*
 * Cost of coding the vector (r, c) relative to (rr, rc):
 *   ((joint cost + row cost + column cost) * error_per_bit + 4096) >> 13,
 * or 0 when mvcost is null. The joint-cost index is
 * 2 * (r != rr) + (c != rc).
 *
 * Expands in a scope that provides mvjcost, mvcost, rr, rc and error_per_bit.
 */
#define MVC(r, c)                                                          \
    (mvcost ? (unsigned int)((((int64_t)mvcost[1][(c) - rc] +              \
                               (mvjcost[((r) != rr) * 2 + ((c) != rc)] +   \
                                mvcost[0][(r) - rr])) *                    \
                                  error_per_bit +                          \
                              4096) >>                                     \
                             13)                                           \
            : 0u)
845 :
/*
 * Sub-pixel OBMC variance of the predictor addressed by (r, c): calls
 * vfp->osvf() with the pre() address, the sp() offsets of c and r, the
 * weighted source z and mask; sse receives the function's SSE output.
 *
 * Expands in a scope that provides vfp, y, y_stride, z, mask and sse.
 */
#define DIST(r, c)                                                       \
    (vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask,  \
               &sse))
/*
 * Checks whether the candidate (r, c) scores better than the current best.
 *
 * If (r, c) lies inside [minr, maxr] x [minc, maxc], thismse is set to
 * DIST(r, c) and v to MVC(r, c) + thismse; when v beats besterr, the macro
 * updates besterr, br, bc, *distortion and *sse1. Outside the window v is set
 * to INT_MAX and nothing else changes.
 *
 * Wrapped in do { } while (0) so it behaves as one statement (safe as the
 * body of an unbraced if/else); r and c are evaluated exactly once.
 *
 * Expands in a scope that provides minr, maxr, minc, maxc, thismse, besterr,
 * br, bc, distortion, sse1, sse and whatever DIST / MVC need.
 */
#define CHECK_BETTER(v, r, c)                                                \
    do {                                                                     \
        const int cb_r_ = (r);                                               \
        const int cb_c_ = (c);                                               \
        if (cb_c_ >= minc && cb_c_ <= maxc && cb_r_ >= minr &&               \
            cb_r_ <= maxr) {                                                 \
            thismse = (DIST(cb_r_, cb_c_));                                  \
            if (((v) = MVC(cb_r_, cb_c_) + thismse) < besterr) {             \
                besterr = (v);                                               \
                br = cb_r_;                                                  \
                bc = cb_c_;                                                  \
                *distortion = thismse;                                       \
                *sse1 = sse;                                                 \
            }                                                                \
        } else {                                                             \
            (v) = INT_MAX;                                                   \
        }                                                                    \
    } while (0)
/* Variant selected by SECOND_LEVEL_CHECKS_BEST(0); identical to CHECK_BETTER. */
#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
863 :
864 :
/*
 * Up-sampled counterpart of CHECK_BETTER, selected by
 * SECOND_LEVEL_CHECKS_BEST(1).
 *
 * If (r, c) lies inside [minr, maxr] x [minc, maxc], thismse is set to
 * upsampled_obmc_pref_error() for that vector and v to its mv_err_cost();
 * when v + thismse beats besterr, the macro updates besterr, br, bc,
 * *distortion and *sse1. Outside the window v is set to INT_MAX.
 *
 * Wrapped in do { } while (0) so it behaves as one statement (safe as the
 * body of an unbraced if/else); r and c are evaluated exactly once.
 *
 * Expands in a scope that provides xd, cm, mi_row, mi_col, mask, vfp, z, y,
 * y_stride, w, h, sse, use_accurate_subpel_search, ref_mv, mvjcost, mvcost,
 * error_per_bit, thismse, besterr, br, bc, distortion, sse1 and the
 * minr/maxr/minc/maxc window.
 */
#define CHECK_BETTER1(v, r, c)                                                \
    do {                                                                      \
        const int cb_r_ = (r);                                                \
        const int cb_c_ = (c);                                                \
        if (cb_c_ >= minc && cb_c_ <= maxc && cb_r_ >= minr &&                \
            cb_r_ <= maxr) {                                                  \
            MV this_mv = { cb_r_, cb_c_ };                                    \
            thismse = upsampled_obmc_pref_error(                              \
                xd, cm, mi_row, mi_col, &this_mv, mask, vfp, z,               \
                pre(y, y_stride, cb_r_, cb_c_), y_stride, sp(cb_c_),          \
                sp(cb_r_), w, h, &sse, use_accurate_subpel_search);           \
            (v) = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,              \
                              error_per_bit);                                 \
            if (((v) + thismse) < besterr) {                                  \
                besterr = (v) + thismse;                                      \
                br = cb_r_;                                                   \
                bc = cb_c_;                                                   \
                *distortion = thismse;                                        \
                *sse1 = sse;                                                  \
            }                                                                 \
        } else {                                                              \
            (v) = INT_MAX;                                                    \
        }                                                                     \
    } while (0)
883 :
// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrite of
// SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
// later in the same way.
/*
 * Second-level refinement around the current best (br, bc).
 *
 * (tr, tc) must share a row or a column with (br, bc). When they differ in
 * exactly one coordinate, kc or kr is replaced by that difference; otherwise
 * kr and kc keep the values set by the caller. The macro then tries
 * (br0 + kr, bc0) and (br0, bc0 + kc) and, if either moved the best position,
 * also (br0 + kr, bc0 + kc), each through CHECK_BETTER<k>.
 *
 * Wrapped in do { } while (0) so that `SECOND_LEVEL_CHECKS_BEST(k);` is one
 * statement and is safe as the body of an unbraced if/else.
 *
 * Expands in a scope that provides br, bc, tr, tc, kr, kc and everything the
 * selected CHECK_BETTER<k> macro needs.
 */
#define SECOND_LEVEL_CHECKS_BEST(k)                          \
    do {                                                     \
        unsigned int second;                                 \
        const int br0 = br;                                  \
        const int bc0 = bc;                                  \
        assert(tr == br || tc == bc);                        \
        if (tr == br && tc != bc) {                          \
            kc = bc - tc;                                    \
        } else if (tr != br && tc == bc) {                   \
            kr = br - tr;                                    \
        }                                                    \
        CHECK_BETTER##k(second, br0 + kr, bc0);              \
        CHECK_BETTER##k(second, br0, bc0 + kc);              \
        if (br0 != br || bc0 != bc) {                        \
            CHECK_BETTER##k(second, br0 + kr, bc0 + kc);     \
        }                                                    \
        (void)second;                                        \
    } while (0)
904 :
905 36563600 : static int upsampled_obmc_pref_error(
906 : MacroBlockD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
907 : const MV *const mv, const int32_t *mask, const aom_variance_fn_ptr_t *vfp,
908 : const int32_t *const wsrc, const uint8_t *const y, int y_stride,
909 : int subpel_x_q3, int subpel_y_q3, int w, int h, unsigned int *sse,
910 : int subpel_search) {
911 : unsigned int besterr;
912 :
913 : DECLARE_ALIGNED(16, uint8_t, pred[2 * MAX_SB_SQUARE]);
914 : #if CONFIG_AV1_HIGHBITDEPTH
915 : if (is_cur_buf_hbd(xd)) {
916 : uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred);
917 : aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred8, w, h,
918 : subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd,
919 : subpel_search);
920 : besterr = vfp->ovf(pred8, w, wsrc, mask, sse);
921 : } else {
922 : aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3,
923 : subpel_y_q3, y, y_stride, subpel_search);
924 :
925 : besterr = vfp->ovf(pred, w, wsrc, mask, sse);
926 : }
927 : #else
928 36563600 : aom_upsampled_pred(xd, (const struct AV1Common *const)cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3,
929 : subpel_y_q3, y, y_stride, subpel_search);
930 :
931 36577500 : besterr = vfp->ovf(pred, w, wsrc, mask, sse);
932 : #endif
933 36583300 : return besterr;
934 : }
935 1781170 : static unsigned int upsampled_setup_obmc_center_error(
936 : MacroBlockD *xd, const Av1Common *const cm, int mi_row, int mi_col,
937 : const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
938 : const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
939 : const uint8_t *const y, int y_stride, int w, int h, int offset,
940 : int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion,
941 : int subpel_search) {
942 1781170 : unsigned int besterr = upsampled_obmc_pref_error(
943 : xd, cm, mi_row, mi_col, bestmv, mask, vfp, wsrc, y + offset, y_stride, 0,
944 : 0, w, h, sse1, subpel_search);
945 1781170 : *distortion = besterr;
946 1781170 : besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
947 1781140 : return besterr;
948 : }
949 :
950 :
951 : // convert motion vector component to offset for sv[a]f calc
952 69558600 : static INLINE int sp(int x) { return x & 7; }
953 34780200 : static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
954 34780200 : const int offset = (r >> 3) * stride + (c >> 3);
955 34780200 : return buf + offset;
956 : }
957 :
958 1781160 : int av1_find_best_obmc_sub_pixel_tree_up(
959 : ModeDecisionContext *context_ptr,IntraBcContext *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
960 : MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit,
961 : const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
962 : int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
963 : int is_second, int use_accurate_subpel_search) {
964 1781160 : const int32_t *wsrc = context_ptr->wsrc_buf;
965 1781160 : const int32_t *mask = context_ptr->mask_buf;
966 1781160 : const int *const z = wsrc;
967 1781160 : const int *const src_address = z;
968 1781160 : MacroBlockD *xd = x->xd;
969 1781160 : struct MacroBlockDPlane *const pd = &x->xdplane[0];
970 1781160 : unsigned int besterr = INT_MAX;
971 : unsigned int sse;
972 : unsigned int thismse;
973 :
974 1781160 : int rr = ref_mv->row;
975 1781160 : int rc = ref_mv->col;
976 1781160 : int br = bestmv->row * 8;
977 1781160 : int bc = bestmv->col * 8;
978 1781160 : int hstep = 4;
979 : int iter;
980 1781160 : int round = 3 - forced_stop;
981 1781160 : int tr = br;
982 1781160 : int tc = bc;
983 1781160 : const MV *search_step = search_step_table;
984 1781160 : int idx, best_idx = -1;
985 : unsigned int cost_array[5];
986 : int kr, kc;
987 1781160 : const int w = block_size_wide[context_ptr->blk_geom->bsize];
988 1781160 : const int h = block_size_high[context_ptr->blk_geom->bsize];
989 : int offset;
990 : int y_stride;
991 : const uint8_t *y;
992 :
993 : int minc, maxc, minr, maxr;
994 :
995 1781160 : set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, ref_mv);
996 :
997 1781180 : y = pd->pre[is_second].buf;
998 1781180 : y_stride = pd->pre[is_second].stride;
999 1781180 : offset = bestmv->row * y_stride + bestmv->col;
1000 :
1001 1781180 : if (!allow_hp)
1002 0 : if (round == 3) round = 2;
1003 :
1004 1781180 : bestmv->row *= 8;
1005 1781180 : bestmv->col *= 8;
1006 : // use_accurate_subpel_search can be 0 or 1 or 2
1007 1781180 : if (use_accurate_subpel_search)
1008 1781170 : besterr = upsampled_setup_obmc_center_error(
1009 : xd, cm, mi_row, mi_col, mask, bestmv, ref_mv, error_per_bit, vfp, z, y,
1010 : y_stride, w, h, offset, mvjcost, mvcost, sse1, distortion,
1011 : use_accurate_subpel_search);
1012 : else
1013 3 : besterr = setup_obmc_center_error(mask, bestmv, ref_mv, error_per_bit, vfp,
1014 : z, y, y_stride, offset, mvjcost, mvcost,
1015 : sse1, distortion);
1016 :
1017 7125600 : for (iter = 0; iter < round; ++iter) {
1018 : // Check vertical and horizontal sub-pixel positions.
1019 26706800 : for (idx = 0; idx < 4; ++idx) {
1020 21362200 : tr = br + search_step[idx].row;
1021 21362200 : tc = bc + search_step[idx].col;
1022 42712500 : if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
1023 21348500 : MV this_mv = { tr, tc };
1024 21348500 : if (use_accurate_subpel_search) {
1025 21348300 : thismse = upsampled_obmc_pref_error(
1026 : xd, cm, mi_row, mi_col, &this_mv, mask, vfp, src_address,
1027 : pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse,
1028 : use_accurate_subpel_search);
1029 : } else {
1030 209 : thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc),
1031 : sp(tr), src_address, mask, &sse);
1032 : }
1033 :
1034 21352500 : cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
1035 : mvcost, error_per_bit);
1036 21350300 : if (cost_array[idx] < besterr) {
1037 4615760 : best_idx = idx;
1038 4615760 : besterr = cost_array[idx];
1039 4615760 : *distortion = thismse;
1040 4615760 : *sse1 = sse;
1041 : }
1042 : } else {
1043 13662 : cost_array[idx] = INT_MAX;
1044 : }
1045 : }
1046 :
1047 : // Check diagonal sub-pixel position
1048 5344610 : kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
1049 5344610 : kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
1050 :
1051 5344610 : tc = bc + kc;
1052 5344610 : tr = br + kr;
1053 10687600 : if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
1054 5343050 : MV this_mv = { tr, tc };
1055 :
1056 5343050 : if (use_accurate_subpel_search) {
1057 5343000 : thismse = upsampled_obmc_pref_error(
1058 : xd, cm, mi_row, mi_col, &this_mv, mask, vfp, src_address,
1059 : pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse,
1060 : use_accurate_subpel_search);
1061 : } else {
1062 49 : thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr),
1063 : src_address, mask, &sse);
1064 : }
1065 :
1066 5343120 : cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
1067 : error_per_bit);
1068 :
1069 5342960 : if (cost_array[4] < besterr) {
1070 1282480 : best_idx = 4;
1071 1282480 : besterr = cost_array[4];
1072 1282480 : *distortion = thismse;
1073 1282480 : *sse1 = sse;
1074 : }
1075 : } else {
1076 1557 : cost_array[idx] = INT_MAX;
1077 : }
1078 :
1079 5344520 : if (best_idx < 4 && best_idx >= 0) {
1080 2577570 : br += search_step[best_idx].row;
1081 2577570 : bc += search_step[best_idx].col;
1082 2766940 : } else if (best_idx == 4) {
1083 1282480 : br = tr;
1084 1282480 : bc = tc;
1085 : }
1086 :
1087 5344520 : if (iters_per_step > 1 && best_idx != -1) {
1088 3859900 : if (use_accurate_subpel_search) {
1089 3859900 : SECOND_LEVEL_CHECKS_BEST(1);
1090 : } else {
1091 0 : SECOND_LEVEL_CHECKS_BEST(0);
1092 : }
1093 : }
1094 :
1095 5344470 : tr = br;
1096 5344470 : tc = bc;
1097 :
1098 5344470 : search_step += 4;
1099 5344470 : hstep >>= 1;
1100 5344470 : best_idx = -1;
1101 : }
1102 :
1103 : // These lines insure static analysis doesn't warn that
1104 : // tr and tc aren't used after the above point.
1105 : (void)tr;
1106 : (void)tc;
1107 :
1108 1782750 : bestmv->row = br;
1109 1782750 : bestmv->col = bc;
1110 :
1111 1782750 : return besterr;
1112 : }
1113 :
1114 :
1115 : #endif
1116 0 : int eb_av1_full_pixel_search(PictureControlSet *pcs, IntraBcContext *x, BlockSize bsize,
1117 : MV *mvp_full, int step_param, int method,
1118 : int run_mesh_search, int error_per_bit,
1119 : int *cost_list, const MV *ref_mv, int var_max, int rd,
1120 : int x_pos, int y_pos, int intra) {
1121 : UNUSED (run_mesh_search);
1122 : UNUSED (var_max);
1123 : UNUSED (rd);
1124 :
1125 0 : int32_t ibc_shift = 0;
1126 0 : if (pcs->parent_pcs_ptr->ibc_mode > 0)
1127 0 : ibc_shift = 1;
1128 :
1129 0 : SpeedFeatures * sf = &pcs->sf;
1130 0 : sf->exhaustive_searches_thresh = (1 << 25);
1131 0 : const aom_variance_fn_ptr_t *fn_ptr = &mefn_ptr[bsize];
1132 0 : int var = 0;
1133 :
1134 0 : if (cost_list) {
1135 0 : cost_list[0] = INT_MAX;
1136 0 : cost_list[1] = INT_MAX;
1137 0 : cost_list[2] = INT_MAX;
1138 0 : cost_list[3] = INT_MAX;
1139 0 : cost_list[4] = INT_MAX;
1140 : }
1141 :
1142 : // Keep track of number of searches (this frame in this thread).
1143 : //if (x->m_search_count_ptr != NULL) ++(*x->m_search_count_ptr);
1144 :
1145 0 : switch (method) {
1146 0 : case FAST_DIAMOND:
1147 : //var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
1148 : // cost_list, fn_ptr, 1, ref_mv);
1149 0 : break;
1150 0 : case FAST_HEX:
1151 : //var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
1152 : // cost_list, fn_ptr, 1, ref_mv);
1153 0 : break;
1154 0 : case HEX:
1155 : //var = av1_hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
1156 : // fn_ptr, 1, ref_mv);
1157 0 : break;
1158 0 : case SQUARE:
1159 : //var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
1160 : // fn_ptr, 1, ref_mv);
1161 0 : break;
1162 0 : case BIGDIA:
1163 : //var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
1164 : // fn_ptr, 1, ref_mv);
1165 0 : break;
1166 0 : case NSTEP:
1167 0 : var = full_pixel_diamond(pcs, x, mvp_full, step_param, error_per_bit,
1168 : MAX_MVSEARCH_STEPS - 1 - step_param, 1,
1169 : cost_list, fn_ptr, ref_mv);
1170 :
1171 0 : if (x->is_exhaustive_allowed)
1172 : {
1173 0 : int exhuastive_thr = sf->exhaustive_searches_thresh;
1174 0 : exhuastive_thr >>=
1175 0 : 10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]);
1176 :
1177 0 : exhuastive_thr = exhuastive_thr << ibc_shift;
1178 :
1179 0 : if (var > exhuastive_thr)
1180 : {
1181 : int var_ex;
1182 : MV tmp_mv_ex;
1183 : var_ex =
1184 0 : full_pixel_exhaustive(pcs, x, &x->best_mv.as_mv, error_per_bit,
1185 : cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
1186 :
1187 0 : if (var_ex < var) {
1188 0 : var = var_ex;
1189 0 : x->best_mv.as_mv = tmp_mv_ex;
1190 : }
1191 : }
1192 : }
1193 0 : break;
1194 0 : default: assert(0 && "Invalid search method.");
1195 : }
1196 :
1197 : do {
1198 : //CHKN if (!intra || !av1_use_hash_me(&cpi->common)) break;
1199 :
1200 : // already single ME
1201 : // get block size and original buffer of current block
1202 0 : const int block_height = block_size_high[bsize];
1203 0 : const int block_width = block_size_wide[bsize];
1204 0 : if (block_height == block_width && x_pos >= 0 && y_pos >= 0) {
1205 0 : if (block_width == 4 || block_width == 8 || block_width == 16 ||
1206 0 : block_width == 32 || block_width == 64 || block_width == 128) {
1207 0 : uint8_t *what = x->plane[0].src.buf;
1208 0 : const int what_stride = x->plane[0].src.stride;
1209 : uint32_t hash_value1, hash_value2;
1210 : MV best_hash_mv;
1211 0 : int best_hash_cost = INT_MAX;
1212 :
1213 : // for the hashMap
1214 0 : HashTable *ref_frame_hash = &pcs->hash_table;
1215 :
1216 0 : av1_get_block_hash_value(what, what_stride, block_width, &hash_value1,
1217 : &hash_value2, 0, pcs, x);
1218 :
1219 0 : const int count = av1_hash_table_count(ref_frame_hash, hash_value1);
1220 : // for intra, at least one matching can be found, itself.
1221 0 : if (count <= (intra ? 1 : 0))
1222 0 : break;
1223 : Iterator iterator =
1224 0 : av1_hash_get_first_iterator(ref_frame_hash, hash_value1);
1225 0 : for (int i = 0; i < count; i++, iterator_increment(&iterator)) {
1226 0 : block_hash ref_block_hash = *(block_hash *)(iterator_get(&iterator));
1227 0 : if (hash_value2 == ref_block_hash.hash_value2) {
1228 : // For intra, make sure the prediction is from valid area.
1229 0 : if (intra) {
1230 0 : const int mi_col = x_pos / MI_SIZE;
1231 0 : const int mi_row = y_pos / MI_SIZE;
1232 0 : const MV dv = { 8 * (ref_block_hash.y - y_pos),
1233 0 : 8 * (ref_block_hash.x - x_pos) };
1234 0 : if (!av1_is_dv_valid(dv, x->xd, mi_row, mi_col,
1235 0 : bsize, pcs->parent_pcs_ptr->sequence_control_set_ptr->seq_header.sb_size_log2))
1236 0 : continue;
1237 : }
1238 : MV hash_mv;
1239 0 : hash_mv.col = ref_block_hash.x - x_pos;
1240 0 : hash_mv.row = ref_block_hash.y - y_pos;
1241 0 : if (!is_mv_in(&x->mv_limits, &hash_mv)) continue;
1242 : const int refCost =
1243 0 : eb_av1_get_mvpred_var(x, &hash_mv, ref_mv, fn_ptr, 1);
1244 0 : if (refCost < best_hash_cost) {
1245 0 : best_hash_cost = refCost;
1246 0 : best_hash_mv = hash_mv;
1247 : }
1248 : }
1249 : }
1250 :
1251 0 : if (best_hash_cost < var) {
1252 0 : x->second_best_mv = x->best_mv;
1253 0 : x->best_mv.as_mv = best_hash_mv;
1254 0 : var = best_hash_cost;
1255 : }
1256 : }
1257 : }
1258 : } while (0);
1259 :
1260 0 : return 0;//CHKN var;
1261 : }
|