Line data Source code
1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include <assert.h>
13 : #include <math.h>
14 : #include "EbDefinitions.h"
15 : #include "EbPictureBufferDesc.h"
16 : #include "EbPsnr.h"
17 : #include "EbPictureControlSet.h"
18 : #include "aom_dsp_rtcd.h"
19 : #include "EbRestoration.h"
20 : #include "EbRestorationPick.h"
21 :
22 : #include "EbRestProcess.h"
23 :
24 : void av1_foreach_rest_unit_in_frame_seg(Av1Common *cm, int32_t plane,
25 : RestTileStartVisitor on_tile,
26 : RestUnitVisitor on_rest_unit,
27 : void *priv,
28 : PictureControlSet *picture_control_set_ptr,
29 : uint32_t segment_index);
30 :
31 : void eb_av1_selfguided_restoration_c(const uint8_t *dgd8, int32_t width, int32_t height,
32 : int32_t dgd_stride, int32_t *flt0, int32_t *flt1,
33 : int32_t flt_stride, int32_t sgr_params_idx,
34 : int32_t bit_depth, int32_t highbd);
35 : void av1_foreach_rest_unit_in_frame(Av1Common *cm, int32_t plane,
36 : RestTileStartVisitor on_tile,
37 : RestUnitVisitor on_rest_unit,
38 : void *priv);
39 :
40 : // When set to RESTORE_WIENER or RESTORE_SGRPROJ only those are allowed.
41 : // When set to RESTORE_TYPES we allow switchable.
42 : //static const RestorationType force_restore_type = RESTORE_TYPES;
43 :
44 : // Number of Wiener iterations
45 : #define NUM_WIENER_ITERS 5
46 : // Working precision for Wiener filter coefficients
47 : #define WIENER_TAP_SCALE_FACTOR ((int64_t)1 << 16)
48 :
49 : typedef int64_t(*sse_part_extractor_type)(const Yv12BufferConfig *a,
50 : const Yv12BufferConfig *b,
51 : int32_t hstart, int32_t width, int32_t vstart,
52 : int32_t height);
53 :
54 : #define NUM_EXTRACTORS (3 * (1 + 1))
55 :
56 : static const sse_part_extractor_type sse_part_extractors[NUM_EXTRACTORS] = {
57 : eb_aom_get_y_sse_part, eb_aom_get_u_sse_part,
58 : eb_aom_get_v_sse_part, eb_aom_highbd_get_y_sse_part,
59 : eb_aom_highbd_get_u_sse_part, eb_aom_highbd_get_v_sse_part,
60 : };
61 9645 : static int64_t sse_restoration_unit(const RestorationTileLimits *limits,
62 : const Yv12BufferConfig *src,
63 : const Yv12BufferConfig *dst, int32_t plane,
64 : int32_t highbd) {
65 19290 : return sse_part_extractors[3 * highbd + plane](
66 9645 : src, dst, limits->h_start, limits->h_end - limits->h_start,
67 9645 : limits->v_start, limits->v_end - limits->v_start);
68 : }
69 :
70 : typedef struct {
71 : const Yv12BufferConfig *src;
72 : Yv12BufferConfig *dst;
73 :
74 : Av1Common *cm;
75 : const Macroblock *x;
76 : int32_t plane;
77 : int32_t plane_width;
78 : int32_t plane_height;
79 : RestUnitSearchInfo *rusi;
80 : RestUnitSearchInfo *rusi_pic;
81 : uint32_t pic_num;
82 : Yv12BufferConfig * org_frame_to_show;
83 : int32_t *tmpbuf;
84 :
85 : uint8_t *dgd_buffer;
86 : int32_t dgd_stride;
87 : const uint8_t *src_buffer;
88 : int32_t src_stride;
89 :
90 : // sse and bits are initialised by reset_rsc in search_rest_type
91 : int64_t sse;
92 : int64_t bits;
93 : int32_t tile_y0, tile_stripe0;
94 :
95 : // sgrproj and wiener are initialised by rsc_on_tile when starting the first
96 : // tile in the frame.
97 : SgrprojInfo sgrproj;
98 : WienerInfo wiener;
99 : } RestSearchCtxt;
100 :
101 1140 : static void rsc_on_tile(int32_t tile_row, int32_t tile_col, void *priv) {
102 : (void)tile_col;
103 :
104 1140 : RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
105 1140 : set_default_sgrproj(&rsc->sgrproj);
106 1140 : set_default_wiener(&rsc->wiener);
107 :
108 1140 : rsc->tile_stripe0 =
109 1140 : (tile_row == 0) ? 0 : rsc->cm->rst_end_stripe[tile_row - 1];
110 1140 : }
111 :
112 600 : static void reset_rsc(RestSearchCtxt *rsc) {
113 600 : rsc->sse = 0;
114 600 : rsc->bits = 0;
115 600 : }
116 180 : static void init_rsc_seg(
117 : Yv12BufferConfig *org_fts,
118 : const Yv12BufferConfig *src, Av1Common *cm,
119 : const Macroblock *x, int32_t plane, RestUnitSearchInfo *rusi,
120 : Yv12BufferConfig *dst, RestSearchCtxt *rsc) {
121 180 : rsc->src = src;
122 180 : rsc->dst = dst;
123 180 : rsc->cm = cm;
124 180 : rsc->x = x;
125 180 : rsc->plane = plane;
126 180 : rsc->rusi = rusi;
127 :
128 180 : rsc->org_frame_to_show = org_fts;
129 :
130 180 : const Yv12BufferConfig *dgd = org_fts;
131 180 : const int32_t is_uv = plane != AOM_PLANE_Y;
132 180 : rsc->plane_width = src->crop_widths[is_uv];
133 180 : rsc->plane_height = src->crop_heights[is_uv];
134 180 : rsc->src_buffer = src->buffers[plane];
135 180 : rsc->src_stride = src->strides[is_uv];
136 180 : rsc->dgd_buffer = dgd->buffers[plane];
137 180 : rsc->dgd_stride = dgd->strides[is_uv];
138 : assert(src->crop_widths[is_uv] == dgd->crop_widths[is_uv]);
139 : assert(src->crop_heights[is_uv] == dgd->crop_heights[is_uv]);
140 180 : }
141 :
142 0 : static void init_rsc(const Yv12BufferConfig *src, Av1Common *cm,
143 : const Macroblock *x, int32_t plane, RestUnitSearchInfo *rusi,
144 : Yv12BufferConfig *dst, RestSearchCtxt *rsc) {
145 0 : rsc->src = src;
146 0 : rsc->dst = dst;
147 0 : rsc->cm = cm;
148 0 : rsc->x = x;
149 0 : rsc->plane = plane;
150 0 : rsc->rusi = rusi;
151 :
152 0 : const Yv12BufferConfig *dgd = cm->frame_to_show;
153 0 : const int32_t is_uv = plane != AOM_PLANE_Y;
154 0 : rsc->plane_width = src->crop_widths[is_uv];
155 0 : rsc->plane_height = src->crop_heights[is_uv];
156 0 : rsc->src_buffer = src->buffers[plane];
157 0 : rsc->src_stride = src->strides[is_uv];
158 0 : rsc->dgd_buffer = dgd->buffers[plane];
159 0 : rsc->dgd_stride = dgd->strides[is_uv];
160 : assert(src->crop_widths[is_uv] == dgd->crop_widths[is_uv]);
161 : assert(src->crop_heights[is_uv] == dgd->crop_heights[is_uv]);
162 0 : }
163 :
164 0 : static int64_t try_restoration_unit(const RestSearchCtxt *rsc,
165 : const RestorationTileLimits *limits,
166 : const AV1PixelRect *tile_rect,
167 : const RestorationUnitInfo *rui) {
168 0 : const Av1Common *const cm = rsc->cm;
169 0 : const int32_t plane = rsc->plane;
170 0 : const int32_t is_uv = plane > 0;
171 0 : const RestorationInfo *rsi = &cm->rst_info[plane];
172 : RestorationLineBuffers rlbs;
173 0 : const int32_t bit_depth = cm->bit_depth;
174 0 : const int32_t highbd = cm->use_highbitdepth;
175 :
176 0 : const Yv12BufferConfig *fts = cm->frame_to_show;
177 : // TODO(yunqing): For now, only use optimized LR filter in decoder. Can be
178 : // also used in encoder.
179 0 : const int32_t optimized_lr = 0;
180 :
181 0 : eb_av1_loop_restoration_filter_unit(
182 : 1,
183 : limits, rui, &rsi->boundaries, &rlbs, tile_rect, rsc->tile_stripe0,
184 0 : is_uv && cm->subsampling_x, is_uv && cm->subsampling_y, highbd, bit_depth,
185 0 : fts->buffers[plane], fts->strides[is_uv], rsc->dst->buffers[plane],
186 0 : rsc->dst->strides[is_uv], cm->rst_tmpbuf, optimized_lr);
187 :
188 0 : return sse_restoration_unit(limits, rsc->src, rsc->dst, plane, highbd);
189 : }
190 9345 : static int64_t try_restoration_unit_seg(const RestSearchCtxt *rsc,
191 : const RestorationTileLimits *limits,
192 : const AV1PixelRect *tile_rect,
193 : const RestorationUnitInfo *rui) {
194 9345 : const Av1Common *const cm = rsc->cm;
195 9345 : const int32_t plane = rsc->plane;
196 9345 : const int32_t is_uv = plane > 0;
197 9345 : const RestorationInfo *rsi = &cm->rst_info[plane];
198 : RestorationLineBuffers rlbs;
199 9345 : const int32_t bit_depth = cm->bit_depth;
200 9345 : const int32_t highbd = cm->use_highbitdepth;
201 :
202 9345 : const Yv12BufferConfig *fts = rsc->org_frame_to_show;
203 :
204 : // TODO(yunqing): For now, only use optimized LR filter in decoder. Can be
205 : // also used in encoder.
206 9345 : const int32_t optimized_lr = 0;
207 :
208 12326 : eb_av1_loop_restoration_filter_unit(
209 : 1,
210 : limits, rui, &rsi->boundaries, &rlbs, tile_rect, rsc->tile_stripe0,
211 5962 : is_uv && cm->subsampling_x, is_uv && cm->subsampling_y, highbd, bit_depth,
212 9345 : fts->buffers[plane], fts->strides[is_uv], rsc->dst->buffers[plane],
213 9345 : rsc->dst->strides[is_uv], rsc->tmpbuf, optimized_lr);
214 :
215 9345 : return sse_restoration_unit(limits, rsc->src, rsc->dst, plane, highbd);
216 : }
217 :
218 0 : int64_t eb_av1_lowbd_pixel_proj_error_c(const uint8_t *src8, int32_t width, int32_t height,
219 : int32_t src_stride, const uint8_t *dat8,
220 : int32_t dat_stride, int32_t *flt0,
221 : int32_t flt0_stride, int32_t *flt1,
222 : int32_t flt1_stride, int32_t xq[2],
223 : const SgrParamsType *params) {
224 : int32_t i, j;
225 0 : const uint8_t *src = src8;
226 0 : const uint8_t *dat = dat8;
227 0 : int64_t err = 0;
228 0 : if (params->r[0] > 0 && params->r[1] > 0) {
229 0 : for (i = 0; i < height; ++i) {
230 0 : for (j = 0; j < width; ++j) {
231 : assert(flt1[j] < (1 << 15) && flt1[j] > -(1 << 15));
232 : assert(flt0[j] < (1 << 15) && flt0[j] > -(1 << 15));
233 0 : const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS);
234 0 : int32_t v = u << SGRPROJ_PRJ_BITS;
235 0 : v += xq[0] * (flt0[j] - u) + xq[1] * (flt1[j] - u);
236 0 : const int32_t e =
237 0 : ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j];
238 0 : err += e * e;
239 : }
240 0 : dat += dat_stride;
241 0 : src += src_stride;
242 0 : flt0 += flt0_stride;
243 0 : flt1 += flt1_stride;
244 : }
245 : }
246 0 : else if (params->r[0] > 0) {
247 0 : for (i = 0; i < height; ++i) {
248 0 : for (j = 0; j < width; ++j) {
249 : assert(flt0[j] < (1 << 15) && flt0[j] > -(1 << 15));
250 0 : const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS);
251 0 : int32_t v = u << SGRPROJ_PRJ_BITS;
252 0 : v += xq[0] * (flt0[j] - u);
253 0 : const int32_t e =
254 0 : ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j];
255 0 : err += e * e;
256 : }
257 0 : dat += dat_stride;
258 0 : src += src_stride;
259 0 : flt0 += flt0_stride;
260 : }
261 : }
262 0 : else if (params->r[1] > 0) {
263 0 : for (i = 0; i < height; ++i) {
264 0 : for (j = 0; j < width; ++j) {
265 : assert(flt1[j] < (1 << 15) && flt1[j] > -(1 << 15));
266 0 : const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS);
267 0 : int32_t v = u << SGRPROJ_PRJ_BITS;
268 0 : v += xq[1] * (flt1[j] - u);
269 0 : const int32_t e =
270 0 : ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j];
271 0 : err += e * e;
272 : }
273 0 : dat += dat_stride;
274 0 : src += src_stride;
275 0 : flt1 += flt1_stride;
276 : }
277 : }
278 : else {
279 0 : for (i = 0; i < height; ++i) {
280 0 : for (j = 0; j < width; ++j) {
281 0 : const int32_t e = (int32_t)(dat[j]) - src[j];
282 0 : err += e * e;
283 : }
284 0 : dat += dat_stride;
285 0 : src += src_stride;
286 : }
287 : }
288 :
289 0 : return err;
290 : }
291 :
292 0 : int64_t eb_av1_highbd_pixel_proj_error_c(const uint8_t *src8, int32_t width,
293 : int32_t height, int32_t src_stride,
294 : const uint8_t *dat8, int32_t dat_stride,
295 : int32_t *flt0, int32_t flt0_stride,
296 : int32_t *flt1, int32_t flt1_stride, int32_t xq[2],
297 : const SgrParamsType *params) {
298 0 : const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
299 0 : const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
300 : int32_t i, j;
301 0 : int64_t err = 0;
302 0 : const int32_t half = 1 << (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS - 1);
303 0 : if (params->r[0] > 0 && params->r[1] > 0) {
304 0 : int32_t xq0 = xq[0];
305 0 : int32_t xq1 = xq[1];
306 0 : for (i = 0; i < height; ++i) {
307 0 : for (j = 0; j < width; ++j) {
308 0 : const int32_t d = dat[j];
309 0 : const int32_t s = src[j];
310 0 : const int32_t u = (int32_t)(d << SGRPROJ_RST_BITS);
311 0 : int32_t v0 = flt0[j] - u;
312 0 : int32_t v1 = flt1[j] - u;
313 0 : int32_t v = half;
314 0 : v += xq0 * v0;
315 0 : v += xq1 * v1;
316 0 : const int32_t e = (v >> (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS)) + d - s;
317 0 : err += e * e;
318 : }
319 0 : dat += dat_stride;
320 0 : flt0 += flt0_stride;
321 0 : flt1 += flt1_stride;
322 0 : src += src_stride;
323 : }
324 : }
325 0 : else if (params->r[0] > 0 || params->r[1] > 0) {
326 : int32_t exq;
327 : int32_t *flt;
328 : int32_t flt_stride;
329 0 : if (params->r[0] > 0) {
330 0 : exq = xq[0];
331 0 : flt = flt0;
332 0 : flt_stride = flt0_stride;
333 : }
334 : else {
335 0 : exq = xq[1];
336 0 : flt = flt1;
337 0 : flt_stride = flt1_stride;
338 : }
339 0 : for (i = 0; i < height; ++i) {
340 0 : for (j = 0; j < width; ++j) {
341 0 : const int32_t d = dat[j];
342 0 : const int32_t s = src[j];
343 0 : const int32_t u = (int32_t)(d << SGRPROJ_RST_BITS);
344 0 : int32_t v = half;
345 0 : v += exq * (flt[j] - u);
346 0 : const int32_t e = (v >> (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS)) + d - s;
347 0 : err += e * e;
348 : }
349 0 : dat += dat_stride;
350 0 : flt += flt_stride;
351 0 : src += src_stride;
352 : }
353 : }
354 : else {
355 0 : for (i = 0; i < height; ++i) {
356 0 : for (j = 0; j < width; ++j) {
357 0 : const int32_t d = dat[j];
358 0 : const int32_t s = src[j];
359 0 : const int32_t e = d - s;
360 0 : err += e * e;
361 : }
362 0 : dat += dat_stride;
363 0 : src += src_stride;
364 : }
365 : }
366 0 : return err;
367 : }
368 :
369 36589 : static int64_t get_pixel_proj_error(const uint8_t *src8, int32_t width, int32_t height,
370 : int32_t src_stride, const uint8_t *dat8,
371 : int32_t dat_stride, int32_t use_highbitdepth,
372 : int32_t *flt0, int32_t flt0_stride,
373 : int32_t *flt1, int32_t flt1_stride, int32_t *xqd,
374 : const SgrParamsType *params) {
375 : int32_t xq[2];
376 36589 : eb_decode_xq(xqd, xq, params);
377 36589 : if (!use_highbitdepth) {
378 36589 : return eb_av1_lowbd_pixel_proj_error(src8, width, height, src_stride, dat8,
379 : dat_stride, flt0, flt0_stride, flt1,
380 : flt1_stride, xq, params);
381 : }
382 : else {
383 0 : return eb_av1_highbd_pixel_proj_error(src8, width, height, src_stride, dat8,
384 : dat_stride, flt0, flt0_stride, flt1,
385 : flt1_stride, xq, params);
386 : }
387 : }
388 :
389 : #define USE_SGRPROJ_REFINEMENT_SEARCH 1
390 4800 : static int64_t finer_search_pixel_proj_error(
391 : const uint8_t *src8, int32_t width, int32_t height, int32_t src_stride,
392 : const uint8_t *dat8, int32_t dat_stride, int32_t use_highbitdepth, int32_t *flt0,
393 : int32_t flt0_stride, int32_t *flt1, int32_t flt1_stride, int32_t start_step, int32_t *xqd,
394 : const SgrParamsType *params) {
395 4800 : int64_t err = get_pixel_proj_error(
396 : src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth, flt0,
397 : flt0_stride, flt1, flt1_stride, xqd, params);
398 : (void)start_step;
399 : #if USE_SGRPROJ_REFINEMENT_SEARCH
400 : int64_t err2;
401 4800 : int32_t tap_min[] = { SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MIN1 };
402 4800 : int32_t tap_max[] = { SGRPROJ_PRJ_MAX0, SGRPROJ_PRJ_MAX1 };
403 14400 : for (int32_t s = start_step; s >= 1; s >>= 1) {
404 23073 : for (int32_t p = 0; p < 2; ++p) {
405 16930 : if ((params->r[0] == 0 && p == 0) || (params->r[1] == 0 && p == 1))
406 3082 : continue;
407 13848 : int32_t skip = 0;
408 : do {
409 20159 : if (xqd[p] - s >= tap_min[p]) {
410 17519 : xqd[p] -= s;
411 : err2 =
412 17519 : get_pixel_proj_error(src8, width, height, src_stride, dat8,
413 : dat_stride, use_highbitdepth, flt0,
414 : flt0_stride, flt1, flt1_stride, xqd, params);
415 17519 : if (err2 > err)
416 9840 : xqd[p] += s;
417 : else {
418 7679 : err = err2;
419 7679 : skip = 1;
420 : // At the highest step size continue moving in the same direction
421 7679 : if (s == start_step) continue;
422 : }
423 : }
424 13848 : break;
425 : } while (1);
426 13848 : if (skip) break;
427 : do {
428 16607 : if (xqd[p] + s <= tap_max[p]) {
429 14270 : xqd[p] += s;
430 : err2 =
431 14270 : get_pixel_proj_error(src8, width, height, src_stride, dat8,
432 : dat_stride, use_highbitdepth, flt0,
433 : flt0_stride, flt1, flt1_stride, xqd, params);
434 14270 : if (err2 > err)
435 6928 : xqd[p] -= s;
436 : else {
437 7342 : err = err2;
438 : // At the highest step size continue moving in the same direction
439 7342 : if (s == start_step) continue;
440 : }
441 : }
442 10391 : break;
443 : } while (1);
444 : }
445 : }
446 : #endif // USE_SGRPROJ_REFINEMENT_SEARCH
447 4800 : return err;
448 : }
449 :
450 : extern void RunEmms();
451 :
452 0 : void get_proj_subspace_c(const uint8_t *src8, int32_t width, int32_t height,
453 : int32_t src_stride, const uint8_t *dat8,
454 : int32_t dat_stride, int32_t use_highbitdepth,
455 : int32_t *flt0, int32_t flt0_stride, int32_t *flt1,
456 : int32_t flt1_stride, int32_t *xq,
457 : const SgrParamsType *params) {
458 : int32_t i, j;
459 0 : double H[2][2] = { { 0, 0 }, { 0, 0 } };
460 0 : double C[2] = { 0, 0 };
461 : double Det;
462 : double x[2];
463 0 : const int32_t size = width * height;
464 :
465 0 : aom_clear_system_state();
466 0 : RunEmms();
467 :
468 : // Default
469 0 : xq[0] = 0;
470 0 : xq[1] = 0;
471 0 : if (!use_highbitdepth) {
472 0 : const uint8_t *src = src8;
473 0 : const uint8_t *dat = dat8;
474 0 : for (i = 0; i < height; ++i) {
475 0 : for (j = 0; j < width; ++j) {
476 0 : const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
477 0 : const double s =
478 0 : (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
479 0 : const double f1 =
480 0 : (params->r[0] > 0) ? (double)flt0[i * flt0_stride + j] - u : 0;
481 0 : const double f2 =
482 0 : (params->r[1] > 0) ? (double)flt1[i * flt1_stride + j] - u : 0;
483 0 : H[0][0] += f1 * f1;
484 0 : H[1][1] += f2 * f2;
485 0 : H[0][1] += f1 * f2;
486 0 : C[0] += f1 * s;
487 0 : C[1] += f2 * s;
488 : }
489 : }
490 : }
491 : else {
492 0 : const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
493 0 : const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
494 0 : for (i = 0; i < height; ++i) {
495 0 : for (j = 0; j < width; ++j) {
496 0 : const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
497 0 : const double s =
498 0 : (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
499 0 : const double f1 =
500 0 : (params->r[0] > 0) ? (double)flt0[i * flt0_stride + j] - u : 0;
501 0 : const double f2 =
502 0 : (params->r[1] > 0) ? (double)flt1[i * flt1_stride + j] - u : 0;
503 0 : H[0][0] += f1 * f1;
504 0 : H[1][1] += f2 * f2;
505 0 : H[0][1] += f1 * f2;
506 0 : C[0] += f1 * s;
507 0 : C[1] += f2 * s;
508 : }
509 : }
510 : }
511 0 : H[0][0] /= size;
512 0 : H[0][1] /= size;
513 0 : H[1][1] /= size;
514 0 : H[1][0] = H[0][1];
515 0 : C[0] /= size;
516 0 : C[1] /= size;
517 0 : if (params->r[0] == 0) {
518 : // H matrix is now only the scalar H[1][1]
519 : // C vector is now only the scalar C[1]
520 0 : Det = H[1][1];
521 0 : if (Det < 1e-8) return; // ill-posed, return default values
522 0 : x[0] = 0;
523 0 : x[1] = C[1] / Det;
524 :
525 0 : xq[0] = 0;
526 0 : xq[1] = (int32_t)rint(x[1] * (1 << SGRPROJ_PRJ_BITS));
527 : }
528 0 : else if (params->r[1] == 0) {
529 : // H matrix is now only the scalar H[0][0]
530 : // C vector is now only the scalar C[0]
531 0 : Det = H[0][0];
532 0 : if (Det < 1e-8) return; // ill-posed, return default values
533 0 : x[0] = C[0] / Det;
534 0 : x[1] = 0;
535 :
536 0 : xq[0] = (int32_t)rint(x[0] * (1 << SGRPROJ_PRJ_BITS));
537 0 : xq[1] = 0;
538 : }
539 : else {
540 0 : Det = (H[0][0] * H[1][1] - H[0][1] * H[1][0]);
541 0 : if (Det < 1e-8) return; // ill-posed, return default values
542 0 : x[0] = (H[1][1] * C[0] - H[0][1] * C[1]) / Det;
543 0 : x[1] = (H[0][0] * C[1] - H[1][0] * C[0]) / Det;
544 :
545 0 : xq[0] = (int32_t)rint(x[0] * (1 << SGRPROJ_PRJ_BITS));
546 0 : xq[1] = (int32_t)rint(x[1] * (1 << SGRPROJ_PRJ_BITS));
547 : }
548 : }
549 :
550 4800 : static INLINE void encode_xq(int32_t *xq, int32_t *xqd,
551 : const SgrParamsType *params)
552 : {
553 4800 : if (params->r[0] == 0) {
554 1200 : xqd[0] = 0;
555 1200 : xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xq[1], SGRPROJ_PRJ_MIN1,
556 : SGRPROJ_PRJ_MAX1);
557 : }
558 3600 : else if (params->r[1] == 0) {
559 600 : xqd[0] = clamp(xq[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0);
560 600 : xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xqd[0], SGRPROJ_PRJ_MIN1,
561 : SGRPROJ_PRJ_MAX1);
562 : }
563 : else {
564 3000 : xqd[0] = clamp(xq[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0);
565 3000 : xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xqd[0] - xq[1], SGRPROJ_PRJ_MIN1,
566 : SGRPROJ_PRJ_MAX1);
567 : }
568 4800 : }
569 :
570 : // Apply the self-guided filter across an entire restoration unit.
571 4800 : static INLINE void apply_sgr(int32_t sgr_params_idx, const uint8_t *dat8,
572 : int32_t width, int32_t height, int32_t dat_stride, int32_t use_highbd,
573 : int32_t bit_depth, int32_t pu_width, int32_t pu_height, int32_t *flt0,
574 : int32_t *flt1, int32_t flt_stride)
575 : {
576 33600 : for (int32_t i = 0; i < height; i += pu_height)
577 : {
578 28800 : const int32_t h = AOMMIN(pu_height, height - i);
579 28800 : int32_t *flt0_row = flt0 + i * flt_stride;
580 28800 : int32_t *flt1_row = flt1 + i * flt_stride;
581 28800 : const uint8_t *dat8_row = dat8 + i * dat_stride;
582 :
583 : // Iterate over the stripe in blocks of width pu_width
584 201599 : for (int32_t j = 0; j < width; j += pu_width) {
585 172799 : const int32_t w = AOMMIN(pu_width, width - j);
586 :
587 : //CHKN SSE
588 172799 : eb_av1_selfguided_restoration(dat8_row + j, w, h, dat_stride, flt0_row + j,
589 172799 : flt1_row + j, flt_stride, sgr_params_idx,
590 : bit_depth, use_highbd);
591 : }
592 : }
593 4800 : }
594 :
595 300 : static SgrprojInfo search_selfguided_restoration(
596 : const uint8_t *dat8, int32_t width, int32_t height, int32_t dat_stride,
597 : const uint8_t *src8, int32_t src_stride, int32_t use_highbitdepth, int32_t bit_depth,
598 : int32_t pu_width, int32_t pu_height, int32_t *rstbuf
599 : ,
600 : int8_t sg_ref_frame_ep[2],
601 : int32_t sg_frame_ep_cnt[SGRPROJ_PARAMS],
602 : int8_t step
603 : )
604 : {
605 300 : int32_t *flt0 = rstbuf;
606 300 : int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX;
607 300 : int32_t ep, bestep = 0;
608 300 : int64_t besterr = -1;
609 300 : int32_t exqd[2], bestxqd[2] = { 0, 0 };
610 300 : int32_t flt_stride = ((width + 7) & ~7) + 8;
611 : assert(pu_width == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
612 : pu_width == RESTORATION_PROC_UNIT_SIZE);
613 : assert(pu_height == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
614 : pu_height == RESTORATION_PROC_UNIT_SIZE);
615 590 : int8_t mid_ep = sg_ref_frame_ep[0] < 0 && sg_ref_frame_ep[1] < 0 ? 0 :
616 290 : sg_ref_frame_ep[1] < 0 ? sg_ref_frame_ep[0] :
617 290 : sg_ref_frame_ep[0] < 0 ? sg_ref_frame_ep[1] :
618 290 : (sg_ref_frame_ep[0] + sg_ref_frame_ep[1]) / 2;
619 :
620 300 : int8_t start_ep = sg_ref_frame_ep[0] < 0 && sg_ref_frame_ep[1] < 0 ? 0 : AOMMAX(0, mid_ep - step);
621 300 : int8_t end_ep = sg_ref_frame_ep[0] < 0 && sg_ref_frame_ep[1] < 0 ? SGRPROJ_PARAMS : AOMMIN(SGRPROJ_PARAMS, mid_ep + step);
622 :
623 5100 : for (ep = start_ep; ep < end_ep; ep++) {
624 : int32_t exq[2];
625 4800 : apply_sgr(ep, dat8, width, height, dat_stride, use_highbitdepth, bit_depth,
626 : pu_width, pu_height, flt0, flt1, flt_stride);
627 4800 : aom_clear_system_state();
628 4800 : const SgrParamsType *const params = &eb_sgr_params[ep];
629 4800 : get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride,
630 : use_highbitdepth, flt0, flt_stride, flt1, flt_stride, exq,
631 : params);
632 4800 : aom_clear_system_state();
633 4800 : encode_xq(exq, exqd, params);
634 4800 : int64_t err = finer_search_pixel_proj_error(
635 : src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth,
636 : flt0, flt_stride, flt1, flt_stride, 2, exqd, params);
637 4800 : if (besterr == -1 || err < besterr) {
638 1273 : bestep = ep;
639 1273 : besterr = err;
640 1273 : bestxqd[0] = exqd[0];
641 1273 : bestxqd[1] = exqd[1];
642 : }
643 : }
644 300 : sg_frame_ep_cnt[bestep]++;
645 :
646 : SgrprojInfo ret;
647 300 : ret.ep = bestep;
648 300 : ret.xqd[0] = bestxqd[0];
649 300 : ret.xqd[1] = bestxqd[1];
650 300 : return ret;
651 : }
652 : extern int32_t eb_aom_count_primitive_refsubexpfin(uint16_t n, uint16_t k, uint16_t ref, uint16_t v);
653 :
654 414 : static int32_t count_sgrproj_bits(SgrprojInfo *sgrproj_info,
655 : SgrprojInfo *ref_sgrproj_info) {
656 414 : int32_t bits = SGRPROJ_PARAMS_BITS;
657 414 : const SgrParamsType *params = &eb_sgr_params[sgrproj_info->ep];
658 414 : if (params->r[0] > 0)
659 411 : bits += eb_aom_count_primitive_refsubexpfin(
660 : SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
661 411 : (uint16_t)(ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0),
662 411 : (uint16_t)(sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0));
663 414 : if (params->r[1] > 0)
664 324 : bits += eb_aom_count_primitive_refsubexpfin(
665 : SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
666 324 : (uint16_t)(ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1),
667 324 : (uint16_t)(sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1));
668 414 : return bits;
669 : }
670 :
/* Maps the self-guided filter mode to the search half-range ("step") around
 * the reference parameter-set index:
 *   mode 1 -> 0, mode 2 -> 1, mode 3 -> 4, every other mode (4 included) -> 16. */
int8_t get_sg_step(int8_t sg_filter_mode) {
    if (sg_filter_mode == 1)
        return 0;
    if (sg_filter_mode == 2)
        return 1;
    if (sg_filter_mode == 3)
        return 4;
    return 16;
}
692 :
693 0 : static void search_sgrproj(const RestorationTileLimits *limits,
694 : const AV1PixelRect *tile, int32_t rest_unit_idx,
695 : void *priv) {
696 0 : RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
697 0 : RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
698 :
699 0 : const Macroblock *const x = rsc->x;
700 0 : Av1Common *const cm = rsc->cm;
701 :
702 0 : const int32_t highbd = cm->use_highbitdepth;
703 0 : const int32_t bit_depth = cm->bit_depth;
704 :
705 0 : uint8_t *dgd_start =
706 0 : rsc->dgd_buffer + limits->v_start * rsc->dgd_stride + limits->h_start;
707 0 : const uint8_t *src_start =
708 0 : rsc->src_buffer + limits->v_start * rsc->src_stride + limits->h_start;
709 :
710 0 : const int32_t is_uv = rsc->plane > 0;
711 0 : const int32_t ss_x = is_uv && cm->subsampling_x;
712 0 : const int32_t ss_y = is_uv && cm->subsampling_y;
713 0 : const int32_t procunit_width = RESTORATION_PROC_UNIT_SIZE >> ss_x;
714 0 : const int32_t procunit_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
715 :
716 0 : int8_t step = get_sg_step(cm->sg_filter_mode);
717 :
718 0 : rusi->sgrproj = search_selfguided_restoration(
719 0 : dgd_start, limits->h_end - limits->h_start,
720 0 : limits->v_end - limits->v_start, rsc->dgd_stride, src_start,
721 : rsc->src_stride, highbd, bit_depth, procunit_width, procunit_height,
722 : cm->rst_tmpbuf
723 0 : , cm->sg_ref_frame_ep,
724 0 : cm->sg_frame_ep_cnt,
725 : step
726 : );
727 :
728 : RestorationUnitInfo rui;
729 0 : rui.restoration_type = RESTORE_SGRPROJ;
730 0 : rui.sgrproj_info = rusi->sgrproj;
731 :
732 0 : rusi->sse[RESTORE_SGRPROJ] = try_restoration_unit(rsc, limits, tile, &rui);
733 :
734 0 : const int64_t bits_none = x->sgrproj_restore_cost[0];
735 0 : const int64_t bits_sgr = x->sgrproj_restore_cost[1] +
736 0 : (count_sgrproj_bits(&rusi->sgrproj, &rsc->sgrproj)
737 0 : << AV1_PROB_COST_SHIFT);
738 :
739 0 : double cost_none =
740 0 : RDCOST_DBL(x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE]);
741 0 : double cost_sgr =
742 0 : RDCOST_DBL(x->rdmult, bits_sgr >> 4, rusi->sse[RESTORE_SGRPROJ]);
743 :
744 0 : RestorationType rtype =
745 0 : (cost_sgr < cost_none) ? RESTORE_SGRPROJ : RESTORE_NONE;
746 0 : rusi->best_rtype[RESTORE_SGRPROJ - 1] = rtype;
747 :
748 0 : rsc->sse += rusi->sse[rtype];
749 0 : rsc->bits += (cost_sgr < cost_none) ? bits_sgr : bits_none;
750 0 : if (cost_sgr < cost_none) rsc->sgrproj = rusi->sgrproj;
751 0 : }
752 :
753 0 : void eb_av1_compute_stats_c(int32_t wiener_win, const uint8_t *dgd, const uint8_t *src,
754 : int32_t h_start, int32_t h_end, int32_t v_start, int32_t v_end,
755 : int32_t dgd_stride, int32_t src_stride, int64_t *M,
756 : int64_t *H) {
757 : int32_t i, j, k, l;
758 0 : int16_t Y[WIENER_WIN2] = { 0 };
759 0 : const int32_t wiener_win2 = wiener_win * wiener_win;
760 0 : const int32_t wiener_halfwin = (wiener_win >> 1);
761 0 : uint8_t avg = find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
762 :
763 0 : memset(M, 0, sizeof(*M) * wiener_win2);
764 0 : memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
765 0 : for (i = v_start; i < v_end; i++) {
766 0 : for (j = h_start; j < h_end; j++) {
767 0 : const int16_t X = (int16_t)src[i * src_stride + j] - (int16_t)avg;
768 0 : int32_t idx = 0;
769 0 : for (k = -wiener_halfwin; k <= wiener_halfwin; k++) {
770 0 : for (l = -wiener_halfwin; l <= wiener_halfwin; l++) {
771 0 : Y[idx] = (int16_t)dgd[(i + l) * dgd_stride + (j + k)] - (int16_t)avg;
772 0 : idx++;
773 : }
774 : }
775 : assert(idx == wiener_win2);
776 0 : for (k = 0; k < wiener_win2; ++k) {
777 0 : M[k] += (int32_t)Y[k] * X;
778 0 : for (l = k; l < wiener_win2; ++l) {
779 : // H is a symmetric matrix, so we only need to fill out the upper
780 : // triangle here. We can copy it down to the lower triangle outside
781 : // the (i, j) loops.
782 0 : H[k * wiener_win2 + l] += (int32_t)Y[k] * Y[l];
783 : }
784 : }
785 : }
786 : }
787 0 : for (k = 0; k < wiener_win2; ++k) {
788 0 : for (l = k + 1; l < wiener_win2; ++l)
789 0 : H[l * wiener_win2 + k] = H[k * wiener_win2 + l];
790 : }
791 0 : }
792 :
793 : #if 0
794 : // Note: Don't delete! This is the code base for optimizations, and 4.8x, 9x or
795 : // 16x faster than the above original C version for win 3, 5 and 7
796 : // respectively, while keeping the bit-exactness.
797 :
798 : // Demonstration of TAP 3:
799 : // (Left Edge) (Left Edge)
800 : // \lj 00 10 20 01 11 21 02 12 22
801 : // ki-------------------------------------------------------------
802 : // 00| 00X00 00X10 00X20 | 00X01 00X11 00X21 | 00X02 00X12 00X22 |(Top Edge)
803 : // 10| 10x10 10x20 | 10X01 10x11 10x21 | 10X02 10x12 10x22 |(Top Row)
804 : // 20| 20x20 | 20X01 20x11 20x21 | 20X02 20x12 20x22 |
805 : // | --------------------|-------------------|
806 : // 01| 01x01 01x11 01x21 | 01x02 01x12 01x22 |(Top Edge)
807 : // 11| 11x11 11x21 | 11x02 11x12 11x22 |(Mid Rows)
808 : // 21| 21x21 | 21x02 21x12 21x22 |
809 : // | --------------------|
810 : // 02| 02x02 02x12 02x22 |(Top Edge)
811 : // 12| 12x12 12x22 |(Btm Row)
812 : // 22| 22x22 |
813 : // -------------------------------------------------------------
814 : // kiXlj means the multiply-accumulate must be calculated from scratch, and
815 : // starts from a[k * a_stride + i] * b[l * b_stride + j].
816 : // kixlj means the multiply-accumulate could be derived from a neighbor.
817 :
818 : static INLINE void sub_avg_block(const uint8_t *src, const int32_t src_stride,
819 : const uint8_t avg, const int32_t width,
820 : const int32_t height, int16_t *dst,
821 : const int32_t dst_stride) {
822 : for (int32_t i = 0; i < height; i++) {
823 : for (int32_t j = 0; j < width; j++)
824 : dst[i * dst_stride + j] = (int16_t)src[i * src_stride + j] - avg;
825 : }
826 : }
827 :
// Computes the Wiener statistics for one rectangle of an 8-bit plane.
//   wiener_win        : filter window size (number of taps per dimension)
//   dgd / dgd_stride  : degraded (reconstructed) plane and its row pitch
//   src / src_stride  : source plane and its row pitch
//   [h_start, h_end)  : horizontal extent of the rectangle
//   [v_start, v_end)  : vertical extent of the rectangle
//   M                 : output, wiener_win * wiener_win cross-correlation
//                       terms, M[j * wiener_win + l]
//   H                 : output, (wiener_win^2) x (wiener_win^2) symmetric
//                       auto-correlation matrix
// The value returned by find_average() over dgd is subtracted from both planes
// before accumulating. Only a small part of H is accumulated directly
// (steps 1-2); the remainder is derived from already computed neighbours by
// adding/removing one column or row of products (steps 3-6), and the lower
// triangle is finally mirrored from the upper triangle.
// The dgd rectangle is read with a margin of wiener_win >> 1 samples on every
// side, so those samples must be addressable.
// NOTE(review): d and s are large automatic arrays
// (3 * RESTORATION_UNITSIZE_MAX^2 int16_t elements each); confirm the calling
// thread's stack can hold them.
void av1_compute_stats_base(int32_t wiener_win, const uint8_t *dgd,
                            const uint8_t *src, int32_t h_start, int32_t h_end,
                            int32_t v_start, int32_t v_end, int32_t dgd_stride,
                            int32_t src_stride, int64_t *M, int64_t *H) {
    const int32_t wiener_win2 = wiener_win * wiener_win;
    const int32_t wiener_halfwin = wiener_win >> 1;
    const uint8_t avg =
        find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
    const int32_t width = h_end - h_start;
    const int32_t height = v_end - v_start;
    // Row pitches of the temporary buffers, rounded up to a multiple of 16.
    const int32_t d_stride = (width + 2 * wiener_halfwin + 15) & ~15;
    const int32_t s_stride = (width + 15) & ~15;
    // The maximum input size is width * height, which is
    // (9 / 4) * RESTORATION_UNITSIZE_MAX * RESTORATION_UNITSIZE_MAX. Enlarge to
    // 3 * RESTORATION_UNITSIZE_MAX * RESTORATION_UNITSIZE_MAX considering
    // paddings.
    int16_t d[3 * RESTORATION_UNITSIZE_MAX * RESTORATION_UNITSIZE_MAX];
    int16_t s[3 * RESTORATION_UNITSIZE_MAX * RESTORATION_UNITSIZE_MAX];
    int32_t i, j, k, l, x, y;

    // s holds (src - avg) for the rectangle itself.
    sub_avg_block(src + v_start * src_stride + h_start,
                  src_stride,
                  avg,
                  width,
                  height,
                  s,
                  s_stride);
    // d holds (dgd - avg) for the rectangle extended by wiener_halfwin samples
    // on every side, so d[0] corresponds to the top-left sample of the window.
    sub_avg_block(dgd + (v_start - wiener_halfwin) * dgd_stride + h_start -
                      wiener_halfwin,
                  dgd_stride,
                  avg,
                  width + 2 * wiener_halfwin,
                  height + 2 * wiener_halfwin,
                  d,
                  d_stride);

    // Step 1: Calculate the top edge of the whole matrix, i.e., the top edge of
    // each triangle and square on the top row.
    // Demonstration of TAP 3:
    //                            (Left Edge)         (Left Edge)
    //  \lj  00    10    20     01    11    21     02    12    22
    // ki-------------------------------------------------------------
    // 00| 00X00 00X10 00X20 | 00X01 00X11 00X21 | 00X02 00X12 00X22 |(Top Edge)
    // 10|                                                           |(Top Row)
    // 20|                                                           |
    //   |                   --------------------|-------------------|
    // 01|                                                           |(Top Edge)
    // 11|                                                           |(Mid Rows)
    // 21|                                                           |
    //   |                                       --------------------|
    // 02|                                                           |(Top Edge)
    // 12|                                                           |(Btm Row)
    // 22|                                                           |
    // -------------------------------------------------------------
    // This step also produces every entry of M.
    for (j = 0; j < wiener_win; j++) {
        for (l = 0; l < wiener_win; l++) {
            int64_t sumM = 0;
            int64_t sumH = 0;

            for (y = 0; y < height; y++) {
                for (x = 0; x < width; x++) {
                    sumM += s[y * s_stride + x] * d[(l + y) * d_stride + j + x];
                    sumH += d[y * d_stride + x] * d[(l + y) * d_stride + j + x];
                }
            }

            M[j * wiener_win + l] = sumM;
            H[j * wiener_win + l] = sumH;
        }
    }

    // Step 2: Calculate the left edge of each square on the top row.
    // Demonstration of TAP 3:
    //                            (Left Edge)         (Left Edge)
    //  \lj  00    10    20     01    11    21     02    12    22
    // ki-------------------------------------------------------------
    // 00|                                                           |(Top Edge)
    // 10|                   | 10X01             | 10X02             |(Top Row)
    // 20|                   | 20X01             | 20X02             |
    //   |                   --------------------|-------------------|
    // 01|                                                           |(Top Edge)
    // 11|                                                           |(Mid Rows)
    // 21|                                                           |
    //   |                                       --------------------|
    // 02|                                                           |(Top Edge)
    // 12|                                                           |(Btm Row)
    // 22|                                                           |
    // -------------------------------------------------------------
    for (j = 1; j < wiener_win; j++) {
        for (k = 1; k < wiener_win; k++) {
            int64_t sumH = 0;

            for (y = 0; y < height; y++) {
                for (x = 0; x < width; x++)
                    sumH += d[(k + y) * d_stride + x] * d[y * d_stride + j + x];
            }

            H[k * wiener_win2 + j * wiener_win] = sumH;
        }
    }

    // Step 3: Derive the top edge of each triangle along the diagonal. No
    // triangle in top row.
    // Demonstration of TAP 3:
    //                            (Left Edge)         (Left Edge)
    //  \lj  00    10    20     01    11    21     02    12    22
    // ki-------------------------------------------------------------
    // 00|                                                           |(Top Edge)
    // 10|                                                           |(Top Row)
    // 20|                                                           |
    //   |                   --------------------|-------------------|
    // 01|                    01x01 01x11 01x21  |                   |(Top Edge)
    // 11|                                                           |(Mid Rows)
    // 21|                                                           |
    //   |                                       --------------------|
    // 02|                                        02x02 02x12 02x22  |(Top Edge)
    // 12|                                                           |(Btm Row)
    // 22|                                                           |
    // -------------------------------------------------------------
    // Each entry equals its upper-left diagonal neighbour block entry minus the
    // products of the column that left the window plus the products of the
    // column that entered it.
    for (i = 1; i < wiener_win; i++) {
        for (l = 0; l < wiener_win; l++) {
            int32_t delta = 0;

            for (y = 0; y < height; y++) {
                delta -=
                    d[y * d_stride + i - 1] * d[(l + y) * d_stride + i - 1];
                delta += d[y * d_stride + i - 1 + width] *
                         d[(l + y) * d_stride + i - 1 + width];
            }

            H[i * wiener_win * wiener_win2 + i * wiener_win + l] =
                H[(i - 1) * wiener_win * wiener_win2 + (i - 1) * wiener_win +
                  l] +
                delta;
        }
    }

    // Step 4: Derive the top and left edge of each square. No square in top and
    // bottom row. (There is only 1 square in this step for TAP 3, but there are
    // many for TAP 5 and 7.)
    // Demonstration of TAP 3:
    //                            (Left Edge)         (Left Edge)
    //  \lj  00    10    20     01    11    21     02    12    22
    // ki-------------------------------------------------------------
    // 00|                                                           |(Top Edge)
    // 10|                                                           |(Top Row)
    // 20|                                                           |
    //   |                   --------------------|-------------------|
    // 01|                                       | 01x02 01x12 01x22 |(Top Edge)
    // 11|                                       | 11x02             |(Mid Rows)
    // 21|                                       | 21x02             |
    //   |                                       --------------------|
    // 02|                                                           |(Top Edge)
    // 12|                                                           |(Btm Row)
    // 22|                                                           |
    // -------------------------------------------------------------
    for (i = 1; i < wiener_win - 1; i++) {
        for (j = i + 1; j < wiener_win; j++) {
            // Top edge of square (i, j).
            for (l = 0; l < wiener_win; l++) {
                int32_t delta = 0;

                for (y = 0; y < height; y++) {
                    delta -=
                        d[y * d_stride + i - 1] * d[(l + y) * d_stride + j - 1];
                    delta += d[y * d_stride + i - 1 + width] *
                             d[(l + y) * d_stride + j - 1 + width];
                }

                H[i * wiener_win * wiener_win2 + j * wiener_win + l] =
                    H[(i - 1) * wiener_win * wiener_win2 +
                      (j - 1) * wiener_win + l] +
                    delta;
            }

            // Left edge of square (i, j).
            for (k = 1; k < wiener_win; k++) {
                int32_t delta = 0;

                for (y = 0; y < height; y++) {
                    delta -=
                        d[(k + y) * d_stride + i - 1] * d[y * d_stride + j - 1];
                    delta += d[(k + y) * d_stride + i - 1 + width] *
                             d[y * d_stride + j - 1 + width];
                }

                H[(i * wiener_win + k) * wiener_win2 + j * wiener_win] =
                    H[((i - 1) * wiener_win + k) * wiener_win2 +
                      (j - 1) * wiener_win] +
                    delta;
            }
        }
    }

    // Step 5: Derive other points of each square. No square in bottom row.
    // Demonstration of TAP 3:
    //                            (Left Edge)         (Left Edge)
    //  \lj  00    10    20     01    11    21     02    12    22
    // ki-------------------------------------------------------------
    // 00|                   |                   |                   |(Top Edge)
    // 10|                   |       10x11 10x21 |       10x12 10x22 |(Top Row)
    // 20|                   |       20x11 20x21 |       20x12 20x22 |
    //   |                   --------------------|-------------------|
    // 01|                                       |                   |(Top Edge)
    // 11|                                       |       11x12 11x22 |(Mid Rows)
    // 21|                                       |       21x12 21x22 |
    //   |                                       --------------------|
    // 02|                                                           |(Top Edge)
    // 12|                                                           |(Btm Row)
    // 22|                                                           |
    // -------------------------------------------------------------
    // Each entry is its upper-left neighbour inside the same square, minus the
    // products of the row that left the window plus those of the row that
    // entered it.
    for (i = 0; i < wiener_win - 1; i++) {
        for (j = i + 1; j < wiener_win; j++) {
            for (k = 1; k < wiener_win; k++) {
                for (l = 1; l < wiener_win; l++) {
                    int32_t delta = 0;

                    for (x = 0; x < width; x++) {
                        delta -= d[(k - 1) * d_stride + i + x] *
                                 d[(l - 1) * d_stride + j + x];
                        delta += d[(k - 1 + height) * d_stride + i + x] *
                                 d[(l - 1 + height) * d_stride + j + x];
                    }

                    H[(i * wiener_win + k) * wiener_win2 + j * wiener_win + l] =
                        H[(i * wiener_win + k - 1) * wiener_win2 +
                          j * wiener_win + l - 1] +
                        delta;
                }
            }
        }
    }

    // Step 6: Derive other points of each upper triangle along the diagonal.
    // Demonstration of TAP 3:
    //                            (Left Edge)         (Left Edge)
    //  \lj  00    10    20     01    11    21     02    12    22
    // ki-------------------------------------------------------------
    // 00|                   |                   |                   |(Top Edge)
    // 10|       10x10 10x20 |                   |                   |(Top Row)
    // 20|             20x20 |                   |                   |
    //   |                   --------------------|-------------------|
    // 01|                                       |                   |(Top Edge)
    // 11|                         11x11 11x21   |                   |(Mid Rows)
    // 21|                               21x21   |                   |
    //   |                                       --------------------|
    // 02|                                                           |(Top Edge)
    // 12|                                               12x12 12x22 |(Btm Row)
    // 22|                                                     22x22 |
    // -------------------------------------------------------------
    for (i = 0; i < wiener_win; i++) {
        for (k = 1; k < wiener_win; k++) {
            for (l = k; l < wiener_win; l++) {
                int32_t delta = 0;

                for (x = 0; x < width; x++) {
                    delta -= d[(k - 1) * d_stride + i + x] *
                             d[(l - 1) * d_stride + i + x];
                    delta += d[(k - 1 + height) * d_stride + i + x] *
                             d[(l - 1 + height) * d_stride + i + x];
                }

                H[(i * wiener_win + k) * wiener_win2 + i * wiener_win + l] =
                    H[(i * wiener_win + k - 1) * wiener_win2 + i * wiener_win +
                      l - 1] +
                    delta;
            }
        }
    }

    // H is a symmetric matrix, so we only need to fill out the upper triangle.
    // We can copy it down to the lower triangle outside the (i, j) loops.
    // Divided into 4x4 squares to do load-and-transpose-and-store in
    // optimization.
    // Demonstration of TAP 3:
    //  \lj  00    10    20    01    11    21    02    12    22
    // ki-------------------------------------------------------------
    // 00|                                                           |
    //   |------                                                     |
    // 10| 00X10                                                     |
    // 20| 00X20 10x20                                               |
    // 01| 00X01 10X01 20X01                                         |
    // 11| 00X11 10x11 20x11 01x11                                   |
    //   |--------------------------                                 |
    // 21| 00X21 10x21 20x21 01x21 | 11x21                           |
    // 02| 00X02 10X02 20X02 01x02 | 11x02 21x02                     |
    // 12| 00X12 10x12 20x12 01x12 | 11x12 21x12 02x12               |
    // 22| 00X22 10x22 20x22 01x22 | 11x22 21x22 02x22 12x22 |       |
    // -------------------------------------------------------------
    for (i = 0; i < wiener_win2; i++) {
        for (j = i + 1; j < wiener_win2; j++)
            H[j * wiener_win2 + i] = H[i * wiener_win2 + j];
    }

    // Finally we get the whole matrix.
    // Demonstration of TAP 3:
    //  \lj  00    10    20     01    11    21     02    12    22
    // ki-------------------------------------------------------------
    // 00| 00X00 00X10 00X20 | 00X01 00X11 00X21 | 00X02 00X12 00X22 |
    // 10| 00X10 10x10 10x20 | 10X01 10x11 10x21 | 10X02 10x12 10x22 |
    // 20| 00X20 10x20 20x20 | 20X01 20x11 20x21 | 20X02 20x12 20x22 |
    //   |-----------------------------------------------------------|
    // 01| 00X01 10X01 20X01 | 01x01 01x11 01x21 | 01x02 01x12 01x22 |
    // 11| 00X11 10x11 20x11 | 01x11 11x11 11x21 | 11x02 11x12 11x22 |
    // 21| 00X21 10x21 20x21 | 01x21 11x21 21x21 | 21x02 21x12 21x22 |
    //   |-----------------------------------------------------------|
    // 02| 00X02 10X02 20X02 | 01x02 11x02 21x02 | 02x02 02x12 02x22 |
    // 12| 00X12 10x12 20x12 | 01x12 11x12 21x12 | 02x12 12x12 12x22 |
    // 22| 00X22 10x22 20x22 | 01x22 11x22 21x22 | 02x22 12x22 22x22 |
    // -------------------------------------------------------------
}
1137 : #endif
1138 :
1139 0 : void eb_av1_compute_stats_highbd_c(int32_t wiener_win, const uint8_t *dgd8,
1140 : const uint8_t *src8, int32_t h_start, int32_t h_end,
1141 : int32_t v_start, int32_t v_end, int32_t dgd_stride,
1142 : int32_t src_stride, int64_t *M, int64_t *H,
1143 : AomBitDepth bit_depth) {
1144 : int32_t i, j, k, l;
1145 0 : int32_t Y[WIENER_WIN2] = { 0 };
1146 0 : const int32_t wiener_win2 = wiener_win * wiener_win;
1147 0 : const int32_t wiener_halfwin = (wiener_win >> 1);
1148 0 : const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
1149 0 : const uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8);
1150 : uint16_t avg =
1151 0 : find_average_highbd(dgd, h_start, h_end, v_start, v_end, dgd_stride);
1152 :
1153 0 : uint8_t bit_depth_divider = 1;
1154 0 : if (bit_depth == AOM_BITS_12)
1155 0 : bit_depth_divider = 16;
1156 0 : else if (bit_depth == AOM_BITS_10)
1157 0 : bit_depth_divider = 4;
1158 :
1159 0 : memset(M, 0, sizeof(*M) * wiener_win2);
1160 0 : memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
1161 0 : for (i = v_start; i < v_end; i++) {
1162 0 : for (j = h_start; j < h_end; j++) {
1163 0 : const int32_t X = (int32_t)src[i * src_stride + j] - (int32_t)avg;
1164 0 : int32_t idx = 0;
1165 0 : for (k = -wiener_halfwin; k <= wiener_halfwin; k++) {
1166 0 : for (l = -wiener_halfwin; l <= wiener_halfwin; l++) {
1167 0 : Y[idx] = (int32_t)dgd[(i + l) * dgd_stride + (j + k)] - (int32_t)avg;
1168 0 : idx++;
1169 : }
1170 : }
1171 : assert(idx == wiener_win2);
1172 0 : for (k = 0; k < wiener_win2; ++k) {
1173 0 : M[k] += (int64_t)Y[k] * X;
1174 0 : for (l = k; l < wiener_win2; ++l) {
1175 : // H is a symmetric matrix, so we only need to fill out the upper
1176 : // triangle here. We can copy it down to the lower triangle outside
1177 : // the (i, j) loops.
1178 0 : H[k * wiener_win2 + l] += (int64_t)Y[k] * Y[l];
1179 : }
1180 : }
1181 : }
1182 : }
1183 0 : for (k = 0; k < wiener_win2; ++k) {
1184 0 : M[k] /= bit_depth_divider;
1185 0 : H[k * wiener_win2 + k] /= bit_depth_divider;
1186 0 : for (l = k + 1; l < wiener_win2; ++l) {
1187 0 : H[k * wiener_win2 + l] /= bit_depth_divider;
1188 0 : H[l * wiener_win2 + k] = H[k * wiener_win2 + l];
1189 : }
1190 : }
1191 0 : }
1192 :
// Folds a tap position of a symmetric wiener_win-tap filter onto its first
// half: positions past the centre map to their mirror, the rest are unchanged.
static INLINE int32_t wrap_index(int32_t i, int32_t wiener_win) {
    const int32_t centre = wiener_win >> 1;
    if (i > centre) return wiener_win - 1 - i;
    return i;
}
1197 :
// Solve the n x n integer linear system A * x = b to find Wiener filter taps.
//   A, stride : system matrix (row pitch `stride`), modified in place
//   b         : right-hand side, modified in place
//   x         : output taps, stored scaled by WIENER_TAP_SCALE_FACTOR
// Returns 1 on success, or 0 when a zero pivot is met (x is then not fully
// written).
static int32_t linsolve_wiener(int32_t n, int64_t *A, int32_t stride, int64_t *b,
                               int32_t *x) {
    for (int32_t col = 0; col < n - 1; col++) {
        // Partial pivoting: walk upwards from the last row, swapping adjacent
        // rows whenever the lower one has the bigger entry in this column.
        for (int32_t row = n - 1; row > col; row--) {
            int64_t *lower = A + row * stride;
            int64_t *upper = A + (row - 1) * stride;
            if (llabs(upper[col]) >= llabs(lower[col])) continue;
            for (int32_t j = 0; j < n; j++) {
                const int64_t tmp = lower[j];
                lower[j]          = upper[j];
                upper[j]          = tmp;
            }
            const int64_t tmp = b[row];
            b[row]            = b[row - 1];
            b[row - 1]        = tmp;
        }

        // Forward elimination (convert A to row-echelon form). The pivot row is
        // not touched below, so the pivot can be read once.
        const int64_t *pivot_row = A + col * stride;
        const int64_t  pivot     = pivot_row[col];
        if (pivot == 0) return 0;
        for (int32_t row = col + 1; row < n; row++) {
            int64_t      *target = A + row * stride;
            const int64_t factor = target[col];
            for (int32_t j = 0; j < n; j++)
                target[j] -= factor / 256 * pivot_row[j] / pivot * 256;
            b[row] -= factor * b[col] / pivot;
        }
    }

    // Back-substitution, from the last unknown to the first.
    for (int32_t row = n - 1; row >= 0; row--) {
        const int64_t *cur = A + row * stride;
        if (cur[row] == 0) return 0;
        int64_t known = 0;
        for (int32_t j = row + 1; j < n; j++)
            known += cur[j] * x[j] / WIENER_TAP_SCALE_FACTOR;
        // Store filter taps x in scaled form.
        x[row] = (int32_t)(WIENER_TAP_SCALE_FACTOR * (b[row] - known) / cur[row]);
    }

    return 1;
}
1239 : // Fix vector b, update vector a
1240 1200 : static void update_a_sep_sym(int32_t wiener_win, int64_t **Mc, int64_t **Hc,
1241 : int32_t *a, int32_t *b) {
1242 : int32_t i, j;
1243 : int32_t S[WIENER_WIN];
1244 : int64_t A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
1245 1200 : const int32_t wiener_win2 = wiener_win * wiener_win;
1246 1200 : const int32_t wiener_halfwin1 = (wiener_win >> 1) + 1;
1247 1200 : memset(A, 0, sizeof(A));
1248 1200 : memset(B, 0, sizeof(B));
1249 8640 : for (i = 0; i < wiener_win; i++) {
1250 54720 : for (j = 0; j < wiener_win; ++j) {
1251 47280 : const int32_t jj = wrap_index(j, wiener_win);
1252 47280 : A[jj] += Mc[i][j] * b[i] / WIENER_TAP_SCALE_FACTOR;
1253 : }
1254 : }
1255 8640 : for (i = 0; i < wiener_win; i++) {
1256 54720 : for (j = 0; j < wiener_win; j++) {
1257 : int32_t k, l;
1258 354240 : for (k = 0; k < wiener_win; ++k) {
1259 2335680 : for (l = 0; l < wiener_win; ++l) {
1260 2028720 : const int32_t kk = wrap_index(k, wiener_win);
1261 2028720 : const int32_t ll = wrap_index(l, wiener_win);
1262 2028720 : B[ll * wiener_halfwin1 + kk] +=
1263 2028720 : Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] /
1264 2028720 : WIENER_TAP_SCALE_FACTOR * b[j] / WIENER_TAP_SCALE_FACTOR;
1265 : }
1266 : }
1267 : }
1268 : }
1269 : // Normalization enforcement in the system of equations itself
1270 : assert(wiener_halfwin1 <= WIENER_HALFWIN1);
1271 1200 : int64_t A_halfwin1 = A[wiener_halfwin1 - 1];
1272 4320 : for (i = 0; i < wiener_halfwin1 - 1; ++i) {
1273 3120 : A[i] -= A_halfwin1 * 2 +
1274 3120 : B[i * wiener_halfwin1 + wiener_halfwin1 - 1] -
1275 3120 : 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)];
1276 : }
1277 4320 : for (i = 0; i < wiener_halfwin1 - 1; ++i) {
1278 11520 : for (j = 0; j < wiener_halfwin1 - 1; ++j) {
1279 8400 : B[i * wiener_halfwin1 + j] -=
1280 8400 : 2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] +
1281 8400 : B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] -
1282 8400 : 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 +
1283 8400 : (wiener_halfwin1 - 1)]);
1284 : }
1285 : }
1286 1200 : if (linsolve_wiener(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) {
1287 1200 : S[wiener_halfwin1 - 1] = WIENER_TAP_SCALE_FACTOR;
1288 4320 : for (i = wiener_halfwin1; i < wiener_win; ++i) {
1289 3120 : S[i] = S[wiener_win - 1 - i];
1290 3120 : S[wiener_halfwin1 - 1] -= 2 * S[i];
1291 : }
1292 1200 : memcpy(a, S, wiener_win * sizeof(*a));
1293 : }
1294 1200 : }
1295 :
1296 : // Fix vector a, update vector b
1297 1200 : static void update_b_sep_sym(int32_t wiener_win, int64_t **Mc, int64_t **Hc,
1298 : int32_t *a, int32_t *b) {
1299 : int32_t i, j;
1300 : int32_t S[WIENER_WIN];
1301 : int64_t A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
1302 1200 : const int32_t wiener_win2 = wiener_win * wiener_win;
1303 1200 : const int32_t wiener_halfwin1 = (wiener_win >> 1) + 1;
1304 1200 : memset(A, 0, sizeof(A));
1305 1200 : memset(B, 0, sizeof(B));
1306 8640 : for (i = 0; i < wiener_win; i++) {
1307 7440 : const int32_t ii = wrap_index(i, wiener_win);
1308 54720 : for (j = 0; j < wiener_win; j++)
1309 47280 : A[ii] += Mc[i][j] * a[j] / WIENER_TAP_SCALE_FACTOR;
1310 : }
1311 :
1312 8640 : for (i = 0; i < wiener_win; i++) {
1313 54720 : for (j = 0; j < wiener_win; j++) {
1314 47280 : const int32_t ii = wrap_index(i, wiener_win);
1315 47280 : const int32_t jj = wrap_index(j, wiener_win);
1316 : int32_t k, l;
1317 354240 : for (k = 0; k < wiener_win; ++k) {
1318 2335680 : for (l = 0; l < wiener_win; ++l) {
1319 2028720 : B[jj * wiener_halfwin1 + ii] +=
1320 2028720 : Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] /
1321 2028720 : WIENER_TAP_SCALE_FACTOR * a[l] / WIENER_TAP_SCALE_FACTOR;
1322 : }
1323 : }
1324 : }
1325 : }
1326 : // Normalization enforcement in the system of equations itself
1327 1200 : int64_t A_halfwin1 = A[wiener_halfwin1 - 1];
1328 4320 : for (i = 0; i < wiener_halfwin1 - 1; ++i) {
1329 3120 : A[i] -= A_halfwin1 * 2 +
1330 3120 : B[i * wiener_halfwin1 + wiener_halfwin1 - 1] -
1331 3120 : 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)];
1332 : }
1333 4320 : for (i = 0; i < wiener_halfwin1 - 1; ++i) {
1334 11520 : for (j = 0; j < wiener_halfwin1 - 1; ++j) {
1335 8400 : B[i * wiener_halfwin1 + j] -=
1336 8400 : 2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] +
1337 8400 : B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] -
1338 8400 : 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 +
1339 8400 : (wiener_halfwin1 - 1)]);
1340 : }
1341 : }
1342 1200 : if (linsolve_wiener(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) {
1343 1200 : S[wiener_halfwin1 - 1] = WIENER_TAP_SCALE_FACTOR;
1344 4320 : for (i = wiener_halfwin1; i < wiener_win; ++i) {
1345 3120 : S[i] = S[wiener_win - 1 - i];
1346 3120 : S[wiener_halfwin1 - 1] -= 2 * S[i];
1347 : }
1348 1200 : memcpy(b, S, wiener_win * sizeof(*b));
1349 : }
1350 1200 : }
1351 :
1352 300 : static int32_t wiener_decompose_sep_sym(int32_t wiener_win, int64_t *M, int64_t *H,
1353 : int32_t *a, int32_t *b) {
1354 : static const int32_t init_filt[WIENER_WIN] = {
1355 : WIENER_FILT_TAP0_MIDV, WIENER_FILT_TAP1_MIDV, WIENER_FILT_TAP2_MIDV,
1356 : WIENER_FILT_TAP3_MIDV, WIENER_FILT_TAP2_MIDV, WIENER_FILT_TAP1_MIDV,
1357 : WIENER_FILT_TAP0_MIDV,
1358 : };
1359 : int64_t *Hc[WIENER_WIN2];
1360 : int64_t *Mc[WIENER_WIN];
1361 : int32_t i, j, iter;
1362 300 : const int32_t plane_off = (WIENER_WIN - wiener_win) >> 1;
1363 300 : const int32_t wiener_win2 = wiener_win * wiener_win;
1364 2160 : for (i = 0; i < wiener_win; i++) {
1365 1860 : a[i] = b[i] =
1366 1860 : WIENER_TAP_SCALE_FACTOR / WIENER_FILT_STEP * init_filt[i + plane_off];
1367 : }
1368 2160 : for (i = 0; i < wiener_win; i++) {
1369 1860 : Mc[i] = M + i * wiener_win;
1370 13680 : for (j = 0; j < wiener_win; j++) {
1371 11820 : Hc[i * wiener_win + j] =
1372 11820 : H + i * wiener_win * wiener_win2 + j * wiener_win;
1373 : }
1374 : }
1375 :
1376 300 : iter = 1;
1377 1500 : while (iter < NUM_WIENER_ITERS) {
1378 1200 : update_a_sep_sym(wiener_win, Mc, Hc, a, b);
1379 1200 : update_b_sep_sym(wiener_win, Mc, Hc, a, b);
1380 1200 : iter++;
1381 : }
1382 300 : return 1;
1383 : }
1384 300 : static int64_t compute_score(int32_t wiener_win, int64_t *M, int64_t *H,
1385 : InterpKernel vfilt, InterpKernel hfilt) {
1386 : int32_t ab[WIENER_WIN * WIENER_WIN];
1387 : int16_t a[WIENER_WIN], b[WIENER_WIN];
1388 300 : int64_t P = 0, Q = 0;
1389 300 : int64_t iP = 0, iQ = 0;
1390 : int64_t Score, iScore;
1391 : int32_t i, k, l;
1392 300 : const int32_t plane_off = (WIENER_WIN - wiener_win) >> 1;
1393 300 : const int32_t wiener_win2 = wiener_win * wiener_win;
1394 :
1395 300 : aom_clear_system_state();
1396 :
1397 300 : a[WIENER_HALFWIN] = b[WIENER_HALFWIN] = WIENER_FILT_STEP;
1398 1200 : for (i = 0; i < WIENER_HALFWIN; ++i) {
1399 900 : a[i] = a[WIENER_WIN - i - 1] = vfilt[i];
1400 900 : b[i] = b[WIENER_WIN - i - 1] = hfilt[i];
1401 900 : a[WIENER_HALFWIN] -= 2 * a[i];
1402 900 : b[WIENER_HALFWIN] -= 2 * b[i];
1403 : }
1404 300 : memset(ab, 0, sizeof(ab));
1405 2160 : for (k = 0; k < wiener_win; ++k) {
1406 13680 : for (l = 0; l < wiener_win; ++l)
1407 11820 : ab[k * wiener_win + l] = a[l + plane_off] * b[k + plane_off];
1408 : }
1409 12120 : for (k = 0; k < wiener_win2; ++k) {
1410 11820 : P += ab[k] * M[k] / WIENER_FILT_STEP / WIENER_FILT_STEP;
1411 519000 : for (l = 0; l < wiener_win2; ++l) {
1412 507180 : Q += ab[k] * H[k * wiener_win2 + l] * ab[l] / WIENER_FILT_STEP /
1413 507180 : WIENER_FILT_STEP / WIENER_FILT_STEP / WIENER_FILT_STEP;
1414 : }
1415 : }
1416 300 : Score = Q - 2 * P;
1417 :
1418 300 : iP = M[wiener_win2 >> 1];
1419 300 : iQ = H[(wiener_win2 >> 1) * wiener_win2 + (wiener_win2 >> 1)];
1420 300 : iScore = iQ - 2 * iP;
1421 :
1422 300 : return Score - iScore;
1423 : }
1424 :
1425 600 : static void finalize_sym_filter(int32_t wiener_win, int32_t *f, InterpKernel fi) {
1426 : int32_t i;
1427 600 : const int32_t wiener_halfwin = (wiener_win >> 1);
1428 :
1429 2160 : for (i = 0; i < wiener_halfwin; ++i) {
1430 1560 : const int64_t dividend = f[i] * WIENER_FILT_STEP;
1431 1560 : const int64_t divisor = WIENER_TAP_SCALE_FACTOR;
1432 : // Perform this division with proper rounding rather than truncation
1433 1560 : if (dividend < 0)
1434 823 : fi[i] = (int16_t)((dividend - (divisor / 2)) / divisor);
1435 : else
1436 737 : fi[i] = (int16_t)((dividend + (divisor / 2)) / divisor);
1437 : }
1438 : // Specialize for 7-tap filter
1439 600 : if (wiener_win == WIENER_WIN) {
1440 360 : fi[0] = CLIP(fi[0], WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP0_MAXV);
1441 360 : fi[1] = CLIP(fi[1], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV);
1442 360 : fi[2] = CLIP(fi[2], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV);
1443 : }
1444 : else {
1445 240 : fi[2] = CLIP(fi[1], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV);
1446 240 : fi[1] = CLIP(fi[0], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV);
1447 240 : fi[0] = 0;
1448 : }
1449 : // Satisfy filter constraints
1450 600 : fi[WIENER_WIN - 1] = fi[0];
1451 600 : fi[WIENER_WIN - 2] = fi[1];
1452 600 : fi[WIENER_WIN - 3] = fi[2];
1453 : // The central element has an implicit +WIENER_FILT_STEP
1454 600 : fi[3] = -2 * (fi[0] + fi[1] + fi[2]);
1455 600 : }
1456 :
1457 391 : static int32_t count_wiener_bits(int32_t wiener_win, WienerInfo *wiener_info,
1458 : WienerInfo *ref_wiener_info) {
1459 391 : int32_t bits = 0;
1460 391 : if (wiener_win == WIENER_WIN)
1461 271 : bits += eb_aom_count_primitive_refsubexpfin(
1462 : WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
1463 : WIENER_FILT_TAP0_SUBEXP_K,
1464 271 : ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV,
1465 271 : wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV);
1466 782 : bits += eb_aom_count_primitive_refsubexpfin(
1467 : WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
1468 : WIENER_FILT_TAP1_SUBEXP_K,
1469 391 : ref_wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV,
1470 391 : wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV);
1471 782 : bits += eb_aom_count_primitive_refsubexpfin(
1472 : WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1,
1473 : WIENER_FILT_TAP2_SUBEXP_K,
1474 391 : ref_wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV,
1475 391 : wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV);
1476 391 : if (wiener_win == WIENER_WIN)
1477 271 : bits += eb_aom_count_primitive_refsubexpfin(
1478 : WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
1479 : WIENER_FILT_TAP0_SUBEXP_K,
1480 271 : ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV,
1481 271 : wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV);
1482 782 : bits += eb_aom_count_primitive_refsubexpfin(
1483 : WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
1484 : WIENER_FILT_TAP1_SUBEXP_K,
1485 391 : ref_wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV,
1486 391 : wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV);
1487 782 : bits += eb_aom_count_primitive_refsubexpfin(
1488 : WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1,
1489 : WIENER_FILT_TAP2_SUBEXP_K,
1490 391 : ref_wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV,
1491 391 : wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV);
1492 391 : return bits;
1493 : }
1494 :
1495 : #define USE_WIENER_REFINEMENT_SEARCH 1
// Refines the Wiener taps stored in rui->wiener_info by a coordinate search.
// The error of the current filter is measured with try_restoration_unit();
// when USE_WIENER_REFINEMENT_SEARCH is enabled, each side tap of the
// horizontal and then the vertical filter is moved down and up by a step of
// 4, 2 and 1, and a move is kept when the measured error does not increase.
// The mirrored tap is moved by the same amount and the centre tap by twice the
// opposite amount. rui->wiener_info is modified in place.
// Returns the lowest error found.
static int64_t finer_tile_search_wiener(const RestSearchCtxt *rsc,
                                        const RestorationTileLimits *limits,
                                        const AV1PixelRect *tile,
                                        RestorationUnitInfo *rui,
                                        int32_t wiener_win) {
    // Index of the first side tap in use for this window size.
    const int32_t plane_off = (WIENER_WIN - wiener_win) >> 1;
    int64_t err = try_restoration_unit(rsc, limits, tile, rui);
#if USE_WIENER_REFINEMENT_SEARCH
    int64_t err2;
    int32_t tap_min[] = { WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP1_MINV,
                          WIENER_FILT_TAP2_MINV };
    int32_t tap_max[] = { WIENER_FILT_TAP0_MAXV, WIENER_FILT_TAP1_MAXV,
                          WIENER_FILT_TAP2_MAXV };

    WienerInfo *plane_wiener = &rui->wiener_info;

    // printf("err pre = %"PRId64"\n", err);
    const int32_t start_step = 4;
    for (int32_t s = start_step; s >= 1; s >>= 1) {
        // Horizontal filter taps.
        for (int32_t p = plane_off; p < WIENER_HALFWIN; ++p) {
            int32_t skip = 0;
            // Try decreasing tap p by s (only while it stays within range).
            do {
                if (plane_wiener->hfilter[p] - s >= tap_min[p]) {
                    plane_wiener->hfilter[p] -= (int16_t)s;
                    plane_wiener->hfilter[WIENER_WIN - p - 1] -= (int16_t)s;
                    plane_wiener->hfilter[WIENER_HALFWIN] += 2 * (int16_t)s;
                    err2 = try_restoration_unit(rsc, limits, tile, rui);
                    if (err2 > err) {
                        // Worse: undo the move.
                        plane_wiener->hfilter[p] += (int16_t)s;
                        plane_wiener->hfilter[WIENER_WIN - p - 1] += (int16_t)s;
                        plane_wiener->hfilter[WIENER_HALFWIN] -= 2 * (int16_t)s;
                    }
                    else {
                        err = err2;
                        skip = 1;
                        // At the highest step size continue moving in the same direction
                        // (continue re-enters the do/while body).
                        if (s == start_step) continue;
                    }
                }
                break;
            } while (1);
            // A successful decrease leaves the tap loop: the increase is not
            // tried and the remaining taps are not visited at this step size.
            if (skip) break;
            // Try increasing tap p by s (only while it stays within range).
            do {
                if (plane_wiener->hfilter[p] + s <= tap_max[p]) {
                    plane_wiener->hfilter[p] += (int16_t)s;
                    plane_wiener->hfilter[WIENER_WIN - p - 1] += (int16_t)s;
                    plane_wiener->hfilter[WIENER_HALFWIN] -= 2 * (int16_t)s;
                    err2 = try_restoration_unit(rsc, limits, tile, rui);
                    if (err2 > err) {
                        // Worse: undo the move.
                        plane_wiener->hfilter[p] -= (int16_t)s;
                        plane_wiener->hfilter[WIENER_WIN - p - 1] -= (int16_t)s;
                        plane_wiener->hfilter[WIENER_HALFWIN] += 2 * (int16_t)s;
                    }
                    else {
                        err = err2;
                        // At the highest step size continue moving in the same direction
                        if (s == start_step) continue;
                    }
                }
                break;
            } while (1);
        }
        // Vertical filter taps, same procedure.
        for (int32_t p = plane_off; p < WIENER_HALFWIN; ++p) {
            int32_t skip = 0;
            // Try decreasing tap p by s (only while it stays within range).
            do {
                if (plane_wiener->vfilter[p] - s >= tap_min[p]) {
                    plane_wiener->vfilter[p] -= (int16_t)s;
                    plane_wiener->vfilter[WIENER_WIN - p - 1] -= (int16_t)s;
                    plane_wiener->vfilter[WIENER_HALFWIN] += 2 * (int16_t)s;
                    err2 = try_restoration_unit(rsc, limits, tile, rui);
                    if (err2 > err) {
                        // Worse: undo the move.
                        plane_wiener->vfilter[p] += (int16_t)s;
                        plane_wiener->vfilter[WIENER_WIN - p - 1] += (int16_t)s;
                        plane_wiener->vfilter[WIENER_HALFWIN] -= 2 * (int16_t)s;
                    }
                    else {
                        err = err2;
                        skip = 1;
                        // At the highest step size continue moving in the same direction
                        if (s == start_step) continue;
                    }
                }
                break;
            } while (1);
            // A successful decrease leaves the tap loop, as above.
            if (skip) break;
            // Try increasing tap p by s (only while it stays within range).
            do {
                if (plane_wiener->vfilter[p] + s <= tap_max[p]) {
                    plane_wiener->vfilter[p] += (int16_t)s;
                    plane_wiener->vfilter[WIENER_WIN - p - 1] += (int16_t)s;
                    plane_wiener->vfilter[WIENER_HALFWIN] -= 2 * (int16_t)s;
                    err2 = try_restoration_unit(rsc, limits, tile, rui);
                    if (err2 > err) {
                        // Worse: undo the move.
                        plane_wiener->vfilter[p] -= (int16_t)s;
                        plane_wiener->vfilter[WIENER_WIN - p - 1] -= (int16_t)s;
                        plane_wiener->vfilter[WIENER_HALFWIN] += 2 * (int16_t)s;
                    }
                    else {
                        err = err2;
                        // At the highest step size continue moving in the same direction
                        if (s == start_step) continue;
                    }
                }
                break;
            } while (1);
        }
    }
    // printf("err post = %"PRId64"\n", err);
#endif // USE_WIENER_REFINEMENT_SEARCH
    return err;
}
1606 295 : static int64_t finer_tile_search_wiener_seg(const RestSearchCtxt *rsc,
1607 : const RestorationTileLimits *limits,
1608 : const AV1PixelRect *tile,
1609 : RestorationUnitInfo *rui,
1610 : int32_t wiener_win) {
1611 295 : const int32_t plane_off = (WIENER_WIN - wiener_win) >> 1;
1612 295 : int64_t err = try_restoration_unit_seg(rsc, limits, tile, rui);
1613 : #if USE_WIENER_REFINEMENT_SEARCH
1614 : int64_t err2;
1615 295 : int32_t tap_min[] = { WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP1_MINV,
1616 : WIENER_FILT_TAP2_MINV };
1617 295 : int32_t tap_max[] = { WIENER_FILT_TAP0_MAXV, WIENER_FILT_TAP1_MAXV,
1618 : WIENER_FILT_TAP2_MAXV };
1619 :
1620 295 : WienerInfo *plane_wiener = &rui->wiener_info;
1621 :
1622 : // printf("err pre = %"PRId64"\n", err);
1623 295 : const int32_t start_step = 4;
1624 1180 : for (int32_t s = start_step; s >= 1; s >>= 1) {
1625 2901 : for (int32_t p = plane_off; p < WIENER_HALFWIN; ++p) {
1626 2254 : int32_t skip = 0;
1627 : do {
1628 2324 : if (plane_wiener->hfilter[p] - s >= tap_min[p]) {
1629 2311 : plane_wiener->hfilter[p] -= (int16_t)s;
1630 2311 : plane_wiener->hfilter[WIENER_WIN - p - 1] -= (int16_t)s;
1631 2311 : plane_wiener->hfilter[WIENER_HALFWIN] += 2 * (int16_t)s;
1632 2311 : err2 = try_restoration_unit_seg(rsc, limits, tile, rui);
1633 2311 : if (err2 > err) {
1634 2068 : plane_wiener->hfilter[p] += (int16_t)s;
1635 2068 : plane_wiener->hfilter[WIENER_WIN - p - 1] += (int16_t)s;
1636 2068 : plane_wiener->hfilter[WIENER_HALFWIN] -= 2 * (int16_t)s;
1637 : }
1638 : else {
1639 243 : err = err2;
1640 243 : skip = 1;
1641 : // At the highest step size continue moving in the same direction
1642 243 : if (s == start_step) continue;
1643 : }
1644 : }
1645 2254 : break;
1646 : } while (1);
1647 2254 : if (skip) break;
1648 : do {
1649 2051 : if (plane_wiener->hfilter[p] + s <= tap_max[p]) {
1650 2043 : plane_wiener->hfilter[p] += (int16_t)s;
1651 2043 : plane_wiener->hfilter[WIENER_WIN - p - 1] += (int16_t)s;
1652 2043 : plane_wiener->hfilter[WIENER_HALFWIN] -= 2 * (int16_t)s;
1653 2043 : err2 = try_restoration_unit_seg(rsc, limits, tile, rui);
1654 2043 : if (err2 > err) {
1655 1873 : plane_wiener->hfilter[p] -= (int16_t)s;
1656 1873 : plane_wiener->hfilter[WIENER_WIN - p - 1] -= (int16_t)s;
1657 1873 : plane_wiener->hfilter[WIENER_HALFWIN] += 2 * (int16_t)s;
1658 : }
1659 : else {
1660 170 : err = err2;
1661 : // At the highest step size continue moving in the same direction
1662 170 : if (s == start_step) continue;
1663 : }
1664 : }
1665 2016 : break;
1666 : } while (1);
1667 : }
1668 3060 : for (int32_t p = plane_off; p < WIENER_HALFWIN; ++p) {
1669 2280 : int32_t skip = 0;
1670 : do {
1671 2287 : if (plane_wiener->vfilter[p] - s >= tap_min[p]) {
1672 2248 : plane_wiener->vfilter[p] -= (int16_t)s;
1673 2248 : plane_wiener->vfilter[WIENER_WIN - p - 1] -= (int16_t)s;
1674 2248 : plane_wiener->vfilter[WIENER_HALFWIN] += 2 * (int16_t)s;
1675 2248 : err2 = try_restoration_unit_seg(rsc, limits, tile, rui);
1676 2248 : if (err2 > err) {
1677 2143 : plane_wiener->vfilter[p] += (int16_t)s;
1678 2143 : plane_wiener->vfilter[WIENER_WIN - p - 1] += (int16_t)s;
1679 2143 : plane_wiener->vfilter[WIENER_HALFWIN] -= 2 * (int16_t)s;
1680 : }
1681 : else {
1682 105 : err = err2;
1683 105 : skip = 1;
1684 : // At the highest step size continue moving in the same direction
1685 105 : if (s == start_step) continue;
1686 : }
1687 : }
1688 2280 : break;
1689 : } while (1);
1690 2280 : if (skip) break;
1691 : do {
1692 2222 : if (plane_wiener->vfilter[p] + s <= tap_max[p]) {
1693 2148 : plane_wiener->vfilter[p] += (int16_t)s;
1694 2148 : plane_wiener->vfilter[WIENER_WIN - p - 1] += (int16_t)s;
1695 2148 : plane_wiener->vfilter[WIENER_HALFWIN] -= 2 * (int16_t)s;
1696 2148 : err2 = try_restoration_unit_seg(rsc, limits, tile, rui);
1697 2148 : if (err2 > err) {
1698 1994 : plane_wiener->vfilter[p] -= (int16_t)s;
1699 1994 : plane_wiener->vfilter[WIENER_WIN - p - 1] -= (int16_t)s;
1700 1994 : plane_wiener->vfilter[WIENER_HALFWIN] += 2 * (int16_t)s;
1701 : }
1702 : else {
1703 154 : err = err2;
1704 : // At the highest step size continue moving in the same direction
1705 154 : if (s == start_step) continue;
1706 : }
1707 : }
1708 2175 : break;
1709 : } while (1);
1710 : }
1711 : }
1712 : // printf("err post = %"PRId64"\n", err);
1713 : #endif // USE_WIENER_REFINEMENT_SEARCH
1714 295 : return err;
1715 : }
1716 :
1717 0 : static void search_wiener(const RestorationTileLimits *limits,
1718 : const AV1PixelRect *tile_rect, int32_t rest_unit_idx,
1719 : void *priv) {
1720 0 : RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
1721 0 : RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
1722 0 : const Av1Common *const cm = rsc->cm;
1723 0 : int32_t wn_luma = cm->wn_filter_mode == 1 ? WIENER_WIN_3TAP : cm->wn_filter_mode == 2 ? WIENER_WIN_CHROMA : WIENER_WIN;
1724 0 : const int32_t wiener_win = cm->wn_filter_mode == 1 ? WIENER_WIN_3TAP :
1725 0 : (rsc->plane == AOM_PLANE_Y) ? wn_luma : WIENER_WIN_CHROMA;
1726 : int64_t M[WIENER_WIN2];
1727 : int64_t H[WIENER_WIN2 * WIENER_WIN2];
1728 : int32_t vfilterd[WIENER_WIN], hfilterd[WIENER_WIN];
1729 :
1730 0 : if (cm->use_highbitdepth)
1731 : {
1732 0 : if (rsc->plane == AOM_PLANE_Y) {
1733 0 : eb_av1_compute_stats_highbd(wiener_win, rsc->dgd_buffer, rsc->src_buffer,
1734 : limits->h_start, limits->h_end, limits->v_start,
1735 : limits->v_end, rsc->dgd_stride, rsc->src_stride, M,
1736 0 : H, (AomBitDepth)cm->bit_depth);
1737 : }
1738 : else {
1739 0 : eb_av1_compute_stats_highbd(wiener_win, rsc->dgd_buffer, rsc->src_buffer,
1740 : limits->h_start, limits->h_end, limits->v_start,
1741 : limits->v_end, rsc->dgd_stride, rsc->src_stride, M,
1742 0 : H, (AomBitDepth)cm->bit_depth);
1743 : }
1744 : }
1745 : else {
1746 0 : eb_av1_compute_stats(wiener_win, rsc->dgd_buffer, rsc->src_buffer, limits->h_start,
1747 : limits->h_end, limits->v_start, limits->v_end,
1748 : rsc->dgd_stride, rsc->src_stride, M, H);
1749 : }
1750 :
1751 0 : const Macroblock *const x = rsc->x;
1752 0 : const int64_t bits_none = x->wiener_restore_cost[0];
1753 :
1754 0 : if (!wiener_decompose_sep_sym(wiener_win, M, H, vfilterd, hfilterd)) {
1755 0 : rsc->bits += bits_none;
1756 0 : rsc->sse += rusi->sse[RESTORE_NONE];
1757 0 : rusi->best_rtype[RESTORE_WIENER - 1] = RESTORE_NONE;
1758 0 : rusi->sse[RESTORE_WIENER] = INT64_MAX;
1759 0 : return;
1760 : }
1761 :
1762 : RestorationUnitInfo rui;
1763 0 : memset(&rui, 0, sizeof(rui));
1764 0 : rui.restoration_type = RESTORE_WIENER;
1765 0 : finalize_sym_filter(wiener_win, vfilterd, rui.wiener_info.vfilter);
1766 0 : finalize_sym_filter(wiener_win, hfilterd, rui.wiener_info.hfilter);
1767 :
1768 : // Filter score computes the value of the function x'*A*x - x'*b for the
1769 : // learned filter and compares it against identity filer. If there is no
1770 : // reduction in the function, the filter is reverted back to identity
1771 0 : if (compute_score(wiener_win, M, H, rui.wiener_info.vfilter,
1772 : rui.wiener_info.hfilter) > 0) {
1773 0 : rsc->bits += bits_none;
1774 0 : rsc->sse += rusi->sse[RESTORE_NONE];
1775 0 : rusi->best_rtype[RESTORE_WIENER - 1] = RESTORE_NONE;
1776 0 : rusi->sse[RESTORE_WIENER] = INT64_MAX;
1777 0 : return;
1778 : }
1779 :
1780 0 : aom_clear_system_state();
1781 :
1782 0 : rusi->sse[RESTORE_WIENER] =
1783 0 : finer_tile_search_wiener(rsc, limits, tile_rect, &rui, wiener_win);
1784 0 : rusi->wiener = rui.wiener_info;
1785 :
1786 : if (wiener_win != WIENER_WIN) {
1787 : assert(rui.wiener_info.vfilter[0] == 0 &&
1788 : rui.wiener_info.vfilter[WIENER_WIN - 1] == 0);
1789 : assert(rui.wiener_info.hfilter[0] == 0 &&
1790 : rui.wiener_info.hfilter[WIENER_WIN - 1] == 0);
1791 : }
1792 :
1793 0 : const int64_t bits_wiener =
1794 0 : x->wiener_restore_cost[1] +
1795 0 : (count_wiener_bits(wiener_win, &rusi->wiener, &rsc->wiener)
1796 0 : << AV1_PROB_COST_SHIFT);
1797 :
1798 0 : double cost_none =
1799 0 : RDCOST_DBL(x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE]);
1800 0 : double cost_wiener =
1801 0 : RDCOST_DBL(x->rdmult, bits_wiener >> 4, rusi->sse[RESTORE_WIENER]);
1802 :
1803 0 : RestorationType rtype =
1804 0 : (cost_wiener < cost_none) ? RESTORE_WIENER : RESTORE_NONE;
1805 0 : rusi->best_rtype[RESTORE_WIENER - 1] = rtype;
1806 :
1807 0 : rsc->sse += rusi->sse[rtype];
1808 0 : rsc->bits += (cost_wiener < cost_none) ? bits_wiener : bits_none;
1809 0 : if (cost_wiener < cost_none) rsc->wiener = rusi->wiener;
1810 : }
1811 :
1812 0 : static void search_norestore(const RestorationTileLimits *limits,
1813 : const AV1PixelRect *tile_rect, int32_t rest_unit_idx,
1814 : void *priv) {
1815 : (void)tile_rect;
1816 :
1817 0 : RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
1818 0 : RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
1819 :
1820 0 : const int32_t highbd = rsc->cm->use_highbitdepth;
1821 0 : rusi->sse[RESTORE_NONE] = sse_restoration_unit(
1822 0 : limits, rsc->src, rsc->cm->frame_to_show, rsc->plane, highbd);
1823 :
1824 0 : rsc->sse += rusi->sse[RESTORE_NONE];
1825 0 : }
1826 :
1827 180 : static void search_switchable(const RestorationTileLimits *limits,
1828 : const AV1PixelRect *tile_rect, int32_t rest_unit_idx,
1829 : void *priv) {
1830 : (void)limits;
1831 : (void)tile_rect;
1832 180 : RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
1833 180 : RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
1834 :
1835 180 : const Macroblock *const x = rsc->x;
1836 :
1837 180 : const int32_t wiener_win =
1838 180 : (rsc->plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA;
1839 :
1840 180 : double best_cost = 0;
1841 180 : int64_t best_bits = 0;
1842 180 : RestorationType best_rtype = RESTORE_NONE;
1843 :
1844 : //CHKN for (RestorationType r = 0; r < RESTORE_SWITCHABLE_TYPES; ++r) {
1845 720 : for (int32_t restType = 0; restType < RESTORE_SWITCHABLE_TYPES; ++restType) {
1846 540 : RestorationType r = (RestorationType)restType;
1847 :
1848 : // Check for the condition that wiener or sgrproj search could not
1849 : // find a solution or the solution was worse than RESTORE_NONE.
1850 : // In either case the best_rtype will be set as RESTORE_NONE. These
1851 : // should be skipped from the test below.
1852 540 : if (r > RESTORE_NONE)
1853 360 : if (rusi->best_rtype[r - 1] == RESTORE_NONE) continue;
1854 390 : const int64_t sse = rusi->sse[r];
1855 390 : int64_t coeff_pcost = 0;
1856 390 : switch (r) {
1857 180 : case RESTORE_NONE: coeff_pcost = 0; break;
1858 96 : case RESTORE_WIENER:
1859 96 : coeff_pcost =
1860 96 : count_wiener_bits(wiener_win, &rusi->wiener, &rsc->wiener);
1861 96 : break;
1862 114 : case RESTORE_SGRPROJ:
1863 114 : coeff_pcost = count_sgrproj_bits(&rusi->sgrproj, &rsc->sgrproj);
1864 114 : break;
1865 0 : default: assert(0); break;
1866 : }
1867 390 : const int64_t coeff_bits = coeff_pcost << AV1_PROB_COST_SHIFT;
1868 390 : const int64_t bits = x->switchable_restore_cost[r] + coeff_bits;
1869 390 : double cost = RDCOST_DBL(x->rdmult, bits >> 4, sse);
1870 390 : if (r == 0 || cost < best_cost) {
1871 335 : best_cost = cost;
1872 335 : best_bits = bits;
1873 335 : best_rtype = r;
1874 : }
1875 : }
1876 :
1877 180 : rusi->best_rtype[RESTORE_SWITCHABLE - 1] = best_rtype;
1878 :
1879 180 : rsc->sse += rusi->sse[best_rtype];
1880 180 : rsc->bits += best_bits;
1881 180 : if (best_rtype == RESTORE_WIENER) rsc->wiener = rusi->wiener;
1882 180 : if (best_rtype == RESTORE_SGRPROJ) rsc->sgrproj = rusi->sgrproj;
1883 180 : }
1884 :
1885 192 : static void copy_unit_info(RestorationType frame_rtype,
1886 : const RestUnitSearchInfo *rusi,
1887 : RestorationUnitInfo *rui) {
1888 192 : if (frame_rtype >= 1)
1889 192 : rui->restoration_type = rusi->best_rtype[frame_rtype - 1];
1890 192 : if (rui->restoration_type == RESTORE_WIENER)
1891 65 : rui->wiener_info = rusi->wiener;
1892 : else
1893 127 : rui->sgrproj_info = rusi->sgrproj;
1894 192 : }
1895 :
1896 0 : static double search_rest_type(RestSearchCtxt *rsc, RestorationType rtype)
1897 : {
1898 : static const RestUnitVisitor funs[RESTORE_TYPES] = {
1899 : search_norestore, search_wiener, search_sgrproj, search_switchable
1900 : };
1901 :
1902 0 : reset_rsc(rsc);
1903 :
1904 0 : av1_foreach_rest_unit_in_frame(rsc->cm, rsc->plane, rsc_on_tile, funs[rtype], rsc);
1905 :
1906 0 : return RDCOST_DBL(rsc->x->rdmult, rsc->bits >> 4, rsc->sse);
1907 : }
1908 :
1909 120 : static int32_t rest_tiles_in_plane(const Av1Common *cm, int32_t plane) {
1910 120 : const RestorationInfo *rsi = &cm->rst_info[plane];
1911 120 : return rsi->units_per_tile;
1912 : }
1913 :
1914 : void *eb_aom_memalign(size_t align, size_t size);
1915 : void eb_aom_free(void *memblk);
1916 :
1917 0 : void eb_av1_pick_filter_restoration(const Yv12BufferConfig *src, Yv12BufferConfig * trial_frame_rst /*Av1Comp *cpi*/, Macroblock *x, Av1Common *const cm) {
1918 : //CHKN Av1Common *const cm = &cpi->common;
1919 0 : const int32_t num_planes = 3;// av1_num_planes(cm);
1920 : // assert(!cm->all_lossless);
1921 0 : RestorationType force_restore_type_d = (cm->wn_filter_mode) ? RESTORE_TYPES : RESTORE_SGRPROJ;
1922 :
1923 : int32_t ntiles[2];
1924 0 : for (int32_t is_uv = 0; is_uv < 2; ++is_uv)
1925 0 : ntiles[is_uv] = rest_tiles_in_plane(cm, is_uv);
1926 :
1927 : assert(ntiles[1] <= ntiles[0]);
1928 : RestUnitSearchInfo *rusi =
1929 0 : (RestUnitSearchInfo *)eb_aom_memalign(16, sizeof(*rusi) * ntiles[0]);
1930 :
1931 : // If the restoration unit dimensions are not multiples of
1932 : // rsi->restoration_unit_size then some elements of the rusi array may be
1933 : // left uninitialised when we reach copy_unit_info(...). This is not a
1934 : // problem, as these elements are ignored later, but in order to quiet
1935 : // Valgrind's warnings we initialise the array below.
1936 0 : memset(rusi, 0, sizeof(*rusi) * ntiles[0]);
1937 :
1938 : RestSearchCtxt rsc;
1939 0 : const int32_t plane_start = AOM_PLANE_Y;
1940 0 : const int32_t plane_end = num_planes > 1 ? AOM_PLANE_V : AOM_PLANE_Y;
1941 0 : for (int32_t plane = plane_start; plane <= plane_end; ++plane) {
1942 : //CHKN init_rsc(src, cpi->common, &cpi->td.mb, plane, rusi, &cpi->trial_frame_rst,&rsc);
1943 0 : init_rsc(src, cm, x, plane, rusi, trial_frame_rst, &rsc);
1944 :
1945 0 : const int32_t plane_ntiles = ntiles[plane > 0];
1946 0 : const RestorationType num_rtypes =
1947 0 : (plane_ntiles > 1) ? RESTORE_TYPES : RESTORE_SWITCHABLE_TYPES;
1948 :
1949 0 : double best_cost = 0;
1950 0 : RestorationType best_rtype = RESTORE_NONE;
1951 0 : const int32_t highbd = rsc.cm->use_highbitdepth;
1952 :
1953 0 : eb_extend_frame(rsc.dgd_buffer, rsc.plane_width, rsc.plane_height,
1954 : rsc.dgd_stride, RESTORATION_BORDER, RESTORATION_BORDER,
1955 : highbd);
1956 :
1957 : //CHKN for (RestorationType r = 0; r < num_rtypes; ++r) {
1958 0 : for (int32_t restType = 0; restType < num_rtypes; ++restType) {
1959 0 : RestorationType r = (RestorationType)restType;
1960 :
1961 0 : if ((force_restore_type_d != RESTORE_TYPES) && (r != RESTORE_NONE) &&
1962 : (r != force_restore_type_d))
1963 0 : continue;
1964 :
1965 0 : double cost = search_rest_type(&rsc, r);
1966 :
1967 0 : if (r == 0 || cost < best_cost)
1968 : {
1969 0 : best_cost = cost;
1970 0 : best_rtype = r;
1971 : }
1972 : }
1973 0 : cm->rst_info[plane].frame_restoration_type = best_rtype;
1974 : if (force_restore_type_d != RESTORE_TYPES)
1975 : assert(best_rtype == force_restore_type_d || best_rtype == RESTORE_NONE);
1976 :
1977 0 : if (best_rtype != RESTORE_NONE) {
1978 0 : for (int32_t u = 0; u < plane_ntiles; ++u)
1979 0 : copy_unit_info(best_rtype, &rusi[u], &cm->rst_info[plane].unit_info[u]);
1980 : }
1981 : }
1982 :
1983 0 : eb_aom_free(rusi);
1984 0 : }
1985 :
1986 300 : static void search_sgrproj_seg(const RestorationTileLimits *limits,
1987 : const AV1PixelRect *tile, int32_t rest_unit_idx,
1988 : void *priv) {
1989 300 : RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
1990 300 : RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
1991 :
1992 300 : Av1Common *const cm = rsc->cm;
1993 300 : const int32_t highbd = cm->use_highbitdepth;
1994 300 : const int32_t bit_depth = cm->bit_depth;
1995 :
1996 300 : uint8_t *dgd_start =
1997 300 : rsc->dgd_buffer + limits->v_start * rsc->dgd_stride + limits->h_start;
1998 300 : const uint8_t *src_start =
1999 300 : rsc->src_buffer + limits->v_start * rsc->src_stride + limits->h_start;
2000 :
2001 300 : const int32_t is_uv = rsc->plane > 0;
2002 300 : const int32_t ss_x = is_uv && cm->subsampling_x;
2003 300 : const int32_t ss_y = is_uv && cm->subsampling_y;
2004 300 : const int32_t procunit_width = RESTORATION_PROC_UNIT_SIZE >> ss_x;
2005 300 : const int32_t procunit_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
2006 300 : int8_t step = get_sg_step(cm->sg_filter_mode);
2007 :
2008 300 : rusi->sgrproj = search_selfguided_restoration(
2009 300 : dgd_start, limits->h_end - limits->h_start,
2010 300 : limits->v_end - limits->v_start, rsc->dgd_stride, src_start,
2011 : rsc->src_stride, highbd, bit_depth, procunit_width, procunit_height,
2012 : rsc->tmpbuf
2013 300 : , cm->sg_ref_frame_ep,
2014 300 : cm->sg_frame_ep_cnt,
2015 : step
2016 : );
2017 :
2018 : RestorationUnitInfo rui;
2019 300 : rui.restoration_type = RESTORE_SGRPROJ;
2020 300 : rui.sgrproj_info = rusi->sgrproj;
2021 :
2022 300 : rusi->sse[RESTORE_SGRPROJ] = try_restoration_unit_seg(rsc, limits, tile, &rui);
2023 300 : }
2024 :
2025 300 : static void search_sgrproj_finish(const RestorationTileLimits *limits,
2026 : const AV1PixelRect *tile, int32_t rest_unit_idx,
2027 : void *priv) {
2028 : (void)limits;
2029 : (void)tile;
2030 300 : RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
2031 300 : RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
2032 :
2033 300 : const Macroblock *const x = rsc->x;
2034 :
2035 300 : rusi->sse[RESTORE_SGRPROJ] = rsc->rusi_pic[rest_unit_idx].sse[RESTORE_SGRPROJ];
2036 300 : rusi->sgrproj = rsc->rusi_pic[rest_unit_idx].sgrproj;
2037 :
2038 300 : const int64_t bits_none = x->sgrproj_restore_cost[0];
2039 600 : const int64_t bits_sgr = x->sgrproj_restore_cost[1] +
2040 300 : (count_sgrproj_bits(&rusi->sgrproj, &rsc->sgrproj)
2041 300 : << AV1_PROB_COST_SHIFT);
2042 :
2043 300 : double cost_none =
2044 300 : RDCOST_DBL(x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE]);
2045 300 : double cost_sgr =
2046 300 : RDCOST_DBL(x->rdmult, bits_sgr >> 4, rusi->sse[RESTORE_SGRPROJ]);
2047 :
2048 300 : RestorationType rtype =
2049 300 : (cost_sgr < cost_none) ? RESTORE_SGRPROJ : RESTORE_NONE;
2050 300 : rusi->best_rtype[RESTORE_SGRPROJ - 1] = rtype;
2051 :
2052 300 : rsc->sse += rusi->sse[rtype];
2053 300 : rsc->bits += (cost_sgr < cost_none) ? bits_sgr : bits_none;
2054 300 : if (cost_sgr < cost_none) rsc->sgrproj = rusi->sgrproj;
2055 300 : }
2056 :
2057 300 : static void search_wiener_seg(const RestorationTileLimits *limits,
2058 : const AV1PixelRect *tile_rect, int32_t rest_unit_idx,
2059 : void *priv) {
2060 300 : RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
2061 300 : RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
2062 300 : const Av1Common *const cm = rsc->cm;
2063 300 : int32_t wn_luma = cm->wn_filter_mode == 1 ? WIENER_WIN_3TAP : cm->wn_filter_mode == 2 ? WIENER_WIN_CHROMA : WIENER_WIN;
2064 600 : const int32_t wiener_win = cm->wn_filter_mode == 1 ? WIENER_WIN_3TAP :
2065 300 : (rsc->plane == AOM_PLANE_Y) ? wn_luma : WIENER_WIN_CHROMA;
2066 : EB_ALIGN(32) int64_t M[WIENER_WIN2];
2067 : EB_ALIGN(32) int64_t H[WIENER_WIN2 * WIENER_WIN2];
2068 : int32_t vfilterd[WIENER_WIN], hfilterd[WIENER_WIN];
2069 :
2070 300 : if (cm->use_highbitdepth)
2071 : {
2072 0 : if (rsc->plane == AOM_PLANE_Y) {
2073 0 : eb_av1_compute_stats_highbd(wiener_win, rsc->dgd_buffer, rsc->src_buffer,
2074 : limits->h_start, limits->h_end, limits->v_start,
2075 : limits->v_end, rsc->dgd_stride, rsc->src_stride, M,
2076 0 : H, (AomBitDepth)cm->bit_depth);
2077 : }
2078 : else {
2079 0 : eb_av1_compute_stats_highbd(wiener_win, rsc->dgd_buffer, rsc->src_buffer,
2080 : limits->h_start, limits->h_end, limits->v_start,
2081 : limits->v_end, rsc->dgd_stride, rsc->src_stride, M,
2082 0 : H, (AomBitDepth)cm->bit_depth);
2083 : }
2084 : }
2085 : else {
2086 300 : eb_av1_compute_stats(wiener_win, rsc->dgd_buffer, rsc->src_buffer, limits->h_start,
2087 : limits->h_end, limits->v_start, limits->v_end,
2088 : rsc->dgd_stride, rsc->src_stride, M, H);
2089 : }
2090 :
2091 300 : if (!wiener_decompose_sep_sym(wiener_win, M, H, vfilterd, hfilterd)) {
2092 0 : printf("CHKN never get here\n");
2093 0 : rusi->best_rtype[RESTORE_WIENER - 1] = RESTORE_NONE;
2094 0 : rusi->sse[RESTORE_WIENER] = INT64_MAX;
2095 5 : return;
2096 : }
2097 :
2098 : RestorationUnitInfo rui;
2099 300 : memset(&rui, 0, sizeof(rui));
2100 300 : rui.restoration_type = RESTORE_WIENER;
2101 300 : finalize_sym_filter(wiener_win, vfilterd, rui.wiener_info.vfilter);
2102 300 : finalize_sym_filter(wiener_win, hfilterd, rui.wiener_info.hfilter);
2103 :
2104 : // Filter score computes the value of the function x'*A*x - x'*b for the
2105 : // learned filter and compares it against identity filer. If there is no
2106 : // reduction in the function, the filter is reverted back to identity
2107 300 : if (compute_score(wiener_win, M, H, rui.wiener_info.vfilter,
2108 : rui.wiener_info.hfilter) > 0) {
2109 5 : rusi->sse[RESTORE_WIENER] = INT64_MAX;
2110 5 : return;
2111 : }
2112 :
2113 295 : aom_clear_system_state();
2114 :
2115 295 : rusi->sse[RESTORE_WIENER] =
2116 295 : finer_tile_search_wiener_seg(rsc, limits, tile_rect, &rui, wiener_win);
2117 295 : rusi->wiener = rui.wiener_info;
2118 :
2119 : if (wiener_win != WIENER_WIN) {
2120 : assert(rui.wiener_info.vfilter[0] == 0 &&
2121 : rui.wiener_info.vfilter[WIENER_WIN - 1] == 0);
2122 : assert(rui.wiener_info.hfilter[0] == 0 &&
2123 : rui.wiener_info.hfilter[WIENER_WIN - 1] == 0);
2124 : }
2125 : }
2126 300 : static void search_wiener_finish(const RestorationTileLimits *limits,
2127 : const AV1PixelRect *tile_rect, int32_t rest_unit_idx,
2128 : void *priv) {
2129 : (void)limits;
2130 : (void)tile_rect;
2131 300 : RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
2132 300 : RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
2133 300 : const Av1Common *const cm = rsc->cm;
2134 300 : int32_t wn_luma = cm->wn_filter_mode == 1 ? WIENER_WIN_3TAP : cm->wn_filter_mode == 2 ? WIENER_WIN_CHROMA : WIENER_WIN;
2135 600 : const int32_t wiener_win = cm->wn_filter_mode == 1 ? WIENER_WIN_3TAP :
2136 300 : (rsc->plane == AOM_PLANE_Y) ? wn_luma : WIENER_WIN_CHROMA;
2137 :
2138 300 : const Macroblock *const x = rsc->x;
2139 300 : const int64_t bits_none = x->wiener_restore_cost[0];
2140 :
2141 : RestorationUnitInfo rui;
2142 300 : memset(&rui, 0, sizeof(rui));
2143 300 : rui.restoration_type = RESTORE_WIENER;
2144 :
2145 : // Filter score computes the value of the function x'*A*x - x'*b for the
2146 : // learned filter and compares it against identity filer. If there is no
2147 : // reduction in the function, the filter is reverted back to identity
2148 :
2149 300 : rusi->sse[RESTORE_WIENER] = rsc->rusi_pic[rest_unit_idx].sse[RESTORE_WIENER];
2150 300 : if (rusi->sse[RESTORE_WIENER] == INT64_MAX)
2151 : {
2152 5 : rsc->bits += bits_none;
2153 5 : rsc->sse += rusi->sse[RESTORE_NONE];
2154 5 : rusi->best_rtype[RESTORE_WIENER - 1] = RESTORE_NONE;
2155 5 : rusi->sse[RESTORE_WIENER] = INT64_MAX;
2156 5 : return;
2157 : }
2158 :
2159 295 : aom_clear_system_state();
2160 :
2161 295 : rusi->wiener = rsc->rusi_pic[rest_unit_idx].wiener;
2162 :
2163 295 : const int64_t bits_wiener =
2164 590 : x->wiener_restore_cost[1] +
2165 295 : (count_wiener_bits(wiener_win, &rusi->wiener, &rsc->wiener)
2166 295 : << AV1_PROB_COST_SHIFT);
2167 :
2168 295 : double cost_none =
2169 295 : RDCOST_DBL(x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE]);
2170 295 : double cost_wiener =
2171 295 : RDCOST_DBL(x->rdmult, bits_wiener >> 4, rusi->sse[RESTORE_WIENER]);
2172 :
2173 295 : RestorationType rtype =
2174 295 : (cost_wiener < cost_none) ? RESTORE_WIENER : RESTORE_NONE;
2175 295 : rusi->best_rtype[RESTORE_WIENER - 1] = rtype;
2176 :
2177 295 : rsc->sse += rusi->sse[rtype];
2178 295 : rsc->bits += (cost_wiener < cost_none) ? bits_wiener : bits_none;
2179 295 : if (cost_wiener < cost_none) rsc->wiener = rusi->wiener;
2180 : }
2181 300 : static void search_norestore_seg(const RestorationTileLimits *limits,
2182 : const AV1PixelRect *tile_rect, int32_t rest_unit_idx,
2183 : void *priv) {
2184 : (void)tile_rect;
2185 :
2186 300 : RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
2187 300 : RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
2188 :
2189 300 : const int32_t highbd = rsc->cm->use_highbitdepth;
2190 600 : rusi->sse[RESTORE_NONE] = sse_restoration_unit(
2191 300 : limits, rsc->src, rsc->cm->frame_to_show, rsc->plane, highbd);
2192 300 : }
2193 300 : static void search_norestore_finish(const RestorationTileLimits *limits,
2194 : const AV1PixelRect *tile_rect, int32_t rest_unit_idx,
2195 : void *priv) {
2196 : (void)tile_rect;
2197 : (void)limits;
2198 :
2199 300 : RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
2200 300 : RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
2201 :
2202 300 : rusi->sse[RESTORE_NONE] = rsc->rusi_pic[rest_unit_idx].sse[RESTORE_NONE];
2203 :
2204 300 : rsc->sse += rusi->sse[RESTORE_NONE];
2205 300 : }
2206 600 : static double search_rest_type_finish(RestSearchCtxt *rsc, RestorationType rtype)
2207 : {
2208 : static const RestUnitVisitor funs[RESTORE_TYPES] = {
2209 : search_norestore_finish, search_wiener_finish, search_sgrproj_finish, search_switchable
2210 : };
2211 :
2212 600 : reset_rsc(rsc);
2213 :
2214 600 : av1_foreach_rest_unit_in_frame(rsc->cm, rsc->plane, rsc_on_tile, funs[rtype], rsc);
2215 :
2216 600 : return RDCOST_DBL(rsc->x->rdmult, rsc->bits >> 4, rsc->sse);
2217 : }
2218 :
2219 60 : void restoration_seg_search(
2220 : RestContext *context_ptr,
2221 : Yv12BufferConfig *org_fts,
2222 : const Yv12BufferConfig *src,
2223 : Yv12BufferConfig *trial_frame_rst ,
2224 : PictureControlSet *pcs_ptr,
2225 : uint32_t segment_index )
2226 : {
2227 60 : Av1Common *const cm = pcs_ptr->parent_pcs_ptr->av1_cm;
2228 60 : Macroblock *x = pcs_ptr->parent_pcs_ptr->av1x;
2229 60 : const int32_t num_planes = 3;
2230 :
2231 : // If the restoration unit dimensions are not multiples of
2232 : // rsi->restoration_unit_size then some elements of the rusi array may be
2233 : // left uninitialised when we reach copy_unit_info(...). This is not a
2234 : // problem, as these elements are ignored later, but in order to quiet
2235 : // Valgrind's warnings we initialise the array to zero.
2236 :
2237 : RestSearchCtxt rsc; //this context is specific for this segment
2238 60 : RestSearchCtxt* rsc_p = &rsc;
2239 :
2240 60 : const int32_t plane_start = AOM_PLANE_Y;
2241 60 : const int32_t plane_end = num_planes > 1 ? AOM_PLANE_V : AOM_PLANE_Y;
2242 240 : for (int32_t plane = plane_start; plane <= plane_end; ++plane)
2243 : {
2244 180 : RestUnitSearchInfo *rusi = pcs_ptr->parent_pcs_ptr->rusi_picture[plane];
2245 :
2246 180 : init_rsc_seg(org_fts,src, cm, x, plane, rusi, trial_frame_rst, &rsc);
2247 :
2248 180 : rsc_p->tmpbuf = context_ptr->rst_tmpbuf;
2249 :
2250 180 : const int32_t highbd = rsc.cm->use_highbitdepth;
2251 180 : eb_extend_frame(rsc.dgd_buffer, rsc.plane_width, rsc.plane_height,
2252 : rsc.dgd_stride, RESTORATION_BORDER, RESTORATION_BORDER,
2253 : highbd);
2254 :
2255 180 : av1_foreach_rest_unit_in_frame_seg(rsc_p->cm, rsc_p->plane, rsc_on_tile, search_norestore_seg, rsc_p, pcs_ptr, segment_index);
2256 180 : if (cm->wn_filter_mode)
2257 180 : av1_foreach_rest_unit_in_frame_seg(rsc_p->cm, rsc_p->plane, rsc_on_tile, search_wiener_seg, rsc_p, pcs_ptr, segment_index);
2258 180 : av1_foreach_rest_unit_in_frame_seg(rsc_p->cm, rsc_p->plane, rsc_on_tile, search_sgrproj_seg, rsc_p, pcs_ptr, segment_index);
2259 : }
2260 60 : }
2261 60 : void rest_finish_search(Macroblock *x, Av1Common *const cm)
2262 : {
2263 60 : const int32_t num_planes = 3;
2264 60 : RestorationType force_restore_type_d = (cm->wn_filter_mode) ? RESTORE_TYPES : RESTORE_SGRPROJ;
2265 : int32_t ntiles[2];
2266 180 : for (int32_t is_uv = 0; is_uv < 2; ++is_uv)
2267 120 : ntiles[is_uv] = rest_tiles_in_plane(cm, is_uv);
2268 :
2269 : assert(ntiles[1] <= ntiles[0]);
2270 : RestUnitSearchInfo *rusi =
2271 60 : (RestUnitSearchInfo *)eb_aom_memalign(16, sizeof(*rusi) * ntiles[0]);
2272 :
2273 : // If the restoration unit dimensions are not multiples of
2274 : // rsi->restoration_unit_size then some elements of the rusi array may be
2275 : // left uninitialised when we reach copy_unit_info(...). This is not a
2276 : // problem, as these elements are ignored later, but in order to quiet
2277 : // Valgrind's warnings we initialise the array below.
2278 60 : memset(rusi, 0, sizeof(*rusi) * ntiles[0]);
2279 :
2280 : RestSearchCtxt rsc;
2281 60 : const int32_t plane_start = AOM_PLANE_Y;
2282 60 : const int32_t plane_end = num_planes > 1 ? AOM_PLANE_V : AOM_PLANE_Y;
2283 240 : for (int32_t plane = plane_start; plane <= plane_end; ++plane) {
2284 : //init rsc context for this plane
2285 180 : rsc.cm = cm;
2286 180 : rsc.x = x;
2287 180 : rsc.plane = plane;
2288 180 : rsc.rusi = rusi;
2289 180 : rsc.pic_num = (uint32_t)cm->p_pcs_ptr->picture_number;
2290 180 : rsc.rusi_pic = cm->p_pcs_ptr->rusi_picture[plane];
2291 :
2292 180 : const int32_t plane_ntiles = ntiles[plane > 0];
2293 180 : const RestorationType num_rtypes =
2294 180 : (plane_ntiles > 1) ? RESTORE_TYPES : RESTORE_SWITCHABLE_TYPES;
2295 :
2296 180 : double best_cost = 0;
2297 180 : RestorationType best_rtype = RESTORE_NONE;
2298 :
2299 780 : for (int32_t restType = 0; restType < num_rtypes; ++restType) {
2300 600 : RestorationType r = (RestorationType)restType;
2301 :
2302 600 : if ((force_restore_type_d != RESTORE_TYPES) && (r != RESTORE_NONE) &&
2303 : (r != force_restore_type_d))
2304 0 : continue;
2305 :
2306 600 : double cost = search_rest_type_finish(&rsc, r);
2307 :
2308 600 : if (r == 0 || cost < best_cost)
2309 : {
2310 294 : best_cost = cost;
2311 294 : best_rtype = r;
2312 : }
2313 : }
2314 :
2315 180 : cm->rst_info[plane].frame_restoration_type = best_rtype;
2316 : if (force_restore_type_d != RESTORE_TYPES)
2317 : assert(best_rtype == force_restore_type_d || best_rtype == RESTORE_NONE);
2318 :
2319 180 : if (best_rtype != RESTORE_NONE) {
2320 264 : for (int32_t u = 0; u < plane_ntiles; ++u)
2321 192 : copy_unit_info(best_rtype, &rusi[u], &cm->rst_info[plane].unit_info[u]);
2322 : }
2323 : }
2324 :
2325 60 : eb_aom_free(rusi);
2326 60 : }
|