Line data Source code
1 : /*
2 : * Copyright (c) 2017, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include <math.h>
13 : #include <stdio.h>
14 : #include <stdlib.h>
15 : #include "noise_model.h"
16 : #include "noise_util.h"
17 : #include "mathutils.h"
18 :
19 : #define kLowPolyNumParams 3
20 :
21 : static const int32_t kMaxLag = 4;
22 :
23 : void *eb_aom_memalign(size_t align, size_t size);
24 : void eb_aom_free(void *memblk);
25 :
26 : void un_pack2d(
27 : uint16_t *in16_bit_buffer,
28 : uint32_t in_stride,
29 : uint8_t *out8_bit_buffer,
30 : uint32_t out8_stride,
31 : uint8_t *outn_bit_buffer,
32 : uint32_t outn_stride,
33 : uint32_t width,
34 : uint32_t height);
35 :
36 : void pack2d_src(
37 : uint8_t *in8_bit_buffer,
38 : uint32_t in8_stride,
39 : uint8_t *inn_bit_buffer,
40 : uint32_t inn_stride,
41 : uint16_t *out16_bit_buffer,
42 : uint32_t out_stride,
43 : uint32_t width,
44 : uint32_t height);
45 :
46 : // Defines a function that can be used to obtain the mean of a block for the
47 : // provided data type (uint8_t, or uint16_t)
48 : #define GET_BLOCK_MEAN(INT_TYPE, suffix) \
49 : static double get_block_mean_##suffix(const INT_TYPE *data, int32_t w, int32_t h, \
50 : int32_t stride, int32_t x_o, int32_t y_o, \
51 : int32_t block_size) { \
52 : const int32_t max_h = AOMMIN(h - y_o, block_size); \
53 : const int32_t max_w = AOMMIN(w - x_o, block_size); \
54 : double block_mean = 0; \
55 : for (int32_t y = 0; y < max_h; ++y) { \
56 : for (int32_t x = 0; x < max_w; ++x) { \
57 : block_mean += data[(y_o + y) * stride + x_o + x]; \
58 : } \
59 : } \
60 : return block_mean / (max_w * max_h); \
61 : }
62 :
63 0 : GET_BLOCK_MEAN(uint8_t, lowbd);
64 0 : GET_BLOCK_MEAN(uint16_t, highbd);
65 :
66 0 : static INLINE double get_block_mean(const uint8_t *data, int32_t w, int32_t h,
67 : int32_t stride, int32_t x_o, int32_t y_o,
68 : int32_t block_size, int32_t use_highbd) {
69 0 : if (use_highbd)
70 0 : return get_block_mean_highbd((const uint16_t *)data, w, h, stride, x_o, y_o,
71 : block_size);
72 0 : return get_block_mean_lowbd(data, w, h, stride, x_o, y_o, block_size);
73 : }
74 :
75 0 : static INLINE double fclamp(double value, double low, double high) {
76 0 : return value < low ? low : (value > high ? high : value);
77 : }
78 :
79 : // Defines a function that can be used to obtain the variance of a block
80 : // for the provided data type (uint8_t, or uint16_t)
81 : #define GET_NOISE_VAR(INT_TYPE, suffix) \
82 : static double get_noise_var_##suffix( \
83 : const INT_TYPE *data, const INT_TYPE *denoised, int32_t stride, int32_t w, \
84 : int32_t h, int32_t x_o, int32_t y_o, int32_t block_size_x, int32_t block_size_y) { \
85 : const int32_t max_h = AOMMIN(h - y_o, block_size_y); \
86 : const int32_t max_w = AOMMIN(w - x_o, block_size_x); \
87 : double noise_var = 0; \
88 : double noise_mean = 0; \
89 : for (int32_t y = 0; y < max_h; ++y) { \
90 : for (int32_t x = 0; x < max_w; ++x) { \
91 : double noise = (double)data[(y_o + y) * stride + x_o + x] - \
92 : denoised[(y_o + y) * stride + x_o + x]; \
93 : noise_mean += noise; \
94 : noise_var += noise * noise; \
95 : } \
96 : } \
97 : noise_mean /= (max_w * max_h); \
98 : return noise_var / (max_w * max_h) - noise_mean * noise_mean; \
99 : }
100 :
101 0 : GET_NOISE_VAR(uint8_t, lowbd);
102 0 : GET_NOISE_VAR(uint16_t, highbd);
103 :
104 0 : static INLINE double get_noise_var(const uint8_t *data, const uint8_t *denoised,
105 : int32_t w, int32_t h, int32_t stride, int32_t x_o, int32_t y_o,
106 : int32_t block_size_x, int32_t block_size_y,
107 : int32_t use_highbd) {
108 0 : if (use_highbd)
109 0 : return get_noise_var_highbd((const uint16_t *)data,
110 : (const uint16_t *)denoised, w, h, stride, x_o,
111 : y_o, block_size_x, block_size_y);
112 0 : return get_noise_var_lowbd(data, denoised, w, h, stride, x_o, y_o,
113 : block_size_x, block_size_y);
114 : }
115 :
116 0 : static void equation_system_clear(aom_equation_system_t *eqns) {
117 0 : const int32_t n = eqns->n;
118 0 : memset(eqns->A, 0, sizeof(*eqns->A) * n * n);
119 0 : memset(eqns->x, 0, sizeof(*eqns->x) * n);
120 0 : memset(eqns->b, 0, sizeof(*eqns->b) * n);
121 0 : }
122 :
123 0 : static void equation_system_copy(aom_equation_system_t *dst,
124 : const aom_equation_system_t *src) {
125 0 : const int32_t n = dst->n;
126 0 : memcpy(dst->A, src->A, sizeof(*dst->A) * n * n);
127 0 : memcpy(dst->x, src->x, sizeof(*dst->x) * n);
128 0 : memcpy(dst->b, src->b, sizeof(*dst->b) * n);
129 0 : }
130 :
131 0 : static int32_t equation_system_init(aom_equation_system_t *eqns, int32_t n) {
132 0 : eqns->A = (double *)malloc(sizeof(*eqns->A) * n * n);
133 0 : eqns->b = (double *)malloc(sizeof(*eqns->b) * n);
134 0 : eqns->x = (double *)malloc(sizeof(*eqns->x) * n);
135 0 : eqns->n = n;
136 0 : if (!eqns->A || !eqns->b || !eqns->x) {
137 0 : fprintf(stderr, "Failed to allocate system of equations of size %d\n", n);
138 0 : free(eqns->A);
139 0 : free(eqns->b);
140 0 : free(eqns->x);
141 0 : memset(eqns, 0, sizeof(*eqns));
142 0 : return 0;
143 : }
144 0 : equation_system_clear(eqns);
145 0 : return 1;
146 : }
147 :
148 0 : static int32_t equation_system_solve(aom_equation_system_t *eqns) {
149 0 : const int32_t n = eqns->n;
150 0 : double *b = (double *)malloc(sizeof(*b) * n);
151 0 : double *A = (double *)malloc(sizeof(*A) * n * n);
152 0 : int32_t ret = 0;
153 0 : if (A == NULL || b == NULL) {
154 0 : fprintf(stderr, "Unable to allocate temp values of size %dx%d\n", n, n);
155 0 : free(b);
156 0 : free(A);
157 0 : return 0;
158 : }
159 0 : memcpy(A, eqns->A, sizeof(*eqns->A) * n * n);
160 0 : memcpy(b, eqns->b, sizeof(*eqns->b) * n);
161 0 : ret = linsolve(n, A, eqns->n, b, eqns->x);
162 0 : free(b);
163 0 : free(A);
164 :
165 0 : if (ret == 0)
166 0 : return 0;
167 0 : return 1;
168 : }
169 : /*
170 : static void equation_system_add(aom_equation_system_t *dest,
171 : aom_equation_system_t *src) {
172 : const int32_t n = dest->n;
173 : int32_t i, j;
174 : for (i = 0; i < n; ++i) {
175 : for (j = 0; j < n; ++j)
176 : dest->A[i * n + j] += src->A[i * n + j];
177 : dest->b[i] += src->b[i];
178 : }
179 : }
180 : */
181 0 : static void equation_system_free(aom_equation_system_t *eqns) {
182 0 : if (!eqns) return;
183 0 : free(eqns->A);
184 0 : free(eqns->b);
185 0 : free(eqns->x);
186 0 : memset(eqns, 0, sizeof(*eqns));
187 : }
188 :
189 0 : static void noise_strength_solver_clear(aom_noise_strength_solver_t *solver) {
190 0 : equation_system_clear(&solver->eqns);
191 0 : solver->num_equations = 0;
192 0 : solver->total = 0;
193 0 : }
194 :
195 : /*
196 : static void noise_strength_solver_add(aom_noise_strength_solver_t *dest,
197 : aom_noise_strength_solver_t *src) {
198 : equation_system_add(&dest->eqns, &src->eqns);
199 : dest->num_equations += src->num_equations;
200 : dest->total += src->total;
201 : }
202 : */
203 :
204 0 : static void noise_strength_solver_copy(aom_noise_strength_solver_t *dest,
205 : aom_noise_strength_solver_t *src) {
206 0 : equation_system_copy(&dest->eqns, &src->eqns);
207 0 : dest->num_equations = src->num_equations;
208 0 : dest->total = src->total;
209 0 : }
210 :
211 : // Return the number of coefficients required for the given parameters
212 0 : static int32_t num_coeffs(const aom_noise_model_params_t params) {
213 0 : const int32_t n = 2 * params.lag + 1;
214 0 : switch (params.shape) {
215 0 : case AOM_NOISE_SHAPE_DIAMOND: return params.lag * (params.lag + 1);
216 0 : case AOM_NOISE_SHAPE_SQUARE: return (n * n) / 2;
217 : }
218 0 : return 0;
219 : }
220 :
221 0 : static int32_t noise_state_init(aom_noise_state_t *state, int32_t n, int32_t bit_depth) {
222 0 : const int32_t kNumBins = 20;
223 0 : if (!equation_system_init(&state->eqns, n)) {
224 0 : fprintf(stderr, "Failed initialization noise state with size %d\n", n);
225 0 : return 0;
226 : }
227 0 : state->ar_gain = 1.0;
228 0 : state->num_observations = 0;
229 0 : return eb_aom_noise_strength_solver_init(&state->strength_solver, kNumBins,
230 : bit_depth);
231 : }
232 :
233 0 : static void set_chroma_coefficient_fallback_soln(aom_equation_system_t *eqns) {
234 0 : const double kTolerance = 1e-6;
235 0 : const int32_t last = eqns->n - 1;
236 : // Set all of the AR coefficients to zero, but try to solve for correlation
237 : // with the luma channel
238 0 : memset(eqns->x, 0, sizeof(*eqns->x) * eqns->n);
239 0 : if (fabs(eqns->A[last * eqns->n + last]) > kTolerance)
240 0 : eqns->x[last] = eqns->b[last] / eqns->A[last * eqns->n + last];
241 0 : }
242 :
243 0 : int32_t eb_aom_noise_strength_lut_init(aom_noise_strength_lut_t *lut, int32_t num_points) {
244 0 : if (!lut) return 0;
245 0 : lut->points = (double(*)[2])malloc(num_points * sizeof(*lut->points));
246 0 : if (!lut->points) return 0;
247 0 : lut->num_points = num_points;
248 0 : memset(lut->points, 0, sizeof(*lut->points) * num_points);
249 0 : return 1;
250 : }
251 :
252 0 : void eb_aom_noise_strength_lut_free(aom_noise_strength_lut_t *lut) {
253 0 : if (!lut) return;
254 0 : free(lut->points);
255 0 : memset(lut, 0, sizeof(*lut));
256 : }
257 :
258 0 : double eb_aom_noise_strength_lut_eval(const aom_noise_strength_lut_t *lut,
259 : double x) {
260 0 : int32_t i = 0;
261 : // Constant extrapolation for x < x_0.
262 0 : if (x < lut->points[0][0]) return lut->points[0][1];
263 0 : for (i = 0; i < lut->num_points - 1; ++i) {
264 0 : if (x >= lut->points[i][0] && x <= lut->points[i + 1][0]) {
265 0 : const double a =
266 0 : (x - lut->points[i][0]) / (lut->points[i + 1][0] - lut->points[i][0]);
267 0 : return lut->points[i + 1][1] * a + lut->points[i][1] * (1.0 - a);
268 : }
269 : }
270 : // Constant extrapolation for x > x_{n-1}
271 0 : return lut->points[lut->num_points - 1][1];
272 : }
273 :
274 0 : static double noise_strength_solver_get_bin_index(
275 : const aom_noise_strength_solver_t *solver, double value) {
276 : const double val =
277 0 : fclamp(value, solver->min_intensity, solver->max_intensity);
278 0 : const double range = solver->max_intensity - solver->min_intensity;
279 0 : return (solver->num_bins - 1) * (val - solver->min_intensity) / range;
280 : }
281 :
282 0 : static double noise_strength_solver_get_value(
283 : const aom_noise_strength_solver_t *solver, double x) {
284 0 : const double bin = noise_strength_solver_get_bin_index(solver, x);
285 0 : const int32_t bin_i0 = (int32_t)floor(bin);
286 0 : const int32_t bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
287 0 : const double a = bin - bin_i0;
288 0 : return (1.0 - a) * solver->eqns.x[bin_i0] + a * solver->eqns.x[bin_i1];
289 : }
290 :
291 0 : void eb_aom_noise_strength_solver_add_measurement(
292 : aom_noise_strength_solver_t *solver, double block_mean, double noise_std) {
293 0 : const double bin = noise_strength_solver_get_bin_index(solver, block_mean);
294 0 : const int32_t bin_i0 = (int32_t)floor(bin);
295 0 : const int32_t bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
296 0 : const double a = bin - bin_i0;
297 0 : const int32_t n = solver->num_bins;
298 0 : solver->eqns.A[bin_i0 * n + bin_i0] += (1.0 - a) * (1.0 - a);
299 0 : solver->eqns.A[bin_i1 * n + bin_i0] += a * (1.0 - a);
300 0 : solver->eqns.A[bin_i1 * n + bin_i1] += a * a;
301 0 : solver->eqns.A[bin_i0 * n + bin_i1] += a * (1.0 - a);
302 0 : solver->eqns.b[bin_i0] += (1.0 - a) * noise_std;
303 0 : solver->eqns.b[bin_i1] += a * noise_std;
304 0 : solver->total += noise_std;
305 0 : solver->num_equations++;
306 0 : }
307 :
308 0 : int32_t eb_aom_noise_strength_solver_solve(aom_noise_strength_solver_t *solver) {
309 : // Add regularization proportional to the number of constraints
310 0 : const int32_t n = solver->num_bins;
311 0 : const double kAlpha = 2.0 * (double)(solver->num_equations) / n;
312 0 : int32_t result = 0;
313 0 : double mean = 0;
314 :
315 : // Do this in a non-destructive manner so it is not confusing to the caller
316 0 : double *old_A = solver->eqns.A;
317 0 : double *A = (double *)malloc(sizeof(*A) * n * n);
318 0 : if (!A) {
319 0 : fprintf(stderr, "Unable to allocate copy of A\n");
320 0 : return 0;
321 : }
322 0 : memcpy(A, old_A, sizeof(*A) * n * n);
323 :
324 0 : for (int32_t i = 0; i < n; ++i) {
325 0 : const int32_t i_lo = AOMMAX(0, i - 1);
326 0 : const int32_t i_hi = AOMMIN(n - 1, i + 1);
327 0 : A[i * n + i_lo] -= kAlpha;
328 0 : A[i * n + i] += 2 * kAlpha;
329 0 : A[i * n + i_hi] -= kAlpha;
330 : }
331 :
332 : // Small regularization to give average noise strength
333 0 : mean = solver->total / solver->num_equations;
334 0 : for (int32_t i = 0; i < n; ++i) {
335 0 : A[i * n + i] += 1.0 / 8192.;
336 0 : solver->eqns.b[i] += mean / 8192.;
337 : }
338 0 : solver->eqns.A = A;
339 0 : result = equation_system_solve(&solver->eqns);
340 0 : solver->eqns.A = old_A;
341 :
342 0 : free(A);
343 0 : return result;
344 : }
345 :
346 0 : int32_t eb_aom_noise_strength_solver_init(aom_noise_strength_solver_t *solver,
347 : int32_t num_bins, int32_t bit_depth) {
348 0 : if (!solver) return 0;
349 0 : memset(solver, 0, sizeof(*solver));
350 0 : solver->num_bins = num_bins;
351 0 : solver->min_intensity = 0;
352 0 : solver->max_intensity = (1 << bit_depth) - 1;
353 0 : solver->total = 0;
354 0 : solver->num_equations = 0;
355 0 : return equation_system_init(&solver->eqns, num_bins);
356 : }
357 :
358 0 : double eb_aom_noise_strength_solver_get_center(
359 : const aom_noise_strength_solver_t *solver, int32_t i) {
360 0 : const double range = solver->max_intensity - solver->min_intensity;
361 0 : const int32_t n = solver->num_bins;
362 0 : return ((double)i) / (n - 1) * range + solver->min_intensity;
363 : }
364 :
365 : // Computes the residual if a point were to be removed from the lut. This is
366 : // calculated as the area between the output of the solver and the line segment
367 : // that would be formed between [x_{i - 1}, x_{i + 1}).
368 0 : static void update_piecewise_linear_residual(
369 : const aom_noise_strength_solver_t *solver,
370 : const aom_noise_strength_lut_t *lut, double *residual, int32_t start, int32_t end) {
371 0 : const double dx = 255. / solver->num_bins;
372 0 : for (int32_t i = AOMMAX(start, 1); i < AOMMIN(end, lut->num_points - 1); ++i) {
373 0 : const int32_t lower = AOMMAX(0, (int32_t)floor(noise_strength_solver_get_bin_index(
374 : solver, lut->points[i - 1][0])));
375 0 : const int32_t upper = AOMMIN(solver->num_bins - 1,
376 : (int32_t)ceil(noise_strength_solver_get_bin_index(
377 : solver, lut->points[i + 1][0])));
378 0 : double r = 0;
379 0 : for (int32_t j = lower; j <= upper; ++j) {
380 0 : const double x = eb_aom_noise_strength_solver_get_center(solver, j);
381 0 : if (x < lut->points[i - 1][0]) continue;
382 0 : if (x >= lut->points[i + 1][0]) continue;
383 0 : const double y = solver->eqns.x[j];
384 0 : const double a = (x - lut->points[i - 1][0]) /
385 0 : (lut->points[i + 1][0] - lut->points[i - 1][0]);
386 0 : const double estimate_y =
387 0 : lut->points[i - 1][1] * (1.0 - a) + lut->points[i + 1][1] * a;
388 0 : r += fabs(y - estimate_y);
389 : }
390 0 : residual[i] = r * dx;
391 : }
392 0 : }
393 :
394 0 : int32_t eb_aom_noise_strength_solver_fit_piecewise(
395 : const aom_noise_strength_solver_t *solver, int32_t max_output_points,
396 : aom_noise_strength_lut_t *lut) {
397 : // The tolerance is normalized to be give consistent results between
398 : // different bit-depths.
399 0 : const double kTolerance = solver->max_intensity * 0.00625 / 255.0;
400 0 : if (!eb_aom_noise_strength_lut_init(lut, solver->num_bins)) {
401 0 : fprintf(stderr, "Failed to init lut\n");
402 0 : return 0;
403 : }
404 0 : for (int32_t i = 0; i < solver->num_bins; ++i) {
405 0 : lut->points[i][0] = eb_aom_noise_strength_solver_get_center(solver, i);
406 0 : lut->points[i][1] = solver->eqns.x[i];
407 : }
408 0 : if (max_output_points < 0)
409 0 : max_output_points = solver->num_bins;
410 0 : double *residual = malloc(solver->num_bins * sizeof(*residual));
411 : ASSERT(residual != NULL);
412 0 : memset(residual, 0, sizeof(*residual) * solver->num_bins);
413 :
414 0 : update_piecewise_linear_residual(solver, lut, residual, 0, solver->num_bins);
415 :
416 : // Greedily remove points if there are too many or if it doesn't hurt local
417 : // approximation (never remove the end points)
418 0 : while (lut->num_points > 2) {
419 0 : int32_t min_index = 1;
420 0 : for (int32_t j = 1; j < lut->num_points - 1; ++j) {
421 0 : if (residual[j] < residual[min_index])
422 0 : min_index = j;
423 : }
424 0 : const double dx =
425 0 : lut->points[min_index + 1][0] - lut->points[min_index - 1][0];
426 0 : const double avg_residual = residual[min_index] / dx;
427 0 : if (lut->num_points <= max_output_points && avg_residual > kTolerance)
428 0 : break;
429 0 : const int32_t num_remaining = lut->num_points - min_index - 1;
430 0 : memmove(lut->points + min_index, lut->points + min_index + 1,
431 : sizeof(lut->points[0]) * num_remaining);
432 0 : lut->num_points--;
433 :
434 0 : update_piecewise_linear_residual(solver, lut, residual, min_index - 1,
435 : min_index + 1);
436 : }
437 0 : free(residual);
438 0 : return 1;
439 : }
440 :
441 0 : int32_t eb_aom_flat_block_finder_init(aom_flat_block_finder_t *block_finder,
442 : int32_t block_size, int32_t bit_depth, int32_t use_highbd) {
443 0 : const int32_t n = block_size * block_size;
444 : aom_equation_system_t eqns;
445 0 : double *AtA_inv = 0;
446 0 : double *A = 0;
447 0 : int32_t x = 0, y = 0, i = 0, j = 0;
448 0 : if (!equation_system_init(&eqns, kLowPolyNumParams)) {
449 0 : fprintf(stderr, "Failed to init equation system for block_size=%d\n",
450 : block_size);
451 0 : return 0;
452 : }
453 :
454 0 : AtA_inv = (double *)malloc(kLowPolyNumParams * kLowPolyNumParams *
455 : sizeof(*AtA_inv));
456 0 : A = (double *)malloc(kLowPolyNumParams * n * sizeof(*A));
457 0 : if (AtA_inv == NULL || A == NULL) {
458 0 : fprintf(stderr, "Failed to alloc A or AtA_inv for block_size=%d\n",
459 : block_size);
460 0 : free(AtA_inv);
461 0 : free(A);
462 0 : equation_system_free(&eqns);
463 0 : return 0;
464 : }
465 :
466 0 : block_finder->A = A;
467 0 : block_finder->AtA_inv = AtA_inv;
468 0 : block_finder->block_size = block_size;
469 0 : block_finder->normalization = (1 << bit_depth) - 1;
470 0 : block_finder->use_highbd = use_highbd;
471 :
472 0 : for (y = 0; y < block_size; ++y) {
473 0 : const double yd = ((double)y - block_size / 2.) / (block_size / 2.);
474 0 : for (x = 0; x < block_size; ++x) {
475 0 : const double xd = ((double)x - block_size / 2.) / (block_size / 2.);
476 0 : const double coords[3] = { yd, xd, 1 };
477 0 : const int32_t row = y * block_size + x;
478 0 : A[kLowPolyNumParams * row + 0] = yd;
479 0 : A[kLowPolyNumParams * row + 1] = xd;
480 0 : A[kLowPolyNumParams * row + 2] = 1;
481 :
482 0 : for (i = 0; i < kLowPolyNumParams; ++i) {
483 0 : for (j = 0; j < kLowPolyNumParams; ++j)
484 0 : eqns.A[kLowPolyNumParams * i + j] += coords[i] * coords[j];
485 : }
486 : }
487 : }
488 :
489 : // Lazy inverse using existing equation solver.
490 0 : for (i = 0; i < kLowPolyNumParams; ++i) {
491 0 : memset(eqns.b, 0, sizeof(*eqns.b) * kLowPolyNumParams);
492 0 : eqns.b[i] = 1;
493 0 : equation_system_solve(&eqns);
494 :
495 0 : for (j = 0; j < kLowPolyNumParams; ++j)
496 0 : AtA_inv[j * kLowPolyNumParams + i] = eqns.x[j];
497 : }
498 0 : equation_system_free(&eqns);
499 0 : return 1;
500 : }
501 :
502 0 : void eb_aom_flat_block_finder_free(aom_flat_block_finder_t *block_finder) {
503 0 : if (!block_finder) return;
504 0 : free(block_finder->A);
505 0 : free(block_finder->AtA_inv);
506 0 : memset(block_finder, 0, sizeof(*block_finder));
507 : }
508 :
509 0 : void eb_aom_flat_block_finder_extract_block(
510 : const aom_flat_block_finder_t *block_finder, const uint8_t *const data,
511 : int32_t w, int32_t h, int32_t stride, int32_t offsx, int32_t offsy, double *plane,
512 : double *block) {
513 0 : const int32_t block_size = block_finder->block_size;
514 0 : const int32_t n = block_size * block_size;
515 0 : const double *A = block_finder->A;
516 0 : const double *AtA_inv = block_finder->AtA_inv;
517 : double plane_coords[kLowPolyNumParams];
518 : double AtA_inv_b[kLowPolyNumParams];
519 : int32_t xi, yi, i;
520 :
521 0 : if (block_finder->use_highbd) {
522 0 : const uint16_t *const data16 = (const uint16_t *const)data;
523 0 : for (yi = 0; yi < block_size; ++yi) {
524 0 : const int32_t y = clamp(offsy + yi, 0, h - 1);
525 0 : for (xi = 0; xi < block_size; ++xi) {
526 0 : const int32_t x = clamp(offsx + xi, 0, w - 1);
527 0 : block[yi * block_size + xi] =
528 0 : ((double)data16[y * stride + x]) / block_finder->normalization;
529 : }
530 : }
531 : }
532 : else {
533 0 : for (yi = 0; yi < block_size; ++yi) {
534 0 : const int32_t y = clamp(offsy + yi, 0, h - 1);
535 0 : for (xi = 0; xi < block_size; ++xi) {
536 0 : const int32_t x = clamp(offsx + xi, 0, w - 1);
537 0 : block[yi * block_size + xi] =
538 0 : ((double)data[y * stride + x]) / block_finder->normalization;
539 : }
540 : }
541 : }
542 0 : multiply_mat(block, A, AtA_inv_b, 1, n, kLowPolyNumParams);
543 0 : multiply_mat(AtA_inv, AtA_inv_b, plane_coords, kLowPolyNumParams,
544 : kLowPolyNumParams, 1);
545 0 : multiply_mat(A, plane_coords, plane, n, kLowPolyNumParams, 1);
546 :
547 0 : for (i = 0; i < n; ++i)
548 0 : block[i] -= plane[i];
549 0 : }
550 :
551 : typedef struct {
552 : int32_t index;
553 : float score;
554 : } index_and_score_t;
555 :
556 0 : static int32_t compare_scores(const void *a, const void *b) {
557 0 : const float diff =
558 0 : ((index_and_score_t *)a)->score - ((index_and_score_t *)b)->score;
559 0 : if (diff < 0)
560 0 : return -1;
561 0 : else if (diff > 0)
562 0 : return 1;
563 0 : return 0;
564 : }
565 :
566 0 : int32_t eb_aom_flat_block_finder_run(const aom_flat_block_finder_t *block_finder,
567 : const uint8_t *const data, int32_t w, int32_t h,
568 : int32_t stride, uint8_t *flat_blocks) {
569 : // The gradient-based features used in this code are based on:
570 : // A. Kokaram, D. Kelly, H. Denman and A. Crawford, "Measuring noise
571 : // correlation for improved video denoising," 2012 19th, ICIP.
572 : // The thresholds are more lenient to allow for correct grain modeling
573 : // if extreme cases.
574 0 : const int32_t block_size = block_finder->block_size;
575 0 : const int32_t n = block_size * block_size;
576 0 : const double kTraceThreshold = 0.15 / (32 * 32);
577 0 : const double kRatioThreshold = 1.25;
578 0 : const double kNormThreshold = 0.08 / (32 * 32);
579 0 : const double kVarThreshold = 0.005 / (double)n;
580 0 : const int32_t num_blocks_w = (w + block_size - 1) / block_size;
581 0 : const int32_t num_blocks_h = (h + block_size - 1) / block_size;
582 0 : int32_t num_flat = 0;
583 0 : int32_t bx = 0, by = 0;
584 0 : double *plane = (double *)malloc(n * sizeof(*plane));
585 0 : double *block = (double *)malloc(n * sizeof(*block));
586 0 : index_and_score_t *scores = (index_and_score_t *)malloc(
587 0 : num_blocks_w * num_blocks_h * sizeof(*scores));
588 0 : if (plane == NULL || block == NULL || scores == NULL) {
589 0 : fprintf(stderr, "Failed to allocate memory for block of size %d\n", n);
590 0 : free(plane);
591 0 : free(block);
592 0 : free(scores);
593 0 : return -1;
594 : }
595 :
596 : #ifdef NOISE_MODEL_LOG_SCORE
597 : fprintf(stderr, "score = [");
598 : #endif
599 0 : for (by = 0; by < num_blocks_h; ++by) {
600 0 : for (bx = 0; bx < num_blocks_w; ++bx) {
601 : // Compute gradient covariance matrix.
602 0 : double Gxx = 0, Gxy = 0, Gyy = 0;
603 0 : double var = 0;
604 0 : double mean = 0;
605 : int32_t xi, yi;
606 0 : eb_aom_flat_block_finder_extract_block(block_finder, data, w, h, stride,
607 : bx * block_size, by * block_size,
608 : plane, block);
609 :
610 0 : for (yi = 1; yi < block_size - 1; ++yi) {
611 0 : for (xi = 1; xi < block_size - 1; ++xi) {
612 0 : const double gx = (block[yi * block_size + xi + 1] -
613 0 : block[yi * block_size + xi - 1]) /
614 : 2;
615 0 : const double gy = (block[yi * block_size + xi + block_size] -
616 0 : block[yi * block_size + xi - block_size]) /
617 : 2;
618 0 : Gxx += gx * gx;
619 0 : Gxy += gx * gy;
620 0 : Gyy += gy * gy;
621 :
622 0 : mean += block[yi * block_size + xi];
623 0 : var += block[yi * block_size + xi] * block[yi * block_size + xi];
624 : }
625 : }
626 0 : mean /= (block_size - 2) * (block_size - 2);
627 :
628 : // Normalize gradients by BlockSize.
629 0 : Gxx /= ((block_size - 2) * (block_size - 2));
630 0 : Gxy /= ((block_size - 2) * (block_size - 2));
631 0 : Gyy /= ((block_size - 2) * (block_size - 2));
632 0 : var = var / ((block_size - 2) * (block_size - 2)) - mean * mean;
633 :
634 : {
635 0 : const double trace = Gxx + Gyy;
636 0 : const double det = Gxx * Gyy - Gxy * Gxy;
637 0 : const double e1 = (trace + sqrt(trace * trace - 4 * det)) / 2.;
638 0 : const double e2 = (trace - sqrt(trace * trace - 4 * det)) / 2.;
639 0 : const double norm = e1; // Spectral norm
640 0 : const double ratio = (e1 / AOMMAX(e2, 1e-6));
641 0 : const int32_t is_flat = (trace < kTraceThreshold) &&
642 0 : (ratio < kRatioThreshold) &&
643 0 : (norm < kNormThreshold) && (var > kVarThreshold);
644 : // The following weights are used to combine the above features to give
645 : // a sigmoid score for flatness. If the input was normalized to [0,100]
646 : // the magnitude of these values would be close to 1 (e.g., weights
647 : // corresponding to variance would be a factor of 10000x smaller).
648 : // The weights are given in the following order:
649 : // [{var}, {ratio}, {trace}, {norm}, offset]
650 : // with one of the most discriminative being simply the variance.
651 : const double weights[5] = { -6682, -0.2056, 13087, -12434, 2.5694 };
652 0 : const float score =
653 0 : (float)(1.0 / (1 + exp(-(weights[0] * var + weights[1] * ratio +
654 0 : weights[2] * trace + weights[3] * norm +
655 0 : weights[4]))));
656 0 : flat_blocks[by * num_blocks_w + bx] = is_flat ? 255 : 0;
657 0 : scores[by * num_blocks_w + bx].score = var > kVarThreshold ? score : 0;
658 0 : scores[by * num_blocks_w + bx].index = by * num_blocks_w + bx;
659 : #ifdef NOISE_MODEL_LOG_SCORE
660 : fprintf(stderr, "%g %g %g %g %g %d ", score, var, ratio, trace, norm,
661 : is_flat);
662 : #endif
663 0 : num_flat += is_flat;
664 : }
665 : }
666 : #ifdef NOISE_MODEL_LOG_SCORE
667 : fprintf(stderr, "\n");
668 : #endif
669 : }
670 : #ifdef NOISE_MODEL_LOG_SCORE
671 : fprintf(stderr, "];\n");
672 : #endif
673 : // Find the top-scored blocks (most likely to be flat) and set the flat blocks
674 : // be the union of the thresholded results and the top 10th percentile of the
675 : // scored results.
676 0 : qsort(scores, num_blocks_w * num_blocks_h, sizeof(*scores), &compare_scores);
677 0 : const int32_t top_nth_percentile = num_blocks_w * num_blocks_h * 90 / 100;
678 0 : const float score_threshold = scores[top_nth_percentile].score;
679 0 : for (int32_t i = 0; i < num_blocks_w * num_blocks_h; ++i) {
680 0 : if (scores[i].score >= score_threshold) {
681 0 : num_flat += flat_blocks[scores[i].index] == 0;
682 0 : flat_blocks[scores[i].index] |= 1;
683 : }
684 : }
685 0 : free(block);
686 0 : free(plane);
687 0 : free(scores);
688 0 : return num_flat;
689 : }
690 :
691 0 : int32_t eb_aom_noise_model_init(aom_noise_model_t *model,
692 : const aom_noise_model_params_t params) {
693 0 : const int32_t n = num_coeffs(params);
694 0 : const int32_t lag = params.lag;
695 0 : const int32_t bit_depth = params.bit_depth;
696 0 : int32_t x = 0, y = 0, i = 0, c = 0;
697 :
698 0 : memset(model, 0, sizeof(*model));
699 0 : if (params.lag < 1) {
700 0 : fprintf(stderr, "Invalid noise param: lag = %d must be >= 1\n", params.lag);
701 0 : return 0;
702 : }
703 0 : if (params.lag > kMaxLag) {
704 0 : fprintf(stderr, "Invalid noise param: lag = %d must be <= %d\n", params.lag,
705 : kMaxLag);
706 0 : return 0;
707 : }
708 :
709 0 : memcpy(&model->params, ¶ms, sizeof(params));
710 0 : for (c = 0; c < 3; ++c) {
711 0 : if (!noise_state_init(&model->combined_state[c], n + (c > 0), bit_depth)) {
712 0 : fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
713 0 : eb_aom_noise_model_free(model);
714 0 : return 0;
715 : }
716 0 : if (!noise_state_init(&model->latest_state[c], n + (c > 0), bit_depth)) {
717 0 : fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
718 0 : eb_aom_noise_model_free(model);
719 0 : return 0;
720 : }
721 : }
722 0 : model->n = n;
723 0 : model->coords = (int32_t(*)[2])malloc(sizeof(*model->coords) * n);
724 : ASSERT(model->coords != NULL);
725 0 : for (y = -lag; y <= 0; ++y) {
726 0 : const int32_t max_x = y == 0 ? -1 : lag;
727 0 : for (x = -lag; x <= max_x; ++x) {
728 0 : switch (params.shape) {
729 0 : case AOM_NOISE_SHAPE_DIAMOND:
730 0 : if (abs(x) <= y + lag) {
731 0 : model->coords[i][0] = x;
732 0 : model->coords[i][1] = y;
733 0 : ++i;
734 : }
735 0 : break;
736 0 : case AOM_NOISE_SHAPE_SQUARE:
737 0 : model->coords[i][0] = x;
738 0 : model->coords[i][1] = y;
739 0 : ++i;
740 0 : break;
741 0 : default:
742 0 : fprintf(stderr, "Invalid shape\n");
743 0 : eb_aom_noise_model_free(model);
744 0 : return 0;
745 : }
746 : }
747 : }
748 0 : assert(i == n);
749 0 : return 1;
750 : }
751 :
752 0 : void eb_aom_noise_model_free(aom_noise_model_t *model) {
753 0 : int32_t c = 0;
754 0 : if (!model) return;
755 :
756 0 : free(model->coords);
757 0 : for (c = 0; c < 3; ++c) {
758 0 : equation_system_free(&model->latest_state[c].eqns);
759 0 : equation_system_free(&model->combined_state[c].eqns);
760 :
761 0 : equation_system_free(&model->latest_state[c].strength_solver.eqns);
762 0 : equation_system_free(&model->combined_state[c].strength_solver.eqns);
763 : }
764 0 : memset(model, 0, sizeof(*model));
765 : }
766 :
767 : // Extracts the neighborhood defined by coords around point (x, y) from
768 : // the difference between the data and denoised images. Also extracts the
769 : // entry (possibly downsampled) for (x, y) in the alt_data (e.g., luma).
770 : #define EXTRACT_AR_ROW(INT_TYPE, suffix) \
771 : static double extract_ar_row_##suffix( \
772 : int32_t(*coords)[2], int32_t num_coords, const INT_TYPE *const data, \
773 : const INT_TYPE *const denoised, int32_t stride, int32_t sub_log2[2], \
774 : const INT_TYPE *const alt_data, const INT_TYPE *const alt_denoised, \
775 : int32_t alt_stride, int32_t x, int32_t y, double *buffer) { \
776 : for (int32_t i = 0; i < num_coords; ++i) { \
777 : const int32_t x_i = x + coords[i][0], y_i = y + coords[i][1]; \
778 : buffer[i] = \
779 : (double)data[y_i * stride + x_i] - denoised[y_i * stride + x_i]; \
780 : } \
781 : const double val = \
782 : (double)data[y * stride + x] - denoised[y * stride + x]; \
783 : \
784 : if (alt_data && alt_denoised) { \
785 : double avg_data = 0, avg_denoised = 0; \
786 : int32_t num_samples = 0; \
787 : for (int32_t dy_i = 0; dy_i < (1 << sub_log2[1]); dy_i++) { \
788 : const int32_t y_up = (y << sub_log2[1]) + dy_i; \
789 : for (int32_t dx_i = 0; dx_i < (1 << sub_log2[0]); dx_i++) { \
790 : const int32_t x_up = (x << sub_log2[0]) + dx_i; \
791 : avg_data += alt_data[y_up * alt_stride + x_up]; \
792 : avg_denoised += alt_denoised[y_up * alt_stride + x_up]; \
793 : num_samples++; \
794 : } \
795 : } \
796 : buffer[num_coords] = (avg_data - avg_denoised) / num_samples; \
797 : } \
798 : return val; \
799 : }
800 :
801 0 : EXTRACT_AR_ROW(uint8_t, lowbd);
802 0 : EXTRACT_AR_ROW(uint16_t, highbd);
803 :
804 0 : static int32_t add_block_observations(
805 : aom_noise_model_t *noise_model, int32_t c, const uint8_t *const data,
806 : const uint8_t *const denoised, int32_t w, int32_t h, int32_t stride, int32_t sub_log2[2],
807 : const uint8_t *const alt_data, const uint8_t *const alt_denoised,
808 : int32_t alt_stride, const uint8_t *const flat_blocks, int32_t block_size,
809 : int32_t num_blocks_w, int32_t num_blocks_h) {
810 0 : const int32_t lag = noise_model->params.lag;
811 0 : const int32_t num_coords = noise_model->n;
812 0 : const double normalization = (1 << noise_model->params.bit_depth) - 1;
813 0 : double *A = noise_model->latest_state[c].eqns.A;
814 0 : double *b = noise_model->latest_state[c].eqns.b;
815 0 : double *buffer = (double *)malloc(sizeof(*buffer) * (num_coords + 1));
816 0 : const int32_t n = noise_model->latest_state[c].eqns.n;
817 :
818 0 : if (!buffer) {
819 0 : fprintf(stderr, "Unable to allocate buffer of size %d\n", num_coords + 1);
820 0 : return 0;
821 : }
822 0 : for (int32_t by = 0; by < num_blocks_h; ++by) {
823 0 : const int32_t y_o = by * (block_size >> sub_log2[1]);
824 0 : for (int32_t bx = 0; bx < num_blocks_w; ++bx) {
825 0 : const int32_t x_o = bx * (block_size >> sub_log2[0]);
826 0 : if (!flat_blocks[by * num_blocks_w + bx])
827 0 : continue;
828 0 : int32_t y_start =
829 0 : (by > 0 && flat_blocks[(by - 1) * num_blocks_w + bx]) ? 0 : lag;
830 0 : int32_t x_start =
831 0 : (bx > 0 && flat_blocks[by * num_blocks_w + bx - 1]) ? 0 : lag;
832 0 : int32_t y_end = AOMMIN((h >> sub_log2[1]) - by * (block_size >> sub_log2[1]),
833 : block_size >> sub_log2[1]);
834 0 : int32_t x_end = AOMMIN(
835 : (w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]) - lag,
836 : (bx + 1 < num_blocks_w && flat_blocks[by * num_blocks_w + bx + 1])
837 : ? (block_size >> sub_log2[0])
838 : : ((block_size >> sub_log2[0]) - lag));
839 0 : for (int32_t y = y_start; y < y_end; ++y) {
840 0 : for (int32_t x = x_start; x < x_end; ++x) {
841 0 : const double val =
842 0 : noise_model->params.use_highbd
843 0 : ? extract_ar_row_highbd(noise_model->coords, num_coords,
844 : (const uint16_t *const)data,
845 : (const uint16_t *const)denoised,
846 : stride, sub_log2,
847 : (const uint16_t *const)alt_data,
848 : (const uint16_t *const)alt_denoised,
849 : alt_stride, x + x_o, y + y_o, buffer)
850 0 : : extract_ar_row_lowbd(noise_model->coords, num_coords, data,
851 : denoised, stride, sub_log2, alt_data,
852 : alt_denoised, alt_stride, x + x_o,
853 : y + y_o, buffer);
854 0 : for (int32_t i = 0; i < n; ++i) {
855 0 : for (int32_t j = 0; j < n; ++j) {
856 0 : A[i * n + j] +=
857 0 : (buffer[i] * buffer[j]) / (normalization * normalization);
858 : }
859 0 : b[i] += (buffer[i] * val) / (normalization * normalization);
860 : }
861 0 : noise_model->latest_state[c].num_observations++;
862 : }
863 : }
864 : }
865 : }
866 0 : free(buffer);
867 0 : return 1;
868 : }
869 :
870 0 : static void add_noise_std_observations(
871 : aom_noise_model_t *noise_model, int32_t c, const double *coeffs,
872 : const uint8_t *const data, const uint8_t *const denoised, int32_t w, int32_t h,
873 : int32_t stride, int32_t sub_log2[2], const uint8_t *const alt_data, int32_t alt_stride,
874 : const uint8_t *const flat_blocks, int32_t block_size, int32_t num_blocks_w,
875 : int32_t num_blocks_h) {
876 0 : const int32_t num_coords = noise_model->n;
877 0 : aom_noise_strength_solver_t *noise_strength_solver =
878 : &noise_model->latest_state[c].strength_solver;
879 :
880 0 : const aom_noise_strength_solver_t *noise_strength_luma =
881 : &noise_model->latest_state[0].strength_solver;
882 0 : const double luma_gain = noise_model->latest_state[0].ar_gain;
883 0 : const double noise_gain = noise_model->latest_state[c].ar_gain;
884 0 : for (int32_t by = 0; by < num_blocks_h; ++by) {
885 0 : const int32_t y_o = by * (block_size >> sub_log2[1]);
886 0 : for (int32_t bx = 0; bx < num_blocks_w; ++bx) {
887 0 : const int32_t x_o = bx * (block_size >> sub_log2[0]);
888 0 : if (!flat_blocks[by * num_blocks_w + bx])
889 0 : continue;
890 0 : const int32_t num_samples_h =
891 0 : AOMMIN((h >> sub_log2[1]) - by * (block_size >> sub_log2[1]),
892 : block_size >> sub_log2[1]);
893 0 : const int32_t num_samples_w =
894 0 : AOMMIN((w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]),
895 : (block_size >> sub_log2[0]));
896 : // Make sure that we have a reasonable amount of samples to consider the
897 : // block
898 0 : if (num_samples_w * num_samples_h > block_size) {
899 0 : const double block_mean = get_block_mean(
900 : alt_data ? alt_data : data, w, h, alt_data ? alt_stride : stride,
901 0 : x_o << sub_log2[0], y_o << sub_log2[1], block_size,
902 : noise_model->params.use_highbd);
903 0 : const double noise_var = get_noise_var(
904 0 : data, denoised, stride, w >> sub_log2[0], h >> sub_log2[1], x_o,
905 0 : y_o, block_size >> sub_log2[0], block_size >> sub_log2[1],
906 : noise_model->params.use_highbd);
907 : // We want to remove the part of the noise that came from being
908 : // correlated with luma. Note that the noise solver for luma must
909 : // have already been run.
910 0 : const double luma_strength =
911 0 : c > 0 ? luma_gain * noise_strength_solver_get_value(
912 : noise_strength_luma, block_mean)
913 0 : : 0;
914 0 : const double corr = c > 0 ? coeffs[num_coords] : 0;
915 : // Chroma noise:
916 : // N(0, noise_var) = N(0, uncorr_var) + corr * N(0, luma_strength^2)
917 : // The uncorrelated component:
918 : // uncorr_var = noise_var - (corr * luma_strength)^2
919 : // But don't allow fully correlated noise (hence the max), since the
920 : // synthesis cannot model it.
921 0 : const double uncorr_std = sqrt(
922 0 : AOMMAX(noise_var / 16, noise_var - pow(corr * luma_strength, 2)));
923 : // After we've removed correlation with luma, undo the gain that will
924 : // come from running the IIR filter.
925 0 : const double adjusted_strength = uncorr_std / noise_gain;
926 0 : eb_aom_noise_strength_solver_add_measurement(
927 : noise_strength_solver, block_mean, adjusted_strength);
928 : }
929 : }
930 : }
931 0 : }
932 :
933 : // Return true if the noise estimate appears to be different from the combined
934 : // (multi-frame) estimate. The difference is measured by checking whether the
935 : // AR coefficients have diverged (using a threshold on normalized cross
936 : // correlation), or whether the noise strength has changed.
937 : /*
938 : static int32_t is_noise_model_different(aom_noise_model_t *const noise_model) {
939 : // These thresholds are kind of arbitrary and will likely need further tuning
940 : // (or exported as parameters). The threshold on noise strength is a weighted
941 : // difference between the noise strength histograms
942 : const double kCoeffThreshold = 0.9;
943 : const double kStrengthThreshold =
944 : 0.005 * (1 << (noise_model->params.bit_depth - 8));
945 : for (int32_t c = 0; c < 1; ++c) {
946 : const double corr =
947 : eb_aom_normalized_cross_correlation(noise_model->latest_state[c].eqns.x,
948 : noise_model->combined_state[c].eqns.x,
949 : noise_model->combined_state[c].eqns.n);
950 : if (corr < kCoeffThreshold) return 1;
951 :
952 : const double dx =
953 : 1.0 / noise_model->latest_state[c].strength_solver.num_bins;
954 :
955 : const aom_equation_system_t *latest_eqns =
956 : &noise_model->latest_state[c].strength_solver.eqns;
957 : const aom_equation_system_t *combined_eqns =
958 : &noise_model->combined_state[c].strength_solver.eqns;
959 : double diff = 0;
960 : double total_weight = 0;
961 : for (int32_t j = 0; j < latest_eqns->n; ++j) {
962 : double weight = 0;
963 : for (int32_t i = 0; i < latest_eqns->n; ++i)
964 : weight += latest_eqns->A[i * latest_eqns->n + j];
965 : weight = sqrt(weight);
966 : diff += weight * fabs(latest_eqns->x[j] - combined_eqns->x[j]);
967 : total_weight += weight;
968 : }
969 : if (diff * dx / total_weight > kStrengthThreshold) return 1;
970 : }
971 : return 0;
972 : }
973 : */
974 :
975 0 : static int32_t ar_equation_system_solve(aom_noise_state_t *state, int32_t is_chroma) {
976 0 : const int32_t ret = equation_system_solve(&state->eqns);
977 0 : state->ar_gain = 1.0;
978 0 : if (!ret) return ret;
979 :
980 : // Update the AR gain from the equation system as it will be used to fit
981 : // the noise strength as a function of intensity. In the Yule-Walker
982 : // equations, the diagonal should be the variance of the correlated noise.
983 : // In the case of the least squares estimate, there will be some variability
984 : // in the diagonal. So use the mean of the diagonal as the estimate of
985 : // overall variance (this works for least squares or Yule-Walker formulation).
986 0 : double var = 0;
987 0 : const int32_t n = state->eqns.n;
988 0 : for (int32_t i = 0; i < (state->eqns.n - is_chroma); ++i)
989 0 : var += state->eqns.A[i * n + i] / state->num_observations;
990 0 : var /= (n - is_chroma);
991 :
992 : // Keep track of E(Y^2) = <b, x> + E(X^2)
993 : // In the case that we are using chroma and have an estimate of correlation
994 : // with luma we adjust that estimate slightly to remove the correlated bits by
995 : // subtracting out the last column of a scaled by our correlation estimate
996 : // from b. E(y^2) = <b - A(:, end)*x(end), x>
997 0 : double sum_covar = 0;
998 0 : for (int32_t i = 0; i < state->eqns.n - is_chroma; ++i) {
999 0 : double bi = state->eqns.b[i];
1000 0 : if (is_chroma)
1001 0 : bi -= state->eqns.A[i * n + (n - 1)] * state->eqns.x[n - 1];
1002 0 : sum_covar += (bi * state->eqns.x[i]) / state->num_observations;
1003 : }
1004 : // Now, get an estimate of the variance of uncorrelated noise signal and use
1005 : // it to determine the gain of the AR filter.
1006 0 : const double noise_var = AOMMAX(var - sum_covar, 1e-6);
1007 0 : state->ar_gain = AOMMAX(1, sqrt(AOMMAX(var / noise_var, 1e-6)));
1008 0 : return ret;
1009 : }
1010 :
1011 0 : aom_noise_status_t eb_aom_noise_model_update(
1012 : aom_noise_model_t *const noise_model, const uint8_t *const data[3],
1013 : const uint8_t *const denoised[3], int32_t w, int32_t h, int32_t stride[3],
1014 : int32_t chroma_sub_log2[2], const uint8_t *const flat_blocks, int32_t block_size) {
1015 0 : const int32_t num_blocks_w = (w + block_size - 1) / block_size;
1016 0 : const int32_t num_blocks_h = (h + block_size - 1) / block_size;
1017 : // int32_t y_model_different = 0;
1018 0 : int32_t num_blocks = 0;
1019 0 : int32_t i = 0, channel = 0;
1020 :
1021 0 : if (block_size <= 1) {
1022 0 : fprintf(stderr, "BlockSize = %d must be > 1\n", block_size);
1023 0 : return AOM_NOISE_STATUS_INVALID_ARGUMENT;
1024 : }
1025 :
1026 0 : if (block_size < noise_model->params.lag * 2 + 1) {
1027 0 : fprintf(stderr, "BlockSize = %d must be >= %d\n", block_size,
1028 0 : noise_model->params.lag * 2 + 1);
1029 0 : return AOM_NOISE_STATUS_INVALID_ARGUMENT;
1030 : }
1031 :
1032 : // Clear the latest equation system
1033 0 : for (i = 0; i < 3; ++i) {
1034 0 : equation_system_clear(&noise_model->latest_state[i].eqns);
1035 0 : noise_model->latest_state[i].num_observations = 0;
1036 0 : noise_strength_solver_clear(&noise_model->latest_state[i].strength_solver);
1037 : }
1038 :
1039 : // Check that we have enough flat blocks
1040 0 : for (i = 0; i < num_blocks_h * num_blocks_w; ++i) {
1041 0 : if (flat_blocks[i])
1042 0 : num_blocks++;
1043 : }
1044 :
1045 0 : if (num_blocks <= 1) {
1046 0 : fprintf(stderr, "Not enough flat blocks to update noise estimate\n");
1047 0 : return AOM_NOISE_STATUS_INSUFFICIENT_FLAT_BLOCKS;
1048 : }
1049 :
1050 0 : for (channel = 0; channel < 3; ++channel) {
1051 0 : int32_t no_subsampling[2] = { 0, 0 };
1052 0 : const uint8_t *alt_data = channel > 0 ? data[0] : 0;
1053 0 : const uint8_t *alt_denoised = channel > 0 ? denoised[0] : 0;
1054 0 : int32_t *sub = channel > 0 ? chroma_sub_log2 : no_subsampling;
1055 0 : const int32_t is_chroma = channel != 0;
1056 0 : if (!data[channel] || !denoised[channel]) break;
1057 0 : if (!add_block_observations(noise_model, channel, data[channel],
1058 0 : denoised[channel], w, h, stride[channel], sub,
1059 : alt_data, alt_denoised, stride[0], flat_blocks,
1060 : block_size, num_blocks_w, num_blocks_h)) {
1061 0 : fprintf(stderr, "Adding block observation failed\n");
1062 0 : return AOM_NOISE_STATUS_INTERNAL_ERROR;
1063 : }
1064 :
1065 0 : if (!ar_equation_system_solve(&noise_model->latest_state[channel],
1066 : is_chroma)) {
1067 0 : if (is_chroma) {
1068 0 : set_chroma_coefficient_fallback_soln(
1069 : &noise_model->latest_state[channel].eqns);
1070 : }
1071 : else {
1072 0 : fprintf(stderr, "Solving latest noise equation system failed %d!\n",
1073 : channel);
1074 0 : return AOM_NOISE_STATUS_INTERNAL_ERROR;
1075 : }
1076 : }
1077 :
1078 0 : add_noise_std_observations(
1079 0 : noise_model, channel, noise_model->latest_state[channel].eqns.x,
1080 0 : data[channel], denoised[channel], w, h, stride[channel], sub, alt_data,
1081 : stride[0], flat_blocks, block_size, num_blocks_w, num_blocks_h);
1082 :
1083 0 : if (!eb_aom_noise_strength_solver_solve(
1084 : &noise_model->latest_state[channel].strength_solver)) {
1085 0 : fprintf(stderr, "Solving latest noise strength failed!\n");
1086 0 : return AOM_NOISE_STATUS_INTERNAL_ERROR;
1087 : }
1088 :
1089 : // Check noise characteristics and return if error.
1090 : // if (channel == 0 &&
1091 : // noise_model->combined_state[channel].strength_solver.num_equations >
1092 : // 0 &&
1093 : // is_noise_model_different(noise_model)) {
1094 : // y_model_different = 1;
1095 : // }
1096 :
1097 0 : noise_model->combined_state[channel].num_observations =
1098 0 : noise_model->latest_state[channel].num_observations;
1099 0 : equation_system_copy(&noise_model->combined_state[channel].eqns,
1100 0 : &noise_model->latest_state[channel].eqns);
1101 0 : if (!ar_equation_system_solve(&noise_model->combined_state[channel],
1102 : is_chroma)) {
1103 0 : if (is_chroma) {
1104 0 : set_chroma_coefficient_fallback_soln(
1105 : &noise_model->combined_state[channel].eqns);
1106 : }
1107 : else {
1108 0 : fprintf(stderr, "Solving combined noise equation system failed %d!\n",
1109 : channel);
1110 0 : return AOM_NOISE_STATUS_INTERNAL_ERROR;
1111 : }
1112 : }
1113 :
1114 0 : noise_strength_solver_copy(
1115 : &noise_model->combined_state[channel].strength_solver,
1116 : &noise_model->latest_state[channel].strength_solver);
1117 :
1118 0 : if (!eb_aom_noise_strength_solver_solve(
1119 : &noise_model->combined_state[channel].strength_solver)) {
1120 0 : fprintf(stderr, "Solving combined noise strength failed!\n");
1121 0 : return AOM_NOISE_STATUS_INTERNAL_ERROR;
1122 : }
1123 : }
1124 :
1125 0 : return AOM_NOISE_STATUS_OK;
1126 : }
1127 :
1128 0 : void eb_aom_noise_model_save_latest(aom_noise_model_t *noise_model) {
1129 0 : for (int32_t c = 0; c < 3; c++) {
1130 0 : equation_system_copy(&noise_model->combined_state[c].eqns,
1131 0 : &noise_model->latest_state[c].eqns);
1132 0 : equation_system_copy(&noise_model->combined_state[c].strength_solver.eqns,
1133 0 : &noise_model->latest_state[c].strength_solver.eqns);
1134 0 : noise_model->combined_state[c].strength_solver.num_equations =
1135 0 : noise_model->latest_state[c].strength_solver.num_equations;
1136 0 : noise_model->combined_state[c].num_observations =
1137 0 : noise_model->latest_state[c].num_observations;
1138 0 : noise_model->combined_state[c].ar_gain =
1139 0 : noise_model->latest_state[c].ar_gain;
1140 : }
1141 0 : }
1142 :
1143 0 : int32_t eb_aom_noise_model_get_grain_parameters(aom_noise_model_t *const noise_model,
1144 : aom_film_grain_t *film_grain) {
1145 0 : if (noise_model->params.lag > 3) {
1146 0 : fprintf(stderr, "params.lag = %d > 3\n", noise_model->params.lag);
1147 0 : return 0;
1148 : }
1149 0 : uint16_t random_seed = film_grain->random_seed;
1150 0 : memset(film_grain, 0, sizeof(*film_grain));
1151 0 : film_grain->random_seed = random_seed;
1152 :
1153 0 : film_grain->apply_grain = 1;
1154 0 : film_grain->update_parameters = 1;
1155 :
1156 0 : film_grain->ar_coeff_lag = noise_model->params.lag;
1157 :
1158 : // Convert the scaling functions to 8 bit values
1159 : aom_noise_strength_lut_t scaling_points[3];
1160 0 : eb_aom_noise_strength_solver_fit_piecewise(
1161 0 : &noise_model->combined_state[0].strength_solver, 14, scaling_points + 0);
1162 0 : eb_aom_noise_strength_solver_fit_piecewise(
1163 0 : &noise_model->combined_state[1].strength_solver, 10, scaling_points + 1);
1164 0 : eb_aom_noise_strength_solver_fit_piecewise(
1165 0 : &noise_model->combined_state[2].strength_solver, 10, scaling_points + 2);
1166 :
1167 : // Both the domain and the range of the scaling functions in the film_grain
1168 : // are normalized to 8-bit (e.g., they are implicitly scaled during grain
1169 : // synthesis).
1170 0 : const double strength_divisor = 1 << (noise_model->params.bit_depth - 8);
1171 0 : double max_scaling_value = 1e-4;
1172 0 : for (int32_t c = 0; c < 3; ++c) {
1173 0 : for (int32_t i = 0; i < scaling_points[c].num_points; ++i) {
1174 0 : scaling_points[c].points[i][0] =
1175 0 : AOMMIN(255, scaling_points[c].points[i][0] / strength_divisor);
1176 0 : scaling_points[c].points[i][1] =
1177 0 : AOMMIN(255, scaling_points[c].points[i][1] / strength_divisor);
1178 0 : max_scaling_value =
1179 0 : AOMMAX(scaling_points[c].points[i][1], max_scaling_value);
1180 : }
1181 : }
1182 :
1183 : // Scaling_shift values are in the range [8,11]
1184 : const int32_t max_scaling_value_log2 =
1185 0 : clamp((int32_t)floor(log2(max_scaling_value) + 1), 2, 5);
1186 0 : film_grain->scaling_shift = 5 + (8 - max_scaling_value_log2);
1187 :
1188 0 : const double scale_factor = 1 << (8 - max_scaling_value_log2);
1189 0 : film_grain->num_y_points = scaling_points[0].num_points;
1190 0 : film_grain->num_cb_points = scaling_points[1].num_points;
1191 0 : film_grain->num_cr_points = scaling_points[2].num_points;
1192 :
1193 0 : int32_t(*film_grain_scaling[3])[2] = {
1194 0 : film_grain->scaling_points_y,
1195 0 : film_grain->scaling_points_cb,
1196 0 : film_grain->scaling_points_cr,
1197 : };
1198 0 : for (int32_t c = 0; c < 3; c++) {
1199 0 : for (int32_t i = 0; i < scaling_points[c].num_points; ++i) {
1200 0 : film_grain_scaling[c][i][0] = (int32_t)(scaling_points[c].points[i][0] + 0.5);
1201 0 : film_grain_scaling[c][i][1] = clamp(
1202 0 : (int32_t)(scale_factor * scaling_points[c].points[i][1] + 0.5), 0, 255);
1203 : }
1204 : }
1205 0 : eb_aom_noise_strength_lut_free(scaling_points + 0);
1206 0 : eb_aom_noise_strength_lut_free(scaling_points + 1);
1207 0 : eb_aom_noise_strength_lut_free(scaling_points + 2);
1208 :
1209 : // Convert the ar_coeffs into 8-bit values
1210 0 : const int32_t n_coeff = noise_model->combined_state[0].eqns.n;
1211 0 : double max_coeff = 1e-4, min_coeff = -1e-4;
1212 0 : double y_corr[2] = { 0, 0 };
1213 0 : double avg_luma_strength = 0;
1214 0 : for (int32_t c = 0; c < 3; c++) {
1215 0 : aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
1216 0 : for (int32_t i = 0; i < n_coeff; ++i) {
1217 0 : max_coeff = AOMMAX(max_coeff, eqns->x[i]);
1218 0 : min_coeff = AOMMIN(min_coeff, eqns->x[i]);
1219 : }
1220 : // Since the correlation between luma/chroma was computed in an already
1221 : // scaled space, we adjust it in the un-scaled space.
1222 0 : aom_noise_strength_solver_t *solver =
1223 : &noise_model->combined_state[c].strength_solver;
1224 : // Compute a weighted average of the strength for the channel.
1225 0 : double average_strength = 0, total_weight = 0;
1226 0 : for (int32_t i = 0; i < solver->eqns.n; ++i) {
1227 0 : double w = 0;
1228 0 : for (int32_t j = 0; j < solver->eqns.n; ++j)
1229 0 : w += solver->eqns.A[i * solver->eqns.n + j];
1230 0 : w = sqrt(w);
1231 0 : average_strength += solver->eqns.x[i] * w;
1232 0 : total_weight += w;
1233 : }
1234 0 : if (total_weight == 0)
1235 0 : average_strength = 1;
1236 : else
1237 0 : average_strength /= total_weight;
1238 0 : if (c == 0)
1239 0 : avg_luma_strength = average_strength;
1240 : else {
1241 0 : y_corr[c - 1] = avg_luma_strength * eqns->x[n_coeff] / average_strength;
1242 0 : max_coeff = AOMMAX(max_coeff, y_corr[c - 1]);
1243 0 : min_coeff = AOMMIN(min_coeff, y_corr[c - 1]);
1244 : }
1245 : }
1246 : // Shift value: AR coeffs range (values 6-9)
1247 : // 6: [-2, 2), 7: [-1, 1), 8: [-0.5, 0.5), 9: [-0.25, 0.25)
1248 0 : film_grain->ar_coeff_shift =
1249 0 : clamp(7 - (int32_t)AOMMAX(1 + floor(log2(max_coeff)), ceil(log2(-min_coeff))),
1250 : 6, 9);
1251 0 : double scale_ar_coeff = 1 << film_grain->ar_coeff_shift;
1252 0 : int32_t *ar_coeffs[3] = {
1253 0 : film_grain->ar_coeffs_y,
1254 0 : film_grain->ar_coeffs_cb,
1255 0 : film_grain->ar_coeffs_cr,
1256 : };
1257 0 : for (int32_t c = 0; c < 3; ++c) {
1258 0 : aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
1259 0 : for (int32_t i = 0; i < n_coeff; ++i) {
1260 0 : ar_coeffs[c][i] =
1261 0 : clamp((int32_t)round(scale_ar_coeff * eqns->x[i]), -128, 127);
1262 : }
1263 0 : if (c > 0) {
1264 0 : ar_coeffs[c][n_coeff] =
1265 0 : clamp((int32_t)round(scale_ar_coeff * y_corr[c - 1]), -128, 127);
1266 : }
1267 : }
1268 :
1269 : // At the moment, the noise modeling code assumes that the chroma scaling
1270 : // functions are a function of luma.
1271 0 : film_grain->cb_mult = 128; // 8 bits
1272 0 : film_grain->cb_luma_mult = 192; // 8 bits
1273 0 : film_grain->cb_offset = 256; // 9 bits
1274 :
1275 0 : film_grain->cr_mult = 128; // 8 bits
1276 0 : film_grain->cr_luma_mult = 192; // 8 bits
1277 0 : film_grain->cr_offset = 256; // 9 bits
1278 :
1279 0 : film_grain->chroma_scaling_from_luma = 0;
1280 0 : film_grain->grain_scale_shift = 0;
1281 0 : film_grain->overlap_flag = 1;
1282 0 : return 1;
1283 : }
1284 :
1285 0 : static void pointwise_multiply(const float *a, float *b, int32_t n) {
1286 0 : for (int32_t i = 0; i < n; ++i)
1287 0 : b[i] *= a[i];
1288 0 : }
1289 :
1290 0 : static float *get_half_cos_window(int32_t block_size) {
1291 : float *window_function =
1292 0 : (float *)malloc(block_size * block_size * sizeof(*window_function));
1293 : ASSERT(window_function);
1294 0 : for (int32_t y = 0; y < block_size; ++y) {
1295 0 : const double cos_yd = cos((.5 + y) * PI / block_size - PI / 2);
1296 0 : for (int32_t x = 0; x < block_size; ++x) {
1297 0 : const double cos_xd = cos((.5 + x) * PI / block_size - PI / 2);
1298 0 : window_function[y * block_size + x] = (float)(cos_yd * cos_xd);
1299 : }
1300 : }
1301 0 : return window_function;
1302 : }
1303 :
1304 : #define DITHER_AND_QUANTIZE(INT_TYPE, suffix) \
1305 : static void dither_and_quantize_##suffix( \
1306 : float *result, int32_t result_stride, INT_TYPE *denoised, int32_t w, int32_t h, \
1307 : int32_t stride, int32_t chroma_sub_w, int32_t chroma_sub_h, int32_t block_size, \
1308 : float block_normalization) { \
1309 : for (int32_t y = 0; y < (h >> chroma_sub_h); ++y) { \
1310 : for (int32_t x = 0; x < (w >> chroma_sub_w); ++x) { \
1311 : const int32_t result_idx = \
1312 : (y + (block_size >> chroma_sub_h)) * result_stride + x + \
1313 : (block_size >> chroma_sub_w); \
1314 : INT_TYPE new_val = (INT_TYPE)AOMMIN( \
1315 : AOMMAX(result[result_idx] * block_normalization + 0.5f, 0), \
1316 : block_normalization); \
1317 : const float err = \
1318 : -(((float)new_val) / block_normalization - result[result_idx]); \
1319 : denoised[y * stride + x] = new_val; \
1320 : if (x + 1 < (w >> chroma_sub_w)) { \
1321 : result[result_idx + 1] += err * 7.0f / 16.0f; \
1322 : } \
1323 : if (y + 1 < (h >> chroma_sub_h)) { \
1324 : if (x > 0) { \
1325 : result[result_idx + result_stride - 1] += err * 3.0f / 16.0f; \
1326 : } \
1327 : result[result_idx + result_stride] += err * 5.0f / 16.0f; \
1328 : if (x + 1 < (w >> chroma_sub_w)) { \
1329 : result[result_idx + result_stride + 1] += err * 1.0f / 16.0f; \
1330 : } \
1331 : } \
1332 : } \
1333 : } \
1334 : }
1335 :
1336 0 : DITHER_AND_QUANTIZE(uint8_t, lowbd);
1337 0 : DITHER_AND_QUANTIZE(uint16_t, highbd);
1338 :
1339 0 : int32_t eb_aom_wiener_denoise_2d(const uint8_t *const data[3], uint8_t *denoised[3],
1340 : int32_t w, int32_t h, int32_t stride[3], int32_t chroma_sub[2],
1341 : float *noise_psd[3], int32_t block_size, int32_t bit_depth,
1342 : int32_t use_highbd) {
1343 0 : float *plane = NULL, *window_full = NULL,
1344 0 : *window_chroma = NULL;
1345 : DECLARE_ALIGNED(32, float, *block);
1346 0 : block = NULL;
1347 0 : double *block_d = NULL, *plane_d = NULL;
1348 0 : struct aom_noise_tx_t *tx_full = NULL;
1349 0 : struct aom_noise_tx_t *tx_chroma = NULL;
1350 0 : const int32_t num_blocks_w = (w + block_size - 1) / block_size;
1351 0 : const int32_t num_blocks_h = (h + block_size - 1) / block_size;
1352 0 : const int32_t result_stride = (num_blocks_w + 2) * block_size;
1353 0 : const int32_t result_height = (num_blocks_h + 2) * block_size;
1354 0 : float *result = NULL;
1355 0 : int32_t init_success = 1;
1356 : aom_flat_block_finder_t block_finder_full;
1357 : aom_flat_block_finder_t block_finder_chroma;
1358 0 : const float kBlockNormalization = (float)((1 << bit_depth) - 1);
1359 0 : if (chroma_sub[0] != chroma_sub[1]) {
1360 0 : fprintf(stderr,
1361 : "eb_aom_wiener_denoise_2d doesn't handle different chroma "
1362 : "subsampling");
1363 0 : return 0;
1364 : }
1365 0 : init_success &= eb_aom_flat_block_finder_init(&block_finder_full, block_size,
1366 : bit_depth, use_highbd);
1367 0 : result = (float *)malloc((num_blocks_h + 2) * block_size * result_stride *
1368 : sizeof(*result));
1369 0 : plane = (float *)malloc(block_size * block_size * sizeof(*plane));
1370 : block =
1371 0 : (float *)eb_aom_memalign(32, 2 * block_size * block_size * sizeof(*block));
1372 0 : block_d = (double *)malloc(block_size * block_size * sizeof(*block_d));
1373 0 : plane_d = (double *)malloc(block_size * block_size * sizeof(*plane_d));
1374 0 : window_full = get_half_cos_window(block_size);
1375 0 : tx_full = eb_aom_noise_tx_malloc(block_size);
1376 :
1377 0 : if (chroma_sub[0] != 0) {
1378 0 : init_success &= eb_aom_flat_block_finder_init(&block_finder_chroma,
1379 0 : block_size >> chroma_sub[0],
1380 : bit_depth, use_highbd);
1381 0 : window_chroma = get_half_cos_window(block_size >> chroma_sub[0]);
1382 0 : tx_chroma = eb_aom_noise_tx_malloc(block_size >> chroma_sub[0]);
1383 : }
1384 : else {
1385 0 : window_chroma = window_full;
1386 0 : tx_chroma = tx_full;
1387 : }
1388 :
1389 0 : init_success &= (int32_t)((tx_full != NULL) && (tx_chroma != NULL) && (plane != NULL) &&
1390 0 : (plane_d != NULL) && (block != NULL) && (block_d != NULL) &&
1391 0 : (window_full != NULL) && (window_chroma != NULL) &&
1392 : (result != NULL));
1393 0 : for (int32_t c = init_success ? 0 : 3; c < 3; ++c) {
1394 0 : float *window_function = c == 0 ? window_full : window_chroma;
1395 0 : aom_flat_block_finder_t *block_finder = &block_finder_full;
1396 0 : const int32_t chroma_sub_h = c > 0 ? chroma_sub[1] : 0;
1397 0 : const int32_t chroma_sub_w = c > 0 ? chroma_sub[0] : 0;
1398 0 : struct aom_noise_tx_t *tx =
1399 0 : (c > 0 && chroma_sub[0] > 0) ? tx_chroma : tx_full;
1400 0 : if (!data[c] || !denoised[c]) continue;
1401 0 : if (c > 0 && chroma_sub[0] != 0)
1402 0 : block_finder = &block_finder_chroma;
1403 0 : memset(result, 0, sizeof(*result) * result_stride * result_height);
1404 : // Do overlapped block processing (half overlapped). The block rows can
1405 : // easily be done in parallel
1406 0 : for (int32_t offsy = 0; offsy < (block_size >> chroma_sub_h);
1407 0 : offsy += (block_size >> chroma_sub_h) / 2) {
1408 0 : for (int32_t offsx = 0; offsx < (block_size >> chroma_sub_w);
1409 0 : offsx += (block_size >> chroma_sub_w) / 2) {
1410 : // Pad the boundary when processing each block-set.
1411 0 : for (int32_t by = -1; by < num_blocks_h; ++by) {
1412 0 : for (int32_t bx = -1; bx < num_blocks_w; ++bx) {
1413 0 : const int32_t pixels_per_block =
1414 0 : (block_size >> chroma_sub_w) * (block_size >> chroma_sub_h);
1415 0 : eb_aom_flat_block_finder_extract_block(
1416 0 : block_finder, data[c], w >> chroma_sub_w, h >> chroma_sub_h,
1417 0 : stride[c], bx * (block_size >> chroma_sub_w) + offsx,
1418 0 : by * (block_size >> chroma_sub_h) + offsy, plane_d, block_d);
1419 0 : for (int32_t j = 0; j < pixels_per_block; ++j) {
1420 0 : block[j] = (float)block_d[j];
1421 0 : plane[j] = (float)plane_d[j];
1422 : }
1423 0 : pointwise_multiply(window_function, block, pixels_per_block);
1424 0 : eb_aom_noise_tx_forward(tx, block);
1425 0 : eb_aom_noise_tx_filter(tx, noise_psd[c]);
1426 0 : eb_aom_noise_tx_inverse(tx, block);
1427 :
1428 : // Apply window function to the plane approximation (we will apply
1429 : // it to the sum of plane + block when composing the results).
1430 0 : pointwise_multiply(window_function, plane, pixels_per_block);
1431 :
1432 0 : for (int32_t y = 0; y < (block_size >> chroma_sub_h); ++y) {
1433 0 : const int32_t y_result =
1434 0 : y + (by + 1) * (block_size >> chroma_sub_h) + offsy;
1435 0 : for (int32_t x = 0; x < (block_size >> chroma_sub_w); ++x) {
1436 0 : const int32_t x_result =
1437 0 : x + (bx + 1) * (block_size >> chroma_sub_w) + offsx;
1438 0 : result[y_result * result_stride + x_result] +=
1439 0 : (block[y * (block_size >> chroma_sub_w) + x] +
1440 0 : plane[y * (block_size >> chroma_sub_w) + x]) *
1441 0 : window_function[y * (block_size >> chroma_sub_w) + x];
1442 : }
1443 : }
1444 : }
1445 : }
1446 : }
1447 : }
1448 0 : if (use_highbd) {
1449 0 : dither_and_quantize_highbd(result, result_stride, (uint16_t *)denoised[c],
1450 0 : w, h, stride[c], chroma_sub_w, chroma_sub_h,
1451 : block_size, kBlockNormalization);
1452 : }
1453 : else {
1454 0 : dither_and_quantize_lowbd(result, result_stride, denoised[c], w, h,
1455 0 : stride[c], chroma_sub_w, chroma_sub_h,
1456 : block_size, kBlockNormalization);
1457 : }
1458 : }
1459 0 : free(result);
1460 0 : free(plane);
1461 0 : eb_aom_free(block);
1462 0 : free(plane_d);
1463 0 : free(block_d);
1464 0 : free(window_full);
1465 :
1466 0 : eb_aom_noise_tx_free(tx_full);
1467 :
1468 0 : eb_aom_flat_block_finder_free(&block_finder_full);
1469 0 : if (chroma_sub[0] != 0) {
1470 0 : eb_aom_flat_block_finder_free(&block_finder_chroma);
1471 0 : free(window_chroma);
1472 0 : eb_aom_noise_tx_free(tx_chroma);
1473 : }
1474 0 : return init_success;
1475 : }
1476 :
1477 0 : EbErrorType eb_aom_denoise_and_model_alloc(aom_denoise_and_model_t *ctx,
1478 : int32_t bit_depth,
1479 : int32_t block_size,
1480 : float noise_level) {
1481 :
1482 0 : ctx->block_size = block_size;
1483 0 : ctx->noise_level = noise_level;
1484 0 : ctx->bit_depth = bit_depth;
1485 :
1486 0 : EB_MALLOC_ARRAY(ctx->noise_psd[0], block_size * block_size);
1487 0 : EB_MALLOC_ARRAY(ctx->noise_psd[1], block_size * block_size);
1488 0 : EB_MALLOC_ARRAY(ctx->noise_psd[2], block_size * block_size);
1489 0 : return EB_ErrorNone;
1490 : }
1491 :
1492 0 : void denoise_and_model_dctor(EbPtr p) {
1493 0 : aom_denoise_and_model_t *obj = (aom_denoise_and_model_t*)p;
1494 :
1495 0 : free(obj->flat_blocks);
1496 0 : for (int32_t i = 0; i < 3; ++i) {
1497 0 : EB_FREE_ARRAY(obj->denoised[i]);
1498 0 : EB_FREE_ARRAY(obj->noise_psd[i]);
1499 0 : EB_FREE_ARRAY(obj->packed[i]);
1500 : }
1501 0 : eb_aom_noise_model_free(&obj->noise_model);
1502 0 : eb_aom_flat_block_finder_free(&obj->flat_block_finder);
1503 0 : }
1504 :
1505 :
1506 0 : EbErrorType denoise_and_model_ctor(aom_denoise_and_model_t *object_ptr,
1507 : EbPtr object_init_data_ptr) {
1508 0 : denoise_and_model_init_data_t *init_data_ptr = (denoise_and_model_init_data_t*)object_init_data_ptr;
1509 0 : EbErrorType return_error = EB_ErrorNone;
1510 0 : uint32_t use_highbd = init_data_ptr->encoder_bit_depth > EB_8BIT ? 10 : 8;
1511 :
1512 0 : int32_t chroma_sub_log2[2] = { 1, 1 }; //todo: send chroma subsampling
1513 0 : chroma_sub_log2[0] = (init_data_ptr->encoder_color_format == EB_YUV444 ? 1 : 2) - 1;
1514 0 : chroma_sub_log2[1] = (init_data_ptr->encoder_color_format >= EB_YUV422 ? 1 : 2) - 1;
1515 :
1516 0 : object_ptr->dctor = denoise_and_model_dctor;
1517 :
1518 0 : return_error = eb_aom_denoise_and_model_alloc(object_ptr,
1519 0 : init_data_ptr->encoder_bit_depth > EB_8BIT ? 10 : 8,
1520 : DENOISING_BlockSize,
1521 0 : (float)(init_data_ptr->noise_level / 10.0));
1522 0 : if (return_error != EB_ErrorNone)
1523 0 : return return_error;
1524 0 : object_ptr->width = init_data_ptr->width;
1525 0 : object_ptr->height = init_data_ptr->height;
1526 0 : object_ptr->y_stride = init_data_ptr->stride_y;
1527 0 : object_ptr->uv_stride = init_data_ptr->stride_cb;
1528 :
1529 : //todo: consider replacing with EbPictureBuffersDesc
1530 :
1531 0 : EB_MALLOC_ARRAY(object_ptr->denoised[0], (object_ptr->y_stride * object_ptr->height) << use_highbd);
1532 0 : EB_MALLOC_ARRAY(object_ptr->denoised[1], (object_ptr->uv_stride * (object_ptr->height >> chroma_sub_log2[0])) << use_highbd);
1533 0 : EB_MALLOC_ARRAY(object_ptr->denoised[2], (object_ptr->uv_stride * (object_ptr->height >> chroma_sub_log2[0])) << use_highbd);
1534 :
1535 0 : if (use_highbd) {
1536 0 : EB_MALLOC_ARRAY(object_ptr->packed[0], (object_ptr->y_stride * object_ptr->height));
1537 0 : EB_MALLOC_ARRAY(object_ptr->packed[1], (object_ptr->uv_stride * (object_ptr->height >> chroma_sub_log2[0])));
1538 0 : EB_MALLOC_ARRAY(object_ptr->packed[2], (object_ptr->uv_stride * (object_ptr->height >> chroma_sub_log2[0])));
1539 : }
1540 :
1541 0 : return return_error;
1542 : }
1543 :
1544 0 : static int32_t denoise_and_model_realloc_if_necessary(struct aom_denoise_and_model_t *ctx,
1545 : EbPictureBufferDesc *sd, int32_t use_highbd) {
1546 0 : int32_t chroma_sub_log2[2] = { 1, 1 }; //todo: send chroma subsampling
1547 :
1548 0 : const int32_t block_size = ctx->block_size;
1549 :
1550 0 : free(ctx->flat_blocks);
1551 0 : ctx->flat_blocks = NULL;
1552 :
1553 0 : ctx->num_blocks_w = (sd->width + ctx->block_size - 1) / ctx->block_size;
1554 0 : ctx->num_blocks_h = (sd->height + ctx->block_size - 1) / ctx->block_size;
1555 0 : ctx->flat_blocks = malloc(ctx->num_blocks_w * ctx->num_blocks_h);
1556 :
1557 0 : eb_aom_flat_block_finder_free(&ctx->flat_block_finder);
1558 0 : if (!eb_aom_flat_block_finder_init(&ctx->flat_block_finder, ctx->block_size,
1559 : ctx->bit_depth, use_highbd)) {
1560 0 : fprintf(stderr, "Unable to init flat block finder\n");
1561 0 : return 0;
1562 : }
1563 :
1564 0 : const aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3,
1565 0 : ctx->bit_depth, use_highbd };
1566 : // eb_aom_noise_model_free(&ctx->noise_model);
1567 0 : if (!eb_aom_noise_model_init(&ctx->noise_model, params)) {
1568 0 : fprintf(stderr, "Unable to init noise model\n");
1569 0 : return 0;
1570 : }
1571 :
1572 : // Simply use a flat PSD (although we could use the flat blocks to estimate
1573 : // PSD) those to estimate an actual noise PSD)
1574 : const float y_noise_level =
1575 0 : eb_aom_noise_psd_get_default_value(ctx->block_size, ctx->noise_level);
1576 0 : const float uv_noise_level = eb_aom_noise_psd_get_default_value(
1577 0 : ctx->block_size >> chroma_sub_log2[1], ctx->noise_level);
1578 0 : for (int32_t i = 0; i < block_size * block_size; ++i) {
1579 0 : ctx->noise_psd[0][i] = y_noise_level;
1580 0 : ctx->noise_psd[1][i] = ctx->noise_psd[2][i] = uv_noise_level;
1581 : }
1582 0 : return 1;
1583 : }
1584 :
1585 0 : static void pack_2d_pic(EbPictureBufferDesc *inputPicture,
1586 : uint16_t *packed[3]) {
1587 0 : const uint32_t input_luma_offset = ((inputPicture->origin_y) * inputPicture->stride_y) + (inputPicture->origin_x);
1588 0 : const uint32_t inputBitIncLumaOffset = ((inputPicture->origin_y) * inputPicture->stride_bit_inc_y) + (inputPicture->origin_x);
1589 0 : const uint32_t input_cb_offset = (((inputPicture->origin_y) >> 1) * inputPicture->stride_cb) + ((inputPicture->origin_x) >> 1);
1590 0 : const uint32_t inputBitIncCbOffset = (((inputPicture->origin_y) >> 1) * inputPicture->stride_bit_inc_cb) + ((inputPicture->origin_x) >> 1);
1591 0 : const uint32_t input_cr_offset = (((inputPicture->origin_y) >> 1) * inputPicture->stride_cr) + ((inputPicture->origin_x) >> 1);
1592 0 : const uint32_t inputBitIncCrOffset = (((inputPicture->origin_y) >> 1) * inputPicture->stride_bit_inc_cr) + ((inputPicture->origin_x) >> 1);
1593 :
1594 0 : pack2d_src(
1595 0 : inputPicture->buffer_y + input_luma_offset,
1596 0 : inputPicture->stride_y,
1597 0 : inputPicture->buffer_bit_inc_y + inputBitIncLumaOffset,
1598 0 : inputPicture->stride_bit_inc_y,
1599 : (uint16_t *)packed[0],
1600 0 : inputPicture->stride_y,
1601 0 : inputPicture->width,
1602 0 : inputPicture->height);
1603 :
1604 0 : pack2d_src(
1605 0 : inputPicture->buffer_cb + input_cb_offset,
1606 0 : inputPicture->stride_cr,
1607 0 : inputPicture->buffer_bit_inc_cb + inputBitIncCbOffset,
1608 0 : inputPicture->stride_bit_inc_cr,
1609 0 : (uint16_t *)packed[1],
1610 0 : inputPicture->stride_cr,
1611 0 : inputPicture->width >> 1,
1612 0 : inputPicture->height >> 1);
1613 :
1614 0 : pack2d_src(
1615 0 : inputPicture->buffer_cr + input_cr_offset,
1616 0 : inputPicture->stride_cr,
1617 0 : inputPicture->buffer_bit_inc_cr + inputBitIncCrOffset,
1618 0 : inputPicture->stride_bit_inc_cr,
1619 0 : (uint16_t *)packed[2],
1620 0 : inputPicture->stride_cr,
1621 0 : inputPicture->width >> 1,
1622 0 : inputPicture->height >> 1);
1623 0 : }
1624 :
1625 0 : static void unpack_2d_pic(uint8_t *packed[3],
1626 : EbPictureBufferDesc *outputPicturePtr)
1627 : {
1628 0 : uint32_t lumaBufferOffset = ((outputPicturePtr->origin_y) * outputPicturePtr->stride_y) + (outputPicturePtr->origin_x);
1629 0 : uint32_t chromaBufferOffset = (((outputPicturePtr->origin_y) >> 1) * outputPicturePtr->stride_cb) + ((outputPicturePtr->origin_x) >> 1);
1630 0 : uint16_t luma_width = (uint16_t)(outputPicturePtr->width);
1631 0 : uint16_t chroma_width = luma_width >> 1;
1632 0 : uint16_t luma_height = (uint16_t)(outputPicturePtr->height);
1633 0 : uint16_t chroma_height = luma_height >> 1;
1634 :
1635 0 : un_pack2d(
1636 : (uint16_t*)(packed[0]),
1637 0 : outputPicturePtr->stride_y,
1638 0 : outputPicturePtr->buffer_y + lumaBufferOffset,
1639 0 : outputPicturePtr->stride_y,
1640 0 : outputPicturePtr->buffer_bit_inc_y + lumaBufferOffset,
1641 0 : outputPicturePtr->stride_bit_inc_y,
1642 : luma_width,
1643 : luma_height);
1644 :
1645 0 : un_pack2d(
1646 0 : (uint16_t*)(packed[1]),
1647 0 : outputPicturePtr->stride_cb,
1648 0 : outputPicturePtr->buffer_cb + chromaBufferOffset,
1649 0 : outputPicturePtr->stride_cb,
1650 0 : outputPicturePtr->buffer_bit_inc_cb + chromaBufferOffset,
1651 0 : outputPicturePtr->stride_bit_inc_cb,
1652 : chroma_width,
1653 : chroma_height);
1654 :
1655 0 : un_pack2d(
1656 0 : (uint16_t*)(packed[2]),
1657 0 : outputPicturePtr->stride_cr,
1658 0 : outputPicturePtr->buffer_cr + chromaBufferOffset,
1659 0 : outputPicturePtr->stride_cr,
1660 0 : outputPicturePtr->buffer_bit_inc_cr + chromaBufferOffset,
1661 0 : outputPicturePtr->stride_bit_inc_cr,
1662 : chroma_width,
1663 : chroma_height);
1664 0 : }
1665 :
1666 0 : int32_t eb_aom_denoise_and_model_run(struct aom_denoise_and_model_t *ctx,
1667 : EbPictureBufferDesc *sd,
1668 : aom_film_grain_t *film_grain,
1669 : int32_t use_highbd)
1670 : {
1671 0 : const int32_t block_size = ctx->block_size;
1672 : uint8_t *raw_data[3];
1673 0 : int32_t chroma_sub_log2[2] = { 1, 1 }; //todo: send chroma subsampling
1674 0 : int32_t strides[3] = { sd->stride_y, sd->stride_cb, sd->stride_cr };
1675 :
1676 0 : if (!denoise_and_model_realloc_if_necessary(ctx, sd, use_highbd)) {
1677 0 : fprintf(stderr, "Unable to realloc buffers\n");
1678 0 : return 0;
1679 : }
1680 :
1681 0 : if (!use_highbd) { // 8 bits input
1682 0 : raw_data[0] = sd->buffer_y + sd->origin_y * sd->stride_y + sd->origin_x;
1683 0 : raw_data[1] = sd->buffer_cb + sd->stride_cb * (sd->origin_y >> chroma_sub_log2[0])
1684 0 : + (sd->origin_x >> chroma_sub_log2[1]);
1685 0 : raw_data[2] = sd->buffer_cr + sd->stride_cr * (sd->origin_y >> chroma_sub_log2[0])
1686 0 : + (sd->origin_x >> chroma_sub_log2[1]);
1687 : }
1688 : else { // 10 bits input
1689 0 : pack_2d_pic(sd, ctx->packed);
1690 :
1691 0 : raw_data[0] = (uint8_t *)(ctx->packed[0]);
1692 0 : raw_data[1] = (uint8_t *)(ctx->packed[1]);
1693 0 : raw_data[2] = (uint8_t *)(ctx->packed[2]);
1694 : }
1695 :
1696 0 : const uint8_t *const data[3] = { raw_data[0], raw_data[1], raw_data[2] };
1697 :
1698 0 : eb_aom_flat_block_finder_run(&ctx->flat_block_finder, data[0], sd->width,
1699 0 : sd->height, strides[0], ctx->flat_blocks);
1700 :
1701 0 : if (!eb_aom_wiener_denoise_2d(data, ctx->denoised, sd->width, sd->height,
1702 0 : strides, chroma_sub_log2, ctx->noise_psd,
1703 : block_size, ctx->bit_depth, use_highbd)) {
1704 0 : fprintf(stderr, "Unable to denoise image\n");
1705 0 : return 0;
1706 : }
1707 :
1708 0 : const aom_noise_status_t status = eb_aom_noise_model_update(
1709 0 : &ctx->noise_model, data, (const uint8_t *const *)ctx->denoised,
1710 0 : sd->width, sd->height, strides, chroma_sub_log2, ctx->flat_blocks,
1711 : block_size);
1712 :
1713 0 : int32_t have_noise_estimate = 0;
1714 0 : if (status == AOM_NOISE_STATUS_OK || status == AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE) {
1715 0 : eb_aom_noise_model_save_latest(&ctx->noise_model);
1716 0 : have_noise_estimate = 1;
1717 : }
1718 :
1719 0 : film_grain->apply_grain = 0;
1720 0 : if (have_noise_estimate) {
1721 0 : if (!eb_aom_noise_model_get_grain_parameters(&ctx->noise_model, film_grain)) {
1722 0 : fprintf(stderr, "Unable to get grain parameters.\n");
1723 0 : return 0;
1724 : }
1725 0 : film_grain->apply_grain = 1;
1726 :
1727 0 : if (!use_highbd) {
1728 0 : memcpy(raw_data[0], ctx->denoised[0],
1729 0 : (strides[0] * sd->height) << use_highbd);
1730 0 : memcpy(raw_data[1], ctx->denoised[1],
1731 0 : (strides[1] * (sd->height >> chroma_sub_log2[0])) << use_highbd);
1732 0 : memcpy(raw_data[2], ctx->denoised[2],
1733 0 : (strides[2] * (sd->height >> chroma_sub_log2[0])) << use_highbd);
1734 : }
1735 : else
1736 0 : unpack_2d_pic(ctx->denoised, sd);
1737 : }
1738 0 : eb_aom_flat_block_finder_free(&ctx->flat_block_finder);
1739 0 : eb_aom_noise_model_free(&ctx->noise_model);
1740 0 : free(ctx->flat_blocks);
1741 0 : ctx->flat_blocks = NULL;
1742 :
1743 0 : return 1;
1744 : }
|