LCOV - coverage.info - Codec/noise

LCOV - code coverage report

Current view:	top level - Codec - noise_model.c (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	0	972	0.0 %
Date:	2019-11-25 17:12:20	Functions:	0	55	0.0 %

          Line data    Source code

       1             : /*
       2             :  * Copyright (c) 2017, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #include <math.h>
      13             : #include <stdio.h>
      14             : #include <stdlib.h>
      15             : #include "noise_model.h"
      16             : #include "noise_util.h"
      17             : #include "mathutils.h"
      18             : 
      19             : #define kLowPolyNumParams 3
      20             : 
      21             : static const int32_t kMaxLag = 4;
      22             : 
      23             : void *eb_aom_memalign(size_t align, size_t size);
      24             : void eb_aom_free(void *memblk);
      25             : 
      26             : void un_pack2d(
      27             :     uint16_t      *in16_bit_buffer,
      28             :     uint32_t       in_stride,
      29             :     uint8_t       *out8_bit_buffer,
      30             :     uint32_t       out8_stride,
      31             :     uint8_t       *outn_bit_buffer,
      32             :     uint32_t       outn_stride,
      33             :     uint32_t       width,
      34             :     uint32_t       height);
      35             : 
      36             : void pack2d_src(
      37             :     uint8_t     *in8_bit_buffer,
      38             :     uint32_t     in8_stride,
      39             :     uint8_t     *inn_bit_buffer,
      40             :     uint32_t     inn_stride,
      41             :     uint16_t    *out16_bit_buffer,
      42             :     uint32_t     out_stride,
      43             :     uint32_t     width,
      44             :     uint32_t     height);
      45             : 
      46             : // Defines a function that can be used to obtain the mean of a block for the
      47             : // provided data type (uint8_t, or uint16_t)
      48             : #define GET_BLOCK_MEAN(INT_TYPE, suffix)                                    \
      49             :   static double get_block_mean_##suffix(const INT_TYPE *data, int32_t w, int32_t h, \
      50             :                                         int32_t stride, int32_t x_o, int32_t y_o,       \
      51             :                                         int32_t block_size) {                   \
      52             :     const int32_t max_h = AOMMIN(h - y_o, block_size);                          \
      53             :     const int32_t max_w = AOMMIN(w - x_o, block_size);                          \
      54             :     double block_mean = 0;                                                  \
      55             :     for (int32_t y = 0; y < max_h; ++y) {                                       \
      56             :       for (int32_t x = 0; x < max_w; ++x) {                                     \
      57             :         block_mean += data[(y_o + y) * stride + x_o + x];                   \
      58             :       }                                                                     \
      59             :     }                                                                       \
      60             :     return block_mean / (max_w * max_h);                                    \
      61             :   }
      62             : 
      63           0 : GET_BLOCK_MEAN(uint8_t, lowbd);
      64           0 : GET_BLOCK_MEAN(uint16_t, highbd);
      65             : 
      66           0 : static INLINE double get_block_mean(const uint8_t *data, int32_t w, int32_t h,
      67             :     int32_t stride, int32_t x_o, int32_t y_o,
      68             :     int32_t block_size, int32_t use_highbd) {
      69           0 :     if (use_highbd)
      70           0 :         return get_block_mean_highbd((const uint16_t *)data, w, h, stride, x_o, y_o,
      71             :             block_size);
      72           0 :     return get_block_mean_lowbd(data, w, h, stride, x_o, y_o, block_size);
      73             : }
      74             : 
      75           0 : static INLINE double fclamp(double value, double low, double high) {
      76           0 :     return value < low ? low : (value > high ? high : value);
      77             : }
      78             : 
      79             : // Defines a function that can be used to obtain the variance of a block
      80             : // for the provided data type (uint8_t, or uint16_t)
      81             : #define GET_NOISE_VAR(INT_TYPE, suffix)                                  \
      82             :   static double get_noise_var_##suffix(                                  \
      83             :       const INT_TYPE *data, const INT_TYPE *denoised, int32_t stride, int32_t w, \
      84             :       int32_t h, int32_t x_o, int32_t y_o, int32_t block_size_x, int32_t block_size_y) {     \
      85             :     const int32_t max_h = AOMMIN(h - y_o, block_size_y);                     \
      86             :     const int32_t max_w = AOMMIN(w - x_o, block_size_x);                     \
      87             :     double noise_var = 0;                                                \
      88             :     double noise_mean = 0;                                               \
      89             :     for (int32_t y = 0; y < max_h; ++y) {                                    \
      90             :       for (int32_t x = 0; x < max_w; ++x) {                                  \
      91             :         double noise = (double)data[(y_o + y) * stride + x_o + x] -      \
      92             :                        denoised[(y_o + y) * stride + x_o + x];           \
      93             :         noise_mean += noise;                                             \
      94             :         noise_var += noise * noise;                                      \
      95             :       }                                                                  \
      96             :     }                                                                    \
      97             :     noise_mean /= (max_w * max_h);                                       \
      98             :     return noise_var / (max_w * max_h) - noise_mean * noise_mean;        \
      99             :   }
     100             : 
     101           0 : GET_NOISE_VAR(uint8_t, lowbd);
     102           0 : GET_NOISE_VAR(uint16_t, highbd);
     103             : 
     104           0 : static INLINE double get_noise_var(const uint8_t *data, const uint8_t *denoised,
     105             :     int32_t w, int32_t h, int32_t stride, int32_t x_o, int32_t y_o,
     106             :     int32_t block_size_x, int32_t block_size_y,
     107             :     int32_t use_highbd) {
     108           0 :     if (use_highbd)
     109           0 :         return get_noise_var_highbd((const uint16_t *)data,
     110             :         (const uint16_t *)denoised, w, h, stride, x_o,
     111             :             y_o, block_size_x, block_size_y);
     112           0 :     return get_noise_var_lowbd(data, denoised, w, h, stride, x_o, y_o,
     113             :         block_size_x, block_size_y);
     114             : }
     115             : 
     116           0 : static void equation_system_clear(aom_equation_system_t *eqns) {
     117           0 :     const int32_t n = eqns->n;
     118           0 :     memset(eqns->A, 0, sizeof(*eqns->A) * n * n);
     119           0 :     memset(eqns->x, 0, sizeof(*eqns->x) * n);
     120           0 :     memset(eqns->b, 0, sizeof(*eqns->b) * n);
     121           0 : }
     122             : 
     123           0 : static void equation_system_copy(aom_equation_system_t *dst,
     124             :     const aom_equation_system_t *src) {
     125           0 :     const int32_t n = dst->n;
     126           0 :     memcpy(dst->A, src->A, sizeof(*dst->A) * n * n);
     127           0 :     memcpy(dst->x, src->x, sizeof(*dst->x) * n);
     128           0 :     memcpy(dst->b, src->b, sizeof(*dst->b) * n);
     129           0 : }
     130             : 
     131           0 : static int32_t equation_system_init(aom_equation_system_t *eqns, int32_t n) {
     132           0 :     eqns->A = (double *)malloc(sizeof(*eqns->A) * n * n);
     133           0 :     eqns->b = (double *)malloc(sizeof(*eqns->b) * n);
     134           0 :     eqns->x = (double *)malloc(sizeof(*eqns->x) * n);
     135           0 :     eqns->n = n;
     136           0 :     if (!eqns->A || !eqns->b || !eqns->x) {
     137           0 :         fprintf(stderr, "Failed to allocate system of equations of size %d\n", n);
     138           0 :         free(eqns->A);
     139           0 :         free(eqns->b);
     140           0 :         free(eqns->x);
     141           0 :         memset(eqns, 0, sizeof(*eqns));
     142           0 :         return 0;
     143             :     }
     144           0 :     equation_system_clear(eqns);
     145           0 :     return 1;
     146             : }
     147             : 
     148           0 : static int32_t equation_system_solve(aom_equation_system_t *eqns) {
     149           0 :     const int32_t n = eqns->n;
     150           0 :     double *b = (double *)malloc(sizeof(*b) * n);
     151           0 :     double *A = (double *)malloc(sizeof(*A) * n * n);
     152           0 :     int32_t ret = 0;
     153           0 :     if (A == NULL || b == NULL) {
     154           0 :         fprintf(stderr, "Unable to allocate temp values of size %dx%d\n", n, n);
     155           0 :         free(b);
     156           0 :         free(A);
     157           0 :         return 0;
     158             :     }
     159           0 :     memcpy(A, eqns->A, sizeof(*eqns->A) * n * n);
     160           0 :     memcpy(b, eqns->b, sizeof(*eqns->b) * n);
     161           0 :     ret = linsolve(n, A, eqns->n, b, eqns->x);
     162           0 :     free(b);
     163           0 :     free(A);
     164             : 
     165           0 :     if (ret == 0)
     166           0 :         return 0;
     167           0 :     return 1;
     168             : }
     169             : /*
     170             : static void equation_system_add(aom_equation_system_t *dest,
     171             :                                 aom_equation_system_t *src) {
     172             :   const int32_t n = dest->n;
     173             :   int32_t i, j;
     174             :   for (i = 0; i < n; ++i) {
     175             :     for (j = 0; j < n; ++j)
     176             :       dest->A[i * n + j] += src->A[i * n + j];
     177             :     dest->b[i] += src->b[i];
     178             :   }
     179             : }
     180             : */
     181           0 : static void equation_system_free(aom_equation_system_t *eqns) {
     182           0 :     if (!eqns) return;
     183           0 :     free(eqns->A);
     184           0 :     free(eqns->b);
     185           0 :     free(eqns->x);
     186           0 :     memset(eqns, 0, sizeof(*eqns));
     187             : }
     188             : 
     189           0 : static void noise_strength_solver_clear(aom_noise_strength_solver_t *solver) {
     190           0 :     equation_system_clear(&solver->eqns);
     191           0 :     solver->num_equations = 0;
     192           0 :     solver->total = 0;
     193           0 : }
     194             : 
     195             : /*
     196             : static void noise_strength_solver_add(aom_noise_strength_solver_t *dest,
     197             :                                       aom_noise_strength_solver_t *src) {
     198             :   equation_system_add(&dest->eqns, &src->eqns);
     199             :   dest->num_equations += src->num_equations;
     200             :   dest->total += src->total;
     201             : }
     202             : */
     203             : 
     204           0 : static void noise_strength_solver_copy(aom_noise_strength_solver_t *dest,
     205             :     aom_noise_strength_solver_t *src) {
     206           0 :     equation_system_copy(&dest->eqns, &src->eqns);
     207           0 :     dest->num_equations = src->num_equations;
     208           0 :     dest->total = src->total;
     209           0 : }
     210             : 
     211             : // Return the number of coefficients required for the given parameters
     212           0 : static int32_t num_coeffs(const aom_noise_model_params_t params) {
     213           0 :     const int32_t n = 2 * params.lag + 1;
     214           0 :     switch (params.shape) {
     215           0 :     case AOM_NOISE_SHAPE_DIAMOND: return params.lag * (params.lag + 1);
     216           0 :     case AOM_NOISE_SHAPE_SQUARE: return (n * n) / 2;
     217             :     }
     218           0 :     return 0;
     219             : }
     220             : 
     221           0 : static int32_t noise_state_init(aom_noise_state_t *state, int32_t n, int32_t bit_depth) {
     222           0 :     const int32_t kNumBins = 20;
     223           0 :     if (!equation_system_init(&state->eqns, n)) {
     224           0 :         fprintf(stderr, "Failed initialization noise state with size %d\n", n);
     225           0 :         return 0;
     226             :     }
     227           0 :     state->ar_gain = 1.0;
     228           0 :     state->num_observations = 0;
     229           0 :     return eb_aom_noise_strength_solver_init(&state->strength_solver, kNumBins,
     230             :         bit_depth);
     231             : }
     232             : 
     233           0 : static void set_chroma_coefficient_fallback_soln(aom_equation_system_t *eqns) {
     234           0 :     const double kTolerance = 1e-6;
     235           0 :     const int32_t last = eqns->n - 1;
     236             :     // Set all of the AR coefficients to zero, but try to solve for correlation
     237             :     // with the luma channel
     238           0 :     memset(eqns->x, 0, sizeof(*eqns->x) * eqns->n);
     239           0 :     if (fabs(eqns->A[last * eqns->n + last]) > kTolerance)
     240           0 :         eqns->x[last] = eqns->b[last] / eqns->A[last * eqns->n + last];
     241           0 : }
     242             : 
     243           0 : int32_t eb_aom_noise_strength_lut_init(aom_noise_strength_lut_t *lut, int32_t num_points) {
     244           0 :     if (!lut) return 0;
     245           0 :     lut->points = (double(*)[2])malloc(num_points * sizeof(*lut->points));
     246           0 :     if (!lut->points) return 0;
     247           0 :     lut->num_points = num_points;
     248           0 :     memset(lut->points, 0, sizeof(*lut->points) * num_points);
     249           0 :     return 1;
     250             : }
     251             : 
     252           0 : void eb_aom_noise_strength_lut_free(aom_noise_strength_lut_t *lut) {
     253           0 :     if (!lut) return;
     254           0 :     free(lut->points);
     255           0 :     memset(lut, 0, sizeof(*lut));
     256             : }
     257             : 
     258           0 : double eb_aom_noise_strength_lut_eval(const aom_noise_strength_lut_t *lut,
     259             :     double x) {
     260           0 :     int32_t i = 0;
     261             :     // Constant extrapolation for x <  x_0.
     262           0 :     if (x < lut->points[0][0]) return lut->points[0][1];
     263           0 :     for (i = 0; i < lut->num_points - 1; ++i) {
     264           0 :         if (x >= lut->points[i][0] && x <= lut->points[i + 1][0]) {
     265           0 :             const double a =
     266           0 :                 (x - lut->points[i][0]) / (lut->points[i + 1][0] - lut->points[i][0]);
     267           0 :             return lut->points[i + 1][1] * a + lut->points[i][1] * (1.0 - a);
     268             :         }
     269             :     }
     270             :     // Constant extrapolation for x > x_{n-1}
     271           0 :     return lut->points[lut->num_points - 1][1];
     272             : }
     273             : 
     274           0 : static double noise_strength_solver_get_bin_index(
     275             :     const aom_noise_strength_solver_t *solver, double value) {
     276             :     const double val =
     277           0 :         fclamp(value, solver->min_intensity, solver->max_intensity);
     278           0 :     const double range = solver->max_intensity - solver->min_intensity;
     279           0 :     return (solver->num_bins - 1) * (val - solver->min_intensity) / range;
     280             : }
     281             : 
     282           0 : static double noise_strength_solver_get_value(
     283             :     const aom_noise_strength_solver_t *solver, double x) {
     284           0 :     const double bin = noise_strength_solver_get_bin_index(solver, x);
     285           0 :     const int32_t bin_i0 = (int32_t)floor(bin);
     286           0 :     const int32_t bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
     287           0 :     const double a = bin - bin_i0;
     288           0 :     return (1.0 - a) * solver->eqns.x[bin_i0] + a * solver->eqns.x[bin_i1];
     289             : }
     290             : 
     291           0 : void eb_aom_noise_strength_solver_add_measurement(
     292             :     aom_noise_strength_solver_t *solver, double block_mean, double noise_std) {
     293           0 :     const double bin = noise_strength_solver_get_bin_index(solver, block_mean);
     294           0 :     const int32_t bin_i0 = (int32_t)floor(bin);
     295           0 :     const int32_t bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
     296           0 :     const double a = bin - bin_i0;
     297           0 :     const int32_t n = solver->num_bins;
     298           0 :     solver->eqns.A[bin_i0 * n + bin_i0] += (1.0 - a) * (1.0 - a);
     299           0 :     solver->eqns.A[bin_i1 * n + bin_i0] += a * (1.0 - a);
     300           0 :     solver->eqns.A[bin_i1 * n + bin_i1] += a * a;
     301           0 :     solver->eqns.A[bin_i0 * n + bin_i1] += a * (1.0 - a);
     302           0 :     solver->eqns.b[bin_i0] += (1.0 - a) * noise_std;
     303           0 :     solver->eqns.b[bin_i1] += a * noise_std;
     304           0 :     solver->total += noise_std;
     305           0 :     solver->num_equations++;
     306           0 : }
     307             : 
     308           0 : int32_t eb_aom_noise_strength_solver_solve(aom_noise_strength_solver_t *solver) {
     309             :     // Add regularization proportional to the number of constraints
     310           0 :     const int32_t n = solver->num_bins;
     311           0 :     const double kAlpha = 2.0 * (double)(solver->num_equations) / n;
     312           0 :     int32_t result = 0;
     313           0 :     double mean = 0;
     314             : 
     315             :     // Do this in a non-destructive manner so it is not confusing to the caller
     316           0 :     double *old_A = solver->eqns.A;
     317           0 :     double *A = (double *)malloc(sizeof(*A) * n * n);
     318           0 :     if (!A) {
     319           0 :         fprintf(stderr, "Unable to allocate copy of A\n");
     320           0 :         return 0;
     321             :     }
     322           0 :     memcpy(A, old_A, sizeof(*A) * n * n);
     323             : 
     324           0 :     for (int32_t i = 0; i < n; ++i) {
     325           0 :         const int32_t i_lo = AOMMAX(0, i - 1);
     326           0 :         const int32_t i_hi = AOMMIN(n - 1, i + 1);
     327           0 :         A[i * n + i_lo] -= kAlpha;
     328           0 :         A[i * n + i] += 2 * kAlpha;
     329           0 :         A[i * n + i_hi] -= kAlpha;
     330             :     }
     331             : 
     332             :     // Small regularization to give average noise strength
     333           0 :     mean = solver->total / solver->num_equations;
     334           0 :     for (int32_t i = 0; i < n; ++i) {
     335           0 :         A[i * n + i] += 1.0 / 8192.;
     336           0 :         solver->eqns.b[i] += mean / 8192.;
     337             :     }
     338           0 :     solver->eqns.A = A;
     339           0 :     result = equation_system_solve(&solver->eqns);
     340           0 :     solver->eqns.A = old_A;
     341             : 
     342           0 :     free(A);
     343           0 :     return result;
     344             : }
     345             : 
     346           0 : int32_t eb_aom_noise_strength_solver_init(aom_noise_strength_solver_t *solver,
     347             :     int32_t num_bins, int32_t bit_depth) {
     348           0 :     if (!solver) return 0;
     349           0 :     memset(solver, 0, sizeof(*solver));
     350           0 :     solver->num_bins = num_bins;
     351           0 :     solver->min_intensity = 0;
     352           0 :     solver->max_intensity = (1 << bit_depth) - 1;
     353           0 :     solver->total = 0;
     354           0 :     solver->num_equations = 0;
     355           0 :     return equation_system_init(&solver->eqns, num_bins);
     356             : }
     357             : 
     358           0 : double eb_aom_noise_strength_solver_get_center(
     359             :     const aom_noise_strength_solver_t *solver, int32_t i) {
     360           0 :     const double range = solver->max_intensity - solver->min_intensity;
     361           0 :     const int32_t n = solver->num_bins;
     362           0 :     return ((double)i) / (n - 1) * range + solver->min_intensity;
     363             : }
     364             : 
     365             : // Computes the residual if a point were to be removed from the lut. This is
     366             : // calculated as the area between the output of the solver and the line segment
     367             : // that would be formed between [x_{i - 1}, x_{i + 1}).
     368           0 : static void update_piecewise_linear_residual(
     369             :     const aom_noise_strength_solver_t *solver,
     370             :     const aom_noise_strength_lut_t *lut, double *residual, int32_t start, int32_t end) {
     371           0 :     const double dx = 255. / solver->num_bins;
     372           0 :     for (int32_t i = AOMMAX(start, 1); i < AOMMIN(end, lut->num_points - 1); ++i) {
     373           0 :         const int32_t lower = AOMMAX(0, (int32_t)floor(noise_strength_solver_get_bin_index(
     374             :             solver, lut->points[i - 1][0])));
     375           0 :         const int32_t upper = AOMMIN(solver->num_bins - 1,
     376             :             (int32_t)ceil(noise_strength_solver_get_bin_index(
     377             :                 solver, lut->points[i + 1][0])));
     378           0 :         double r = 0;
     379           0 :         for (int32_t j = lower; j <= upper; ++j) {
     380           0 :             const double x = eb_aom_noise_strength_solver_get_center(solver, j);
     381           0 :             if (x < lut->points[i - 1][0]) continue;
     382           0 :             if (x >= lut->points[i + 1][0]) continue;
     383           0 :             const double y = solver->eqns.x[j];
     384           0 :             const double a = (x - lut->points[i - 1][0]) /
     385           0 :                 (lut->points[i + 1][0] - lut->points[i - 1][0]);
     386           0 :             const double estimate_y =
     387           0 :                 lut->points[i - 1][1] * (1.0 - a) + lut->points[i + 1][1] * a;
     388           0 :             r += fabs(y - estimate_y);
     389             :         }
     390           0 :         residual[i] = r * dx;
     391             :     }
     392           0 : }
     393             : 
     394           0 : int32_t eb_aom_noise_strength_solver_fit_piecewise(
     395             :     const aom_noise_strength_solver_t *solver, int32_t max_output_points,
     396             :     aom_noise_strength_lut_t *lut) {
     397             :     // The tolerance is normalized to be give consistent results between
     398             :     // different bit-depths.
     399           0 :     const double kTolerance = solver->max_intensity * 0.00625 / 255.0;
     400           0 :     if (!eb_aom_noise_strength_lut_init(lut, solver->num_bins)) {
     401           0 :         fprintf(stderr, "Failed to init lut\n");
     402           0 :         return 0;
     403             :     }
     404           0 :     for (int32_t i = 0; i < solver->num_bins; ++i) {
     405           0 :         lut->points[i][0] = eb_aom_noise_strength_solver_get_center(solver, i);
     406           0 :         lut->points[i][1] = solver->eqns.x[i];
     407             :     }
     408           0 :     if (max_output_points < 0)
     409           0 :         max_output_points = solver->num_bins;
     410           0 :     double *residual = malloc(solver->num_bins * sizeof(*residual));
     411             :     ASSERT(residual != NULL);
     412           0 :     memset(residual, 0, sizeof(*residual) * solver->num_bins);
     413             : 
     414           0 :     update_piecewise_linear_residual(solver, lut, residual, 0, solver->num_bins);
     415             : 
     416             :     // Greedily remove points if there are too many or if it doesn't hurt local
     417             :     // approximation (never remove the end points)
     418           0 :     while (lut->num_points > 2) {
     419           0 :         int32_t min_index = 1;
     420           0 :         for (int32_t j = 1; j < lut->num_points - 1; ++j) {
     421           0 :             if (residual[j] < residual[min_index])
     422           0 :                 min_index = j;
     423             :         }
     424           0 :         const double dx =
     425           0 :             lut->points[min_index + 1][0] - lut->points[min_index - 1][0];
     426           0 :         const double avg_residual = residual[min_index] / dx;
     427           0 :         if (lut->num_points <= max_output_points && avg_residual > kTolerance)
     428           0 :             break;
     429           0 :         const int32_t num_remaining = lut->num_points - min_index - 1;
     430           0 :         memmove(lut->points + min_index, lut->points + min_index + 1,
     431             :             sizeof(lut->points[0]) * num_remaining);
     432           0 :         lut->num_points--;
     433             : 
     434           0 :         update_piecewise_linear_residual(solver, lut, residual, min_index - 1,
     435             :             min_index + 1);
     436             :     }
     437           0 :     free(residual);
     438           0 :     return 1;
     439             : }
     440             : 
     441           0 : int32_t eb_aom_flat_block_finder_init(aom_flat_block_finder_t *block_finder,
     442             :     int32_t block_size, int32_t bit_depth, int32_t use_highbd) {
     443           0 :     const int32_t n = block_size * block_size;
     444             :     aom_equation_system_t eqns;
     445           0 :     double *AtA_inv = 0;
     446           0 :     double *A = 0;
     447           0 :     int32_t x = 0, y = 0, i = 0, j = 0;
     448           0 :     if (!equation_system_init(&eqns, kLowPolyNumParams)) {
     449           0 :         fprintf(stderr, "Failed to init equation system for block_size=%d\n",
     450             :             block_size);
     451           0 :         return 0;
     452             :     }
     453             : 
     454           0 :     AtA_inv = (double *)malloc(kLowPolyNumParams * kLowPolyNumParams *
     455             :         sizeof(*AtA_inv));
     456           0 :     A = (double *)malloc(kLowPolyNumParams * n * sizeof(*A));
     457           0 :     if (AtA_inv == NULL || A == NULL) {
     458           0 :         fprintf(stderr, "Failed to alloc A or AtA_inv for block_size=%d\n",
     459             :             block_size);
     460           0 :         free(AtA_inv);
     461           0 :         free(A);
     462           0 :         equation_system_free(&eqns);
     463           0 :         return 0;
     464             :     }
     465             : 
     466           0 :     block_finder->A = A;
     467           0 :     block_finder->AtA_inv = AtA_inv;
     468           0 :     block_finder->block_size = block_size;
     469           0 :     block_finder->normalization = (1 << bit_depth) - 1;
     470           0 :     block_finder->use_highbd = use_highbd;
     471             : 
     472           0 :     for (y = 0; y < block_size; ++y) {
     473           0 :         const double yd = ((double)y - block_size / 2.) / (block_size / 2.);
     474           0 :         for (x = 0; x < block_size; ++x) {
     475           0 :             const double xd = ((double)x - block_size / 2.) / (block_size / 2.);
     476           0 :             const double coords[3] = { yd, xd, 1 };
     477           0 :             const int32_t row = y * block_size + x;
     478           0 :             A[kLowPolyNumParams * row + 0] = yd;
     479           0 :             A[kLowPolyNumParams * row + 1] = xd;
     480           0 :             A[kLowPolyNumParams * row + 2] = 1;
     481             : 
     482           0 :             for (i = 0; i < kLowPolyNumParams; ++i) {
     483           0 :                 for (j = 0; j < kLowPolyNumParams; ++j)
     484           0 :                     eqns.A[kLowPolyNumParams * i + j] += coords[i] * coords[j];
     485             :             }
     486             :         }
     487             :     }
     488             : 
     489             :     // Lazy inverse using existing equation solver.
     490           0 :     for (i = 0; i < kLowPolyNumParams; ++i) {
     491           0 :         memset(eqns.b, 0, sizeof(*eqns.b) * kLowPolyNumParams);
     492           0 :         eqns.b[i] = 1;
     493           0 :         equation_system_solve(&eqns);
     494             : 
     495           0 :         for (j = 0; j < kLowPolyNumParams; ++j)
     496           0 :             AtA_inv[j * kLowPolyNumParams + i] = eqns.x[j];
     497             :     }
     498           0 :     equation_system_free(&eqns);
     499           0 :     return 1;
     500             : }
     501             : 
     502           0 : void eb_aom_flat_block_finder_free(aom_flat_block_finder_t *block_finder) {
     503           0 :     if (!block_finder) return;
     504           0 :     free(block_finder->A);
     505           0 :     free(block_finder->AtA_inv);
     506           0 :     memset(block_finder, 0, sizeof(*block_finder));
     507             : }
     508             : 
     509           0 : void eb_aom_flat_block_finder_extract_block(
     510             :     const aom_flat_block_finder_t *block_finder, const uint8_t *const data,
     511             :     int32_t w, int32_t h, int32_t stride, int32_t offsx, int32_t offsy, double *plane,
     512             :     double *block) {
     513           0 :     const int32_t block_size = block_finder->block_size;
     514           0 :     const int32_t n = block_size * block_size;
     515           0 :     const double *A = block_finder->A;
     516           0 :     const double *AtA_inv = block_finder->AtA_inv;
     517             :     double plane_coords[kLowPolyNumParams];
     518             :     double AtA_inv_b[kLowPolyNumParams];
     519             :     int32_t xi, yi, i;
     520             : 
     521           0 :     if (block_finder->use_highbd) {
     522           0 :         const uint16_t *const data16 = (const uint16_t *const)data;
     523           0 :         for (yi = 0; yi < block_size; ++yi) {
     524           0 :             const int32_t y = clamp(offsy + yi, 0, h - 1);
     525           0 :             for (xi = 0; xi < block_size; ++xi) {
     526           0 :                 const int32_t x = clamp(offsx + xi, 0, w - 1);
     527           0 :                 block[yi * block_size + xi] =
     528           0 :                     ((double)data16[y * stride + x]) / block_finder->normalization;
     529             :             }
     530             :         }
     531             :     }
     532             :     else {
     533           0 :         for (yi = 0; yi < block_size; ++yi) {
     534           0 :             const int32_t y = clamp(offsy + yi, 0, h - 1);
     535           0 :             for (xi = 0; xi < block_size; ++xi) {
     536           0 :                 const int32_t x = clamp(offsx + xi, 0, w - 1);
     537           0 :                 block[yi * block_size + xi] =
     538           0 :                     ((double)data[y * stride + x]) / block_finder->normalization;
     539             :             }
     540             :         }
     541             :     }
     542           0 :     multiply_mat(block, A, AtA_inv_b, 1, n, kLowPolyNumParams);
     543           0 :     multiply_mat(AtA_inv, AtA_inv_b, plane_coords, kLowPolyNumParams,
     544             :         kLowPolyNumParams, 1);
     545           0 :     multiply_mat(A, plane_coords, plane, n, kLowPolyNumParams, 1);
     546             : 
     547           0 :     for (i = 0; i < n; ++i)
     548           0 :         block[i] -= plane[i];
     549           0 : }
     550             : 
     551             : typedef struct {
     552             :     int32_t index;
     553             :     float score;
     554             : } index_and_score_t;
     555             : 
     556           0 : static int32_t compare_scores(const void *a, const void *b) {
     557           0 :     const float diff =
     558           0 :         ((index_and_score_t *)a)->score - ((index_and_score_t *)b)->score;
     559           0 :     if (diff < 0)
     560           0 :         return -1;
     561           0 :     else if (diff > 0)
     562           0 :         return 1;
     563           0 :     return 0;
     564             : }
     565             : 
     566           0 : int32_t eb_aom_flat_block_finder_run(const aom_flat_block_finder_t *block_finder,
     567             :     const uint8_t *const data, int32_t w, int32_t h,
     568             :     int32_t stride, uint8_t *flat_blocks) {
     569             :     // The gradient-based features used in this code are based on:
     570             :     //  A. Kokaram, D. Kelly, H. Denman and A. Crawford, "Measuring noise
     571             :     //  correlation for improved video denoising," 2012 19th, ICIP.
     572             :     // The thresholds are more lenient to allow for correct grain modeling
     573             :     // if extreme cases.
     574           0 :     const int32_t block_size = block_finder->block_size;
     575           0 :     const int32_t n = block_size * block_size;
     576           0 :     const double kTraceThreshold = 0.15 / (32 * 32);
     577           0 :     const double kRatioThreshold = 1.25;
     578           0 :     const double kNormThreshold = 0.08 / (32 * 32);
     579           0 :     const double kVarThreshold = 0.005 / (double)n;
     580           0 :     const int32_t num_blocks_w = (w + block_size - 1) / block_size;
     581           0 :     const int32_t num_blocks_h = (h + block_size - 1) / block_size;
     582           0 :     int32_t num_flat = 0;
     583           0 :     int32_t bx = 0, by = 0;
     584           0 :     double *plane = (double *)malloc(n * sizeof(*plane));
     585           0 :     double *block = (double *)malloc(n * sizeof(*block));
     586           0 :     index_and_score_t *scores = (index_and_score_t *)malloc(
     587           0 :         num_blocks_w * num_blocks_h * sizeof(*scores));
     588           0 :     if (plane == NULL || block == NULL || scores == NULL) {
     589           0 :         fprintf(stderr, "Failed to allocate memory for block of size %d\n", n);
     590           0 :         free(plane);
     591           0 :         free(block);
     592           0 :         free(scores);
     593           0 :         return -1;
     594             :     }
     595             : 
     596             : #ifdef NOISE_MODEL_LOG_SCORE
     597             :     fprintf(stderr, "score = [");
     598             : #endif
     599           0 :     for (by = 0; by < num_blocks_h; ++by) {
     600           0 :         for (bx = 0; bx < num_blocks_w; ++bx) {
     601             :             // Compute gradient covariance matrix.
     602           0 :             double Gxx = 0, Gxy = 0, Gyy = 0;
     603           0 :             double var = 0;
     604           0 :             double mean = 0;
     605             :             int32_t xi, yi;
     606           0 :             eb_aom_flat_block_finder_extract_block(block_finder, data, w, h, stride,
     607             :                 bx * block_size, by * block_size,
     608             :                 plane, block);
     609             : 
     610           0 :             for (yi = 1; yi < block_size - 1; ++yi) {
     611           0 :                 for (xi = 1; xi < block_size - 1; ++xi) {
     612           0 :                     const double gx = (block[yi * block_size + xi + 1] -
     613           0 :                         block[yi * block_size + xi - 1]) /
     614             :                         2;
     615           0 :                     const double gy = (block[yi * block_size + xi + block_size] -
     616           0 :                         block[yi * block_size + xi - block_size]) /
     617             :                         2;
     618           0 :                     Gxx += gx * gx;
     619           0 :                     Gxy += gx * gy;
     620           0 :                     Gyy += gy * gy;
     621             : 
     622           0 :                     mean += block[yi * block_size + xi];
     623           0 :                     var += block[yi * block_size + xi] * block[yi * block_size + xi];
     624             :                 }
     625             :             }
     626           0 :             mean /= (block_size - 2) * (block_size - 2);
     627             : 
     628             :             // Normalize gradients by BlockSize.
     629           0 :             Gxx /= ((block_size - 2) * (block_size - 2));
     630           0 :             Gxy /= ((block_size - 2) * (block_size - 2));
     631           0 :             Gyy /= ((block_size - 2) * (block_size - 2));
     632           0 :             var = var / ((block_size - 2) * (block_size - 2)) - mean * mean;
     633             : 
     634             :             {
     635           0 :                 const double trace = Gxx + Gyy;
     636           0 :                 const double det = Gxx * Gyy - Gxy * Gxy;
     637           0 :                 const double e1 = (trace + sqrt(trace * trace - 4 * det)) / 2.;
     638           0 :                 const double e2 = (trace - sqrt(trace * trace - 4 * det)) / 2.;
     639           0 :                 const double norm = e1;  // Spectral norm
     640           0 :                 const double ratio = (e1 / AOMMAX(e2, 1e-6));
     641           0 :                 const int32_t is_flat = (trace < kTraceThreshold) &&
     642           0 :                     (ratio < kRatioThreshold) &&
     643           0 :                     (norm < kNormThreshold) && (var > kVarThreshold);
     644             :                 // The following weights are used to combine the above features to give
     645             :                 // a sigmoid score for flatness. If the input was normalized to [0,100]
     646             :                 // the magnitude of these values would be close to 1 (e.g., weights
     647             :                 // corresponding to variance would be a factor of 10000x smaller).
     648             :                 // The weights are given in the following order:
     649             :                 //    [{var}, {ratio}, {trace}, {norm}, offset]
     650             :                 // with one of the most discriminative being simply the variance.
     651             :                 const double weights[5] = { -6682, -0.2056, 13087, -12434, 2.5694 };
     652           0 :                 const float score =
     653           0 :                     (float)(1.0 / (1 + exp(-(weights[0] * var + weights[1] * ratio +
     654           0 :                         weights[2] * trace + weights[3] * norm +
     655           0 :                         weights[4]))));
     656           0 :                 flat_blocks[by * num_blocks_w + bx] = is_flat ? 255 : 0;
     657           0 :                 scores[by * num_blocks_w + bx].score = var > kVarThreshold ? score : 0;
     658           0 :                 scores[by * num_blocks_w + bx].index = by * num_blocks_w + bx;
     659             : #ifdef NOISE_MODEL_LOG_SCORE
     660             :                 fprintf(stderr, "%g %g %g %g %g %d ", score, var, ratio, trace, norm,
     661             :                     is_flat);
     662             : #endif
     663           0 :                 num_flat += is_flat;
     664             :             }
     665             :         }
     666             : #ifdef NOISE_MODEL_LOG_SCORE
     667             :         fprintf(stderr, "\n");
     668             : #endif
     669             :     }
     670             : #ifdef NOISE_MODEL_LOG_SCORE
     671             :     fprintf(stderr, "];\n");
     672             : #endif
     673             :     // Find the top-scored blocks (most likely to be flat) and set the flat blocks
     674             :     // be the union of the thresholded results and the top 10th percentile of the
     675             :     // scored results.
     676           0 :     qsort(scores, num_blocks_w * num_blocks_h, sizeof(*scores), &compare_scores);
     677           0 :     const int32_t top_nth_percentile = num_blocks_w * num_blocks_h * 90 / 100;
     678           0 :     const float score_threshold = scores[top_nth_percentile].score;
     679           0 :     for (int32_t i = 0; i < num_blocks_w * num_blocks_h; ++i) {
     680           0 :         if (scores[i].score >= score_threshold) {
     681           0 :             num_flat += flat_blocks[scores[i].index] == 0;
     682           0 :             flat_blocks[scores[i].index] |= 1;
     683             :         }
     684             :     }
     685           0 :     free(block);
     686           0 :     free(plane);
     687           0 :     free(scores);
     688           0 :     return num_flat;
     689             : }
     690             : 
     691           0 : int32_t eb_aom_noise_model_init(aom_noise_model_t *model,
     692             :     const aom_noise_model_params_t params) {
     693           0 :     const int32_t n = num_coeffs(params);
     694           0 :     const int32_t lag = params.lag;
     695           0 :     const int32_t bit_depth = params.bit_depth;
     696           0 :     int32_t x = 0, y = 0, i = 0, c = 0;
     697             : 
     698           0 :     memset(model, 0, sizeof(*model));
     699           0 :     if (params.lag < 1) {
     700           0 :         fprintf(stderr, "Invalid noise param: lag = %d must be >= 1\n", params.lag);
     701           0 :         return 0;
     702             :     }
     703           0 :     if (params.lag > kMaxLag) {
     704           0 :         fprintf(stderr, "Invalid noise param: lag = %d must be <= %d\n", params.lag,
     705             :             kMaxLag);
     706           0 :         return 0;
     707             :     }
     708             : 
     709           0 :     memcpy(&model->params, &params, sizeof(params));
     710           0 :     for (c = 0; c < 3; ++c) {
     711           0 :         if (!noise_state_init(&model->combined_state[c], n + (c > 0), bit_depth)) {
     712           0 :             fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
     713           0 :             eb_aom_noise_model_free(model);
     714           0 :             return 0;
     715             :         }
     716           0 :         if (!noise_state_init(&model->latest_state[c], n + (c > 0), bit_depth)) {
     717           0 :             fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
     718           0 :             eb_aom_noise_model_free(model);
     719           0 :             return 0;
     720             :         }
     721             :     }
     722           0 :     model->n = n;
     723           0 :     model->coords = (int32_t(*)[2])malloc(sizeof(*model->coords) * n);
     724             :     ASSERT(model->coords != NULL);
     725           0 :     for (y = -lag; y <= 0; ++y) {
     726           0 :         const int32_t max_x = y == 0 ? -1 : lag;
     727           0 :         for (x = -lag; x <= max_x; ++x) {
     728           0 :             switch (params.shape) {
     729           0 :             case AOM_NOISE_SHAPE_DIAMOND:
     730           0 :                 if (abs(x) <= y + lag) {
     731           0 :                     model->coords[i][0] = x;
     732           0 :                     model->coords[i][1] = y;
     733           0 :                     ++i;
     734             :                 }
     735           0 :                 break;
     736           0 :             case AOM_NOISE_SHAPE_SQUARE:
     737           0 :                 model->coords[i][0] = x;
     738           0 :                 model->coords[i][1] = y;
     739           0 :                 ++i;
     740           0 :                 break;
     741           0 :             default:
     742           0 :                 fprintf(stderr, "Invalid shape\n");
     743           0 :                 eb_aom_noise_model_free(model);
     744           0 :                 return 0;
     745             :             }
     746             :         }
     747             :     }
     748           0 :     assert(i == n);
     749           0 :     return 1;
     750             : }
     751             : 
     752           0 : void eb_aom_noise_model_free(aom_noise_model_t *model) {
     753           0 :     int32_t c = 0;
     754           0 :     if (!model) return;
     755             : 
     756           0 :     free(model->coords);
     757           0 :     for (c = 0; c < 3; ++c) {
     758           0 :         equation_system_free(&model->latest_state[c].eqns);
     759           0 :         equation_system_free(&model->combined_state[c].eqns);
     760             : 
     761           0 :         equation_system_free(&model->latest_state[c].strength_solver.eqns);
     762           0 :         equation_system_free(&model->combined_state[c].strength_solver.eqns);
     763             :     }
     764           0 :     memset(model, 0, sizeof(*model));
     765             : }
     766             : 
     767             : // Extracts the neighborhood defined by coords around point (x, y) from
     768             : // the difference between the data and denoised images. Also extracts the
     769             : // entry (possibly downsampled) for (x, y) in the alt_data (e.g., luma).
     770             : #define EXTRACT_AR_ROW(INT_TYPE, suffix)                                   \
     771             :   static double extract_ar_row_##suffix(                                   \
     772             :       int32_t(*coords)[2], int32_t num_coords, const INT_TYPE *const data,         \
     773             :       const INT_TYPE *const denoised, int32_t stride, int32_t sub_log2[2],         \
     774             :       const INT_TYPE *const alt_data, const INT_TYPE *const alt_denoised,  \
     775             :       int32_t alt_stride, int32_t x, int32_t y, double *buffer) {                      \
     776             :     for (int32_t i = 0; i < num_coords; ++i) {                                 \
     777             :       const int32_t x_i = x + coords[i][0], y_i = y + coords[i][1];            \
     778             :       buffer[i] =                                                          \
     779             :           (double)data[y_i * stride + x_i] - denoised[y_i * stride + x_i]; \
     780             :     }                                                                      \
     781             :     const double val =                                                     \
     782             :         (double)data[y * stride + x] - denoised[y * stride + x];           \
     783             :                                                                            \
     784             :     if (alt_data && alt_denoised) {                                        \
     785             :       double avg_data = 0, avg_denoised = 0;                               \
     786             :       int32_t num_samples = 0;                                                 \
     787             :       for (int32_t dy_i = 0; dy_i < (1 << sub_log2[1]); dy_i++) {              \
     788             :         const int32_t y_up = (y << sub_log2[1]) + dy_i;                        \
     789             :         for (int32_t dx_i = 0; dx_i < (1 << sub_log2[0]); dx_i++) {            \
     790             :           const int32_t x_up = (x << sub_log2[0]) + dx_i;                      \
     791             :           avg_data += alt_data[y_up * alt_stride + x_up];                  \
     792             :           avg_denoised += alt_denoised[y_up * alt_stride + x_up];          \
     793             :           num_samples++;                                                   \
     794             :         }                                                                  \
     795             :       }                                                                    \
     796             :       buffer[num_coords] = (avg_data - avg_denoised) / num_samples;        \
     797             :     }                                                                      \
     798             :     return val;                                                            \
     799             :   }
     800             : 
     801           0 : EXTRACT_AR_ROW(uint8_t, lowbd);
     802           0 : EXTRACT_AR_ROW(uint16_t, highbd);
     803             : 
     804           0 : static int32_t add_block_observations(
     805             :     aom_noise_model_t *noise_model, int32_t c, const uint8_t *const data,
     806             :     const uint8_t *const denoised, int32_t w, int32_t h, int32_t stride, int32_t sub_log2[2],
     807             :     const uint8_t *const alt_data, const uint8_t *const alt_denoised,
     808             :     int32_t alt_stride, const uint8_t *const flat_blocks, int32_t block_size,
     809             :     int32_t num_blocks_w, int32_t num_blocks_h) {
     810           0 :     const int32_t lag = noise_model->params.lag;
     811           0 :     const int32_t num_coords = noise_model->n;
     812           0 :     const double normalization = (1 << noise_model->params.bit_depth) - 1;
     813           0 :     double *A = noise_model->latest_state[c].eqns.A;
     814           0 :     double *b = noise_model->latest_state[c].eqns.b;
     815           0 :     double *buffer = (double *)malloc(sizeof(*buffer) * (num_coords + 1));
     816           0 :     const int32_t n = noise_model->latest_state[c].eqns.n;
     817             : 
     818           0 :     if (!buffer) {
     819           0 :         fprintf(stderr, "Unable to allocate buffer of size %d\n", num_coords + 1);
     820           0 :         return 0;
     821             :     }
     822           0 :     for (int32_t by = 0; by < num_blocks_h; ++by) {
     823           0 :         const int32_t y_o = by * (block_size >> sub_log2[1]);
     824           0 :         for (int32_t bx = 0; bx < num_blocks_w; ++bx) {
     825           0 :             const int32_t x_o = bx * (block_size >> sub_log2[0]);
     826           0 :             if (!flat_blocks[by * num_blocks_w + bx])
     827           0 :                 continue;
     828           0 :             int32_t y_start =
     829           0 :                 (by > 0 && flat_blocks[(by - 1) * num_blocks_w + bx]) ? 0 : lag;
     830           0 :             int32_t x_start =
     831           0 :                 (bx > 0 && flat_blocks[by * num_blocks_w + bx - 1]) ? 0 : lag;
     832           0 :             int32_t y_end = AOMMIN((h >> sub_log2[1]) - by * (block_size >> sub_log2[1]),
     833             :                 block_size >> sub_log2[1]);
     834           0 :             int32_t x_end = AOMMIN(
     835             :                 (w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]) - lag,
     836             :                 (bx + 1 < num_blocks_w && flat_blocks[by * num_blocks_w + bx + 1])
     837             :                 ? (block_size >> sub_log2[0])
     838             :                 : ((block_size >> sub_log2[0]) - lag));
     839           0 :             for (int32_t y = y_start; y < y_end; ++y) {
     840           0 :                 for (int32_t x = x_start; x < x_end; ++x) {
     841           0 :                     const double val =
     842           0 :                         noise_model->params.use_highbd
     843           0 :                         ? extract_ar_row_highbd(noise_model->coords, num_coords,
     844             :                         (const uint16_t *const)data,
     845             :                             (const uint16_t *const)denoised,
     846             :                             stride, sub_log2,
     847             :                             (const uint16_t *const)alt_data,
     848             :                             (const uint16_t *const)alt_denoised,
     849             :                             alt_stride, x + x_o, y + y_o, buffer)
     850           0 :                         : extract_ar_row_lowbd(noise_model->coords, num_coords, data,
     851             :                             denoised, stride, sub_log2, alt_data,
     852             :                             alt_denoised, alt_stride, x + x_o,
     853             :                             y + y_o, buffer);
     854           0 :                     for (int32_t i = 0; i < n; ++i) {
     855           0 :                         for (int32_t j = 0; j < n; ++j) {
     856           0 :                             A[i * n + j] +=
     857           0 :                                 (buffer[i] * buffer[j]) / (normalization * normalization);
     858             :                         }
     859           0 :                         b[i] += (buffer[i] * val) / (normalization * normalization);
     860             :                     }
     861           0 :                     noise_model->latest_state[c].num_observations++;
     862             :                 }
     863             :             }
     864             :         }
     865             :     }
     866           0 :     free(buffer);
     867           0 :     return 1;
     868             : }
     869             : 
     870           0 : static void add_noise_std_observations(
     871             :     aom_noise_model_t *noise_model, int32_t c, const double *coeffs,
     872             :     const uint8_t *const data, const uint8_t *const denoised, int32_t w, int32_t h,
     873             :     int32_t stride, int32_t sub_log2[2], const uint8_t *const alt_data, int32_t alt_stride,
     874             :     const uint8_t *const flat_blocks, int32_t block_size, int32_t num_blocks_w,
     875             :     int32_t num_blocks_h) {
     876           0 :     const int32_t num_coords = noise_model->n;
     877           0 :     aom_noise_strength_solver_t *noise_strength_solver =
     878             :         &noise_model->latest_state[c].strength_solver;
     879             : 
     880           0 :     const aom_noise_strength_solver_t *noise_strength_luma =
     881             :         &noise_model->latest_state[0].strength_solver;
     882           0 :     const double luma_gain = noise_model->latest_state[0].ar_gain;
     883           0 :     const double noise_gain = noise_model->latest_state[c].ar_gain;
     884           0 :     for (int32_t by = 0; by < num_blocks_h; ++by) {
     885           0 :         const int32_t y_o = by * (block_size >> sub_log2[1]);
     886           0 :         for (int32_t bx = 0; bx < num_blocks_w; ++bx) {
     887           0 :             const int32_t x_o = bx * (block_size >> sub_log2[0]);
     888           0 :             if (!flat_blocks[by * num_blocks_w + bx])
     889           0 :                 continue;
     890           0 :             const int32_t num_samples_h =
     891           0 :                 AOMMIN((h >> sub_log2[1]) - by * (block_size >> sub_log2[1]),
     892             :                     block_size >> sub_log2[1]);
     893           0 :             const int32_t num_samples_w =
     894           0 :                 AOMMIN((w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]),
     895             :                 (block_size >> sub_log2[0]));
     896             :             // Make sure that we have a reasonable amount of samples to consider the
     897             :             // block
     898           0 :             if (num_samples_w * num_samples_h > block_size) {
     899           0 :                 const double block_mean = get_block_mean(
     900             :                     alt_data ? alt_data : data, w, h, alt_data ? alt_stride : stride,
     901           0 :                     x_o << sub_log2[0], y_o << sub_log2[1], block_size,
     902             :                     noise_model->params.use_highbd);
     903           0 :                 const double noise_var = get_noise_var(
     904           0 :                     data, denoised, stride, w >> sub_log2[0], h >> sub_log2[1], x_o,
     905           0 :                     y_o, block_size >> sub_log2[0], block_size >> sub_log2[1],
     906             :                     noise_model->params.use_highbd);
     907             :                 // We want to remove the part of the noise that came from being
     908             :                 // correlated with luma. Note that the noise solver for luma must
     909             :                 // have already been run.
     910           0 :                 const double luma_strength =
     911           0 :                     c > 0 ? luma_gain * noise_strength_solver_get_value(
     912             :                         noise_strength_luma, block_mean)
     913           0 :                     : 0;
     914           0 :                 const double corr = c > 0 ? coeffs[num_coords] : 0;
     915             :                 // Chroma noise:
     916             :                 //    N(0, noise_var) = N(0, uncorr_var) + corr * N(0, luma_strength^2)
     917             :                 // The uncorrelated component:
     918             :                 //   uncorr_var = noise_var - (corr * luma_strength)^2
     919             :                 // But don't allow fully correlated noise (hence the max), since the
     920             :                 // synthesis cannot model it.
     921           0 :                 const double uncorr_std = sqrt(
     922           0 :                     AOMMAX(noise_var / 16, noise_var - pow(corr * luma_strength, 2)));
     923             :                 // After we've removed correlation with luma, undo the gain that will
     924             :                 // come from running the IIR filter.
     925           0 :                 const double adjusted_strength = uncorr_std / noise_gain;
     926           0 :                 eb_aom_noise_strength_solver_add_measurement(
     927             :                     noise_strength_solver, block_mean, adjusted_strength);
     928             :             }
     929             :         }
     930             :     }
     931           0 : }
     932             : 
     933             : // Return true if the noise estimate appears to be different from the combined
     934             : // (multi-frame) estimate. The difference is measured by checking whether the
     935             : // AR coefficients have diverged (using a threshold on normalized cross
     936             : // correlation), or whether the noise strength has changed.
     937             : /*
     938             : static int32_t is_noise_model_different(aom_noise_model_t *const noise_model) {
     939             :   // These thresholds are kind of arbitrary and will likely need further tuning
     940             :   // (or exported as parameters). The threshold on noise strength is a weighted
     941             :   // difference between the noise strength histograms
     942             :   const double kCoeffThreshold = 0.9;
     943             :   const double kStrengthThreshold =
     944             :       0.005 * (1 << (noise_model->params.bit_depth - 8));
     945             :   for (int32_t c = 0; c < 1; ++c) {
     946             :     const double corr =
     947             :         eb_aom_normalized_cross_correlation(noise_model->latest_state[c].eqns.x,
     948             :                                          noise_model->combined_state[c].eqns.x,
     949             :                                          noise_model->combined_state[c].eqns.n);
     950             :     if (corr < kCoeffThreshold) return 1;
     951             : 
     952             :     const double dx =
     953             :         1.0 / noise_model->latest_state[c].strength_solver.num_bins;
     954             : 
     955             :     const aom_equation_system_t *latest_eqns =
     956             :         &noise_model->latest_state[c].strength_solver.eqns;
     957             :     const aom_equation_system_t *combined_eqns =
     958             :         &noise_model->combined_state[c].strength_solver.eqns;
     959             :     double diff = 0;
     960             :     double total_weight = 0;
     961             :     for (int32_t j = 0; j < latest_eqns->n; ++j) {
     962             :       double weight = 0;
     963             :       for (int32_t i = 0; i < latest_eqns->n; ++i)
     964             :         weight += latest_eqns->A[i * latest_eqns->n + j];
     965             :       weight = sqrt(weight);
     966             :       diff += weight * fabs(latest_eqns->x[j] - combined_eqns->x[j]);
     967             :       total_weight += weight;
     968             :     }
     969             :     if (diff * dx / total_weight > kStrengthThreshold) return 1;
     970             :   }
     971             :   return 0;
     972             : }
     973             : */
     974             : 
     975           0 : static int32_t ar_equation_system_solve(aom_noise_state_t *state, int32_t is_chroma) {
     976           0 :     const int32_t ret = equation_system_solve(&state->eqns);
     977           0 :     state->ar_gain = 1.0;
     978           0 :     if (!ret) return ret;
     979             : 
     980             :     // Update the AR gain from the equation system as it will be used to fit
     981             :     // the noise strength as a function of intensity.  In the Yule-Walker
     982             :     // equations, the diagonal should be the variance of the correlated noise.
     983             :     // In the case of the least squares estimate, there will be some variability
     984             :     // in the diagonal. So use the mean of the diagonal as the estimate of
     985             :     // overall variance (this works for least squares or Yule-Walker formulation).
     986           0 :     double var = 0;
     987           0 :     const int32_t n = state->eqns.n;
     988           0 :     for (int32_t i = 0; i < (state->eqns.n - is_chroma); ++i)
     989           0 :         var += state->eqns.A[i * n + i] / state->num_observations;
     990           0 :     var /= (n - is_chroma);
     991             : 
     992             :     // Keep track of E(Y^2) = <b, x> + E(X^2)
     993             :     // In the case that we are using chroma and have an estimate of correlation
     994             :     // with luma we adjust that estimate slightly to remove the correlated bits by
     995             :     // subtracting out the last column of a scaled by our correlation estimate
     996             :     // from b. E(y^2) = <b - A(:, end)*x(end), x>
     997           0 :     double sum_covar = 0;
     998           0 :     for (int32_t i = 0; i < state->eqns.n - is_chroma; ++i) {
     999           0 :         double bi = state->eqns.b[i];
    1000           0 :         if (is_chroma)
    1001           0 :             bi -= state->eqns.A[i * n + (n - 1)] * state->eqns.x[n - 1];
    1002           0 :         sum_covar += (bi * state->eqns.x[i]) / state->num_observations;
    1003             :     }
    1004             :     // Now, get an estimate of the variance of uncorrelated noise signal and use
    1005             :     // it to determine the gain of the AR filter.
    1006           0 :     const double noise_var = AOMMAX(var - sum_covar, 1e-6);
    1007           0 :     state->ar_gain = AOMMAX(1, sqrt(AOMMAX(var / noise_var, 1e-6)));
    1008           0 :     return ret;
    1009             : }
    1010             : 
    1011           0 : aom_noise_status_t eb_aom_noise_model_update(
    1012             :     aom_noise_model_t *const noise_model, const uint8_t *const data[3],
    1013             :     const uint8_t *const denoised[3], int32_t w, int32_t h, int32_t stride[3],
    1014             :     int32_t chroma_sub_log2[2], const uint8_t *const flat_blocks, int32_t block_size) {
    1015           0 :     const int32_t num_blocks_w = (w + block_size - 1) / block_size;
    1016           0 :     const int32_t num_blocks_h = (h + block_size - 1) / block_size;
    1017             :     //  int32_t y_model_different = 0;
    1018           0 :     int32_t num_blocks = 0;
    1019           0 :     int32_t i = 0, channel = 0;
    1020             : 
    1021           0 :     if (block_size <= 1) {
    1022           0 :         fprintf(stderr, "BlockSize = %d must be > 1\n", block_size);
    1023           0 :         return AOM_NOISE_STATUS_INVALID_ARGUMENT;
    1024             :     }
    1025             : 
    1026           0 :     if (block_size < noise_model->params.lag * 2 + 1) {
    1027           0 :         fprintf(stderr, "BlockSize = %d must be >= %d\n", block_size,
    1028           0 :             noise_model->params.lag * 2 + 1);
    1029           0 :         return AOM_NOISE_STATUS_INVALID_ARGUMENT;
    1030             :     }
    1031             : 
    1032             :     // Clear the latest equation system
    1033           0 :     for (i = 0; i < 3; ++i) {
    1034           0 :         equation_system_clear(&noise_model->latest_state[i].eqns);
    1035           0 :         noise_model->latest_state[i].num_observations = 0;
    1036           0 :         noise_strength_solver_clear(&noise_model->latest_state[i].strength_solver);
    1037             :     }
    1038             : 
    1039             :     // Check that we have enough flat blocks
    1040           0 :     for (i = 0; i < num_blocks_h * num_blocks_w; ++i) {
    1041           0 :         if (flat_blocks[i])
    1042           0 :             num_blocks++;
    1043             :     }
    1044             : 
    1045           0 :     if (num_blocks <= 1) {
    1046           0 :         fprintf(stderr, "Not enough flat blocks to update noise estimate\n");
    1047           0 :         return AOM_NOISE_STATUS_INSUFFICIENT_FLAT_BLOCKS;
    1048             :     }
    1049             : 
    1050           0 :     for (channel = 0; channel < 3; ++channel) {
    1051           0 :         int32_t no_subsampling[2] = { 0, 0 };
    1052           0 :         const uint8_t *alt_data = channel > 0 ? data[0] : 0;
    1053           0 :         const uint8_t *alt_denoised = channel > 0 ? denoised[0] : 0;
    1054           0 :         int32_t *sub = channel > 0 ? chroma_sub_log2 : no_subsampling;
    1055           0 :         const int32_t is_chroma = channel != 0;
    1056           0 :         if (!data[channel] || !denoised[channel]) break;
    1057           0 :         if (!add_block_observations(noise_model, channel, data[channel],
    1058           0 :             denoised[channel], w, h, stride[channel], sub,
    1059             :             alt_data, alt_denoised, stride[0], flat_blocks,
    1060             :             block_size, num_blocks_w, num_blocks_h)) {
    1061           0 :             fprintf(stderr, "Adding block observation failed\n");
    1062           0 :             return AOM_NOISE_STATUS_INTERNAL_ERROR;
    1063             :         }
    1064             : 
    1065           0 :         if (!ar_equation_system_solve(&noise_model->latest_state[channel],
    1066             :             is_chroma)) {
    1067           0 :             if (is_chroma) {
    1068           0 :                 set_chroma_coefficient_fallback_soln(
    1069             :                     &noise_model->latest_state[channel].eqns);
    1070             :             }
    1071             :             else {
    1072           0 :                 fprintf(stderr, "Solving latest noise equation system failed %d!\n",
    1073             :                     channel);
    1074           0 :                 return AOM_NOISE_STATUS_INTERNAL_ERROR;
    1075             :             }
    1076             :         }
    1077             : 
    1078           0 :         add_noise_std_observations(
    1079           0 :             noise_model, channel, noise_model->latest_state[channel].eqns.x,
    1080           0 :             data[channel], denoised[channel], w, h, stride[channel], sub, alt_data,
    1081             :             stride[0], flat_blocks, block_size, num_blocks_w, num_blocks_h);
    1082             : 
    1083           0 :         if (!eb_aom_noise_strength_solver_solve(
    1084             :             &noise_model->latest_state[channel].strength_solver)) {
    1085           0 :             fprintf(stderr, "Solving latest noise strength failed!\n");
    1086           0 :             return AOM_NOISE_STATUS_INTERNAL_ERROR;
    1087             :         }
    1088             : 
    1089             :         // Check noise characteristics and return if error.
    1090             :     //    if (channel == 0 &&
    1091             :     //        noise_model->combined_state[channel].strength_solver.num_equations >
    1092             :     //            0 &&
    1093             :     //        is_noise_model_different(noise_model)) {
    1094             :     //      y_model_different = 1;
    1095             :     //    }
    1096             : 
    1097           0 :         noise_model->combined_state[channel].num_observations =
    1098           0 :             noise_model->latest_state[channel].num_observations;
    1099           0 :         equation_system_copy(&noise_model->combined_state[channel].eqns,
    1100           0 :             &noise_model->latest_state[channel].eqns);
    1101           0 :         if (!ar_equation_system_solve(&noise_model->combined_state[channel],
    1102             :             is_chroma)) {
    1103           0 :             if (is_chroma) {
    1104           0 :                 set_chroma_coefficient_fallback_soln(
    1105             :                     &noise_model->combined_state[channel].eqns);
    1106             :             }
    1107             :             else {
    1108           0 :                 fprintf(stderr, "Solving combined noise equation system failed %d!\n",
    1109             :                     channel);
    1110           0 :                 return AOM_NOISE_STATUS_INTERNAL_ERROR;
    1111             :             }
    1112             :         }
    1113             : 
    1114           0 :         noise_strength_solver_copy(
    1115             :             &noise_model->combined_state[channel].strength_solver,
    1116             :             &noise_model->latest_state[channel].strength_solver);
    1117             : 
    1118           0 :         if (!eb_aom_noise_strength_solver_solve(
    1119             :             &noise_model->combined_state[channel].strength_solver)) {
    1120           0 :             fprintf(stderr, "Solving combined noise strength failed!\n");
    1121           0 :             return AOM_NOISE_STATUS_INTERNAL_ERROR;
    1122             :         }
    1123             :     }
    1124             : 
    1125           0 :     return AOM_NOISE_STATUS_OK;
    1126             : }
    1127             : 
    1128           0 : void eb_aom_noise_model_save_latest(aom_noise_model_t *noise_model) {
    1129           0 :     for (int32_t c = 0; c < 3; c++) {
    1130           0 :         equation_system_copy(&noise_model->combined_state[c].eqns,
    1131           0 :             &noise_model->latest_state[c].eqns);
    1132           0 :         equation_system_copy(&noise_model->combined_state[c].strength_solver.eqns,
    1133           0 :             &noise_model->latest_state[c].strength_solver.eqns);
    1134           0 :         noise_model->combined_state[c].strength_solver.num_equations =
    1135           0 :             noise_model->latest_state[c].strength_solver.num_equations;
    1136           0 :         noise_model->combined_state[c].num_observations =
    1137           0 :             noise_model->latest_state[c].num_observations;
    1138           0 :         noise_model->combined_state[c].ar_gain =
    1139           0 :             noise_model->latest_state[c].ar_gain;
    1140             :     }
    1141           0 : }
    1142             : 
    1143           0 : int32_t eb_aom_noise_model_get_grain_parameters(aom_noise_model_t *const noise_model,
    1144             :     aom_film_grain_t *film_grain) {
    1145           0 :     if (noise_model->params.lag > 3) {
    1146           0 :         fprintf(stderr, "params.lag = %d > 3\n", noise_model->params.lag);
    1147           0 :         return 0;
    1148             :     }
    1149           0 :     uint16_t random_seed = film_grain->random_seed;
    1150           0 :     memset(film_grain, 0, sizeof(*film_grain));
    1151           0 :     film_grain->random_seed = random_seed;
    1152             : 
    1153           0 :     film_grain->apply_grain = 1;
    1154           0 :     film_grain->update_parameters = 1;
    1155             : 
    1156           0 :     film_grain->ar_coeff_lag = noise_model->params.lag;
    1157             : 
    1158             :     // Convert the scaling functions to 8 bit values
    1159             :     aom_noise_strength_lut_t scaling_points[3];
    1160           0 :     eb_aom_noise_strength_solver_fit_piecewise(
    1161           0 :         &noise_model->combined_state[0].strength_solver, 14, scaling_points + 0);
    1162           0 :     eb_aom_noise_strength_solver_fit_piecewise(
    1163           0 :         &noise_model->combined_state[1].strength_solver, 10, scaling_points + 1);
    1164           0 :     eb_aom_noise_strength_solver_fit_piecewise(
    1165           0 :         &noise_model->combined_state[2].strength_solver, 10, scaling_points + 2);
    1166             : 
    1167             :     // Both the domain and the range of the scaling functions in the film_grain
    1168             :     // are normalized to 8-bit (e.g., they are implicitly scaled during grain
    1169             :     // synthesis).
    1170           0 :     const double strength_divisor = 1 << (noise_model->params.bit_depth - 8);
    1171           0 :     double max_scaling_value = 1e-4;
    1172           0 :     for (int32_t c = 0; c < 3; ++c) {
    1173           0 :         for (int32_t i = 0; i < scaling_points[c].num_points; ++i) {
    1174           0 :             scaling_points[c].points[i][0] =
    1175           0 :                 AOMMIN(255, scaling_points[c].points[i][0] / strength_divisor);
    1176           0 :             scaling_points[c].points[i][1] =
    1177           0 :                 AOMMIN(255, scaling_points[c].points[i][1] / strength_divisor);
    1178           0 :             max_scaling_value =
    1179           0 :                 AOMMAX(scaling_points[c].points[i][1], max_scaling_value);
    1180             :         }
    1181             :     }
    1182             : 
    1183             :     // Scaling_shift values are in the range [8,11]
    1184             :     const int32_t max_scaling_value_log2 =
    1185           0 :         clamp((int32_t)floor(log2(max_scaling_value) + 1), 2, 5);
    1186           0 :     film_grain->scaling_shift = 5 + (8 - max_scaling_value_log2);
    1187             : 
    1188           0 :     const double scale_factor = 1 << (8 - max_scaling_value_log2);
    1189           0 :     film_grain->num_y_points = scaling_points[0].num_points;
    1190           0 :     film_grain->num_cb_points = scaling_points[1].num_points;
    1191           0 :     film_grain->num_cr_points = scaling_points[2].num_points;
    1192             : 
    1193           0 :     int32_t(*film_grain_scaling[3])[2] = {
    1194           0 :       film_grain->scaling_points_y,
    1195           0 :       film_grain->scaling_points_cb,
    1196           0 :       film_grain->scaling_points_cr,
    1197             :     };
    1198           0 :     for (int32_t c = 0; c < 3; c++) {
    1199           0 :         for (int32_t i = 0; i < scaling_points[c].num_points; ++i) {
    1200           0 :             film_grain_scaling[c][i][0] = (int32_t)(scaling_points[c].points[i][0] + 0.5);
    1201           0 :             film_grain_scaling[c][i][1] = clamp(
    1202           0 :                 (int32_t)(scale_factor * scaling_points[c].points[i][1] + 0.5), 0, 255);
    1203             :         }
    1204             :     }
    1205           0 :     eb_aom_noise_strength_lut_free(scaling_points + 0);
    1206           0 :     eb_aom_noise_strength_lut_free(scaling_points + 1);
    1207           0 :     eb_aom_noise_strength_lut_free(scaling_points + 2);
    1208             : 
    1209             :     // Convert the ar_coeffs into 8-bit values
    1210           0 :     const int32_t n_coeff = noise_model->combined_state[0].eqns.n;
    1211           0 :     double max_coeff = 1e-4, min_coeff = -1e-4;
    1212           0 :     double y_corr[2] = { 0, 0 };
    1213           0 :     double avg_luma_strength = 0;
    1214           0 :     for (int32_t c = 0; c < 3; c++) {
    1215           0 :         aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
    1216           0 :         for (int32_t i = 0; i < n_coeff; ++i) {
    1217           0 :             max_coeff = AOMMAX(max_coeff, eqns->x[i]);
    1218           0 :             min_coeff = AOMMIN(min_coeff, eqns->x[i]);
    1219             :         }
    1220             :         // Since the correlation between luma/chroma was computed in an already
    1221             :         // scaled space, we adjust it in the un-scaled space.
    1222           0 :         aom_noise_strength_solver_t *solver =
    1223             :             &noise_model->combined_state[c].strength_solver;
    1224             :         // Compute a weighted average of the strength for the channel.
    1225           0 :         double average_strength = 0, total_weight = 0;
    1226           0 :         for (int32_t i = 0; i < solver->eqns.n; ++i) {
    1227           0 :             double w = 0;
    1228           0 :             for (int32_t j = 0; j < solver->eqns.n; ++j)
    1229           0 :                 w += solver->eqns.A[i * solver->eqns.n + j];
    1230           0 :             w = sqrt(w);
    1231           0 :             average_strength += solver->eqns.x[i] * w;
    1232           0 :             total_weight += w;
    1233             :         }
    1234           0 :         if (total_weight == 0)
    1235           0 :             average_strength = 1;
    1236             :         else
    1237           0 :             average_strength /= total_weight;
    1238           0 :         if (c == 0)
    1239           0 :             avg_luma_strength = average_strength;
    1240             :         else {
    1241           0 :             y_corr[c - 1] = avg_luma_strength * eqns->x[n_coeff] / average_strength;
    1242           0 :             max_coeff = AOMMAX(max_coeff, y_corr[c - 1]);
    1243           0 :             min_coeff = AOMMIN(min_coeff, y_corr[c - 1]);
    1244             :         }
    1245             :     }
    1246             :     // Shift value: AR coeffs range (values 6-9)
    1247             :     // 6: [-2, 2),  7: [-1, 1), 8: [-0.5, 0.5), 9: [-0.25, 0.25)
    1248           0 :     film_grain->ar_coeff_shift =
    1249           0 :         clamp(7 - (int32_t)AOMMAX(1 + floor(log2(max_coeff)), ceil(log2(-min_coeff))),
    1250             :             6, 9);
    1251           0 :     double scale_ar_coeff = 1 << film_grain->ar_coeff_shift;
    1252           0 :     int32_t *ar_coeffs[3] = {
    1253           0 :       film_grain->ar_coeffs_y,
    1254           0 :       film_grain->ar_coeffs_cb,
    1255           0 :       film_grain->ar_coeffs_cr,
    1256             :     };
    1257           0 :     for (int32_t c = 0; c < 3; ++c) {
    1258           0 :         aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
    1259           0 :         for (int32_t i = 0; i < n_coeff; ++i) {
    1260           0 :             ar_coeffs[c][i] =
    1261           0 :                 clamp((int32_t)round(scale_ar_coeff * eqns->x[i]), -128, 127);
    1262             :         }
    1263           0 :         if (c > 0) {
    1264           0 :             ar_coeffs[c][n_coeff] =
    1265           0 :                 clamp((int32_t)round(scale_ar_coeff * y_corr[c - 1]), -128, 127);
    1266             :         }
    1267             :     }
    1268             : 
    1269             :     // At the moment, the noise modeling code assumes that the chroma scaling
    1270             :     // functions are a function of luma.
    1271           0 :     film_grain->cb_mult = 128;       // 8 bits
    1272           0 :     film_grain->cb_luma_mult = 192;  // 8 bits
    1273           0 :     film_grain->cb_offset = 256;     // 9 bits
    1274             : 
    1275           0 :     film_grain->cr_mult = 128;       // 8 bits
    1276           0 :     film_grain->cr_luma_mult = 192;  // 8 bits
    1277           0 :     film_grain->cr_offset = 256;     // 9 bits
    1278             : 
    1279           0 :     film_grain->chroma_scaling_from_luma = 0;
    1280           0 :     film_grain->grain_scale_shift = 0;
    1281           0 :     film_grain->overlap_flag = 1;
    1282           0 :     return 1;
    1283             : }
    1284             : 
    1285           0 : static void pointwise_multiply(const float *a, float *b, int32_t n) {
    1286           0 :     for (int32_t i = 0; i < n; ++i)
    1287           0 :         b[i] *= a[i];
    1288           0 : }
    1289             : 
    1290           0 : static float *get_half_cos_window(int32_t block_size) {
    1291             :     float *window_function =
    1292           0 :         (float *)malloc(block_size * block_size * sizeof(*window_function));
    1293             :     ASSERT(window_function);
    1294           0 :     for (int32_t y = 0; y < block_size; ++y) {
    1295           0 :         const double cos_yd = cos((.5 + y) * PI / block_size - PI / 2);
    1296           0 :         for (int32_t x = 0; x < block_size; ++x) {
    1297           0 :             const double cos_xd = cos((.5 + x) * PI / block_size - PI / 2);
    1298           0 :             window_function[y * block_size + x] = (float)(cos_yd * cos_xd);
    1299             :         }
    1300             :     }
    1301           0 :     return window_function;
    1302             : }
    1303             : 
    1304             : #define DITHER_AND_QUANTIZE(INT_TYPE, suffix)                               \
    1305             :   static void dither_and_quantize_##suffix(                                 \
    1306             :       float *result, int32_t result_stride, INT_TYPE *denoised, int32_t w, int32_t h,   \
    1307             :       int32_t stride, int32_t chroma_sub_w, int32_t chroma_sub_h, int32_t block_size,       \
    1308             :       float block_normalization) {                                          \
    1309             :     for (int32_t y = 0; y < (h >> chroma_sub_h); ++y) {                         \
    1310             :       for (int32_t x = 0; x < (w >> chroma_sub_w); ++x) {                       \
    1311             :         const int32_t result_idx =                                              \
    1312             :             (y + (block_size >> chroma_sub_h)) * result_stride + x +        \
    1313             :             (block_size >> chroma_sub_w);                                   \
    1314             :         INT_TYPE new_val = (INT_TYPE)AOMMIN(                                \
    1315             :             AOMMAX(result[result_idx] * block_normalization + 0.5f, 0),     \
    1316             :             block_normalization);                                           \
    1317             :         const float err =                                                   \
    1318             :             -(((float)new_val) / block_normalization - result[result_idx]); \
    1319             :         denoised[y * stride + x] = new_val;                                 \
    1320             :         if (x + 1 < (w >> chroma_sub_w)) {                                  \
    1321             :           result[result_idx + 1] += err * 7.0f / 16.0f;                     \
    1322             :         }                                                                   \
    1323             :         if (y + 1 < (h >> chroma_sub_h)) {                                  \
    1324             :           if (x > 0) {                                                      \
    1325             :             result[result_idx + result_stride - 1] += err * 3.0f / 16.0f;   \
    1326             :           }                                                                 \
    1327             :           result[result_idx + result_stride] += err * 5.0f / 16.0f;         \
    1328             :           if (x + 1 < (w >> chroma_sub_w)) {                                \
    1329             :             result[result_idx + result_stride + 1] += err * 1.0f / 16.0f;   \
    1330             :           }                                                                 \
    1331             :         }                                                                   \
    1332             :       }                                                                     \
    1333             :     }                                                                       \
    1334             :   }
    1335             : 
    1336           0 : DITHER_AND_QUANTIZE(uint8_t, lowbd);
    1337           0 : DITHER_AND_QUANTIZE(uint16_t, highbd);
    1338             : 
    1339           0 : int32_t eb_aom_wiener_denoise_2d(const uint8_t *const data[3], uint8_t *denoised[3],
    1340             :     int32_t w, int32_t h, int32_t stride[3], int32_t chroma_sub[2],
    1341             :     float *noise_psd[3], int32_t block_size, int32_t bit_depth,
    1342             :     int32_t use_highbd) {
    1343           0 :     float *plane = NULL, *window_full = NULL,
    1344           0 :         *window_chroma = NULL;
    1345             :     DECLARE_ALIGNED(32, float, *block);
    1346           0 :     block = NULL;
    1347           0 :     double *block_d = NULL, *plane_d = NULL;
    1348           0 :     struct aom_noise_tx_t *tx_full = NULL;
    1349           0 :     struct aom_noise_tx_t *tx_chroma = NULL;
    1350           0 :     const int32_t num_blocks_w = (w + block_size - 1) / block_size;
    1351           0 :     const int32_t num_blocks_h = (h + block_size - 1) / block_size;
    1352           0 :     const int32_t result_stride = (num_blocks_w + 2) * block_size;
    1353           0 :     const int32_t result_height = (num_blocks_h + 2) * block_size;
    1354           0 :     float *result = NULL;
    1355           0 :     int32_t init_success = 1;
    1356             :     aom_flat_block_finder_t block_finder_full;
    1357             :     aom_flat_block_finder_t block_finder_chroma;
    1358           0 :     const float kBlockNormalization = (float)((1 << bit_depth) - 1);
    1359           0 :     if (chroma_sub[0] != chroma_sub[1]) {
    1360           0 :         fprintf(stderr,
    1361             :             "eb_aom_wiener_denoise_2d doesn't handle different chroma "
    1362             :             "subsampling");
    1363           0 :         return 0;
    1364             :     }
    1365           0 :     init_success &= eb_aom_flat_block_finder_init(&block_finder_full, block_size,
    1366             :         bit_depth, use_highbd);
    1367           0 :     result = (float *)malloc((num_blocks_h + 2) * block_size * result_stride *
    1368             :         sizeof(*result));
    1369           0 :     plane = (float *)malloc(block_size * block_size * sizeof(*plane));
    1370             :     block =
    1371           0 :         (float *)eb_aom_memalign(32, 2 * block_size * block_size * sizeof(*block));
    1372           0 :     block_d = (double *)malloc(block_size * block_size * sizeof(*block_d));
    1373           0 :     plane_d = (double *)malloc(block_size * block_size * sizeof(*plane_d));
    1374           0 :     window_full = get_half_cos_window(block_size);
    1375           0 :     tx_full = eb_aom_noise_tx_malloc(block_size);
    1376             : 
    1377           0 :     if (chroma_sub[0] != 0) {
    1378           0 :         init_success &= eb_aom_flat_block_finder_init(&block_finder_chroma,
    1379           0 :             block_size >> chroma_sub[0],
    1380             :             bit_depth, use_highbd);
    1381           0 :         window_chroma = get_half_cos_window(block_size >> chroma_sub[0]);
    1382           0 :         tx_chroma = eb_aom_noise_tx_malloc(block_size >> chroma_sub[0]);
    1383             :     }
    1384             :     else {
    1385           0 :         window_chroma = window_full;
    1386           0 :         tx_chroma = tx_full;
    1387             :     }
    1388             : 
    1389           0 :     init_success &= (int32_t)((tx_full != NULL) && (tx_chroma != NULL) && (plane != NULL) &&
    1390           0 :         (plane_d != NULL) && (block != NULL) && (block_d != NULL) &&
    1391           0 :         (window_full != NULL) && (window_chroma != NULL) &&
    1392             :         (result != NULL));
    1393           0 :     for (int32_t c = init_success ? 0 : 3; c < 3; ++c) {
    1394           0 :         float *window_function = c == 0 ? window_full : window_chroma;
    1395           0 :         aom_flat_block_finder_t *block_finder = &block_finder_full;
    1396           0 :         const int32_t chroma_sub_h = c > 0 ? chroma_sub[1] : 0;
    1397           0 :         const int32_t chroma_sub_w = c > 0 ? chroma_sub[0] : 0;
    1398           0 :         struct aom_noise_tx_t *tx =
    1399           0 :             (c > 0 && chroma_sub[0] > 0) ? tx_chroma : tx_full;
    1400           0 :         if (!data[c] || !denoised[c]) continue;
    1401           0 :         if (c > 0 && chroma_sub[0] != 0)
    1402           0 :             block_finder = &block_finder_chroma;
    1403           0 :         memset(result, 0, sizeof(*result) * result_stride * result_height);
    1404             :         // Do overlapped block processing (half overlapped). The block rows can
    1405             :         // easily be done in parallel
    1406           0 :         for (int32_t offsy = 0; offsy < (block_size >> chroma_sub_h);
    1407           0 :             offsy += (block_size >> chroma_sub_h) / 2) {
    1408           0 :             for (int32_t offsx = 0; offsx < (block_size >> chroma_sub_w);
    1409           0 :                 offsx += (block_size >> chroma_sub_w) / 2) {
    1410             :                 // Pad the boundary when processing each block-set.
    1411           0 :                 for (int32_t by = -1; by < num_blocks_h; ++by) {
    1412           0 :                     for (int32_t bx = -1; bx < num_blocks_w; ++bx) {
    1413           0 :                         const int32_t pixels_per_block =
    1414           0 :                             (block_size >> chroma_sub_w) * (block_size >> chroma_sub_h);
    1415           0 :                         eb_aom_flat_block_finder_extract_block(
    1416           0 :                             block_finder, data[c], w >> chroma_sub_w, h >> chroma_sub_h,
    1417           0 :                             stride[c], bx * (block_size >> chroma_sub_w) + offsx,
    1418           0 :                             by * (block_size >> chroma_sub_h) + offsy, plane_d, block_d);
    1419           0 :                         for (int32_t j = 0; j < pixels_per_block; ++j) {
    1420           0 :                             block[j] = (float)block_d[j];
    1421           0 :                             plane[j] = (float)plane_d[j];
    1422             :                         }
    1423           0 :                         pointwise_multiply(window_function, block, pixels_per_block);
    1424           0 :                         eb_aom_noise_tx_forward(tx, block);
    1425           0 :                         eb_aom_noise_tx_filter(tx, noise_psd[c]);
    1426           0 :                         eb_aom_noise_tx_inverse(tx, block);
    1427             : 
    1428             :                         // Apply window function to the plane approximation (we will apply
    1429             :                         // it to the sum of plane + block when composing the results).
    1430           0 :                         pointwise_multiply(window_function, plane, pixels_per_block);
    1431             : 
    1432           0 :                         for (int32_t y = 0; y < (block_size >> chroma_sub_h); ++y) {
    1433           0 :                             const int32_t y_result =
    1434           0 :                                 y + (by + 1) * (block_size >> chroma_sub_h) + offsy;
    1435           0 :                             for (int32_t x = 0; x < (block_size >> chroma_sub_w); ++x) {
    1436           0 :                                 const int32_t x_result =
    1437           0 :                                     x + (bx + 1) * (block_size >> chroma_sub_w) + offsx;
    1438           0 :                                 result[y_result * result_stride + x_result] +=
    1439           0 :                                     (block[y * (block_size >> chroma_sub_w) + x] +
    1440           0 :                                         plane[y * (block_size >> chroma_sub_w) + x]) *
    1441           0 :                                     window_function[y * (block_size >> chroma_sub_w) + x];
    1442             :                             }
    1443             :                         }
    1444             :                     }
    1445             :                 }
    1446             :             }
    1447             :         }
    1448           0 :         if (use_highbd) {
    1449           0 :             dither_and_quantize_highbd(result, result_stride, (uint16_t *)denoised[c],
    1450           0 :                 w, h, stride[c], chroma_sub_w, chroma_sub_h,
    1451             :                 block_size, kBlockNormalization);
    1452             :         }
    1453             :         else {
    1454           0 :             dither_and_quantize_lowbd(result, result_stride, denoised[c], w, h,
    1455           0 :                 stride[c], chroma_sub_w, chroma_sub_h,
    1456             :                 block_size, kBlockNormalization);
    1457             :         }
    1458             :     }
    1459           0 :     free(result);
    1460           0 :     free(plane);
    1461           0 :     eb_aom_free(block);
    1462           0 :     free(plane_d);
    1463           0 :     free(block_d);
    1464           0 :     free(window_full);
    1465             : 
    1466           0 :     eb_aom_noise_tx_free(tx_full);
    1467             : 
    1468           0 :     eb_aom_flat_block_finder_free(&block_finder_full);
    1469           0 :     if (chroma_sub[0] != 0) {
    1470           0 :         eb_aom_flat_block_finder_free(&block_finder_chroma);
    1471           0 :         free(window_chroma);
    1472           0 :         eb_aom_noise_tx_free(tx_chroma);
    1473             :     }
    1474           0 :     return init_success;
    1475             : }
    1476             : 
    1477           0 : EbErrorType eb_aom_denoise_and_model_alloc(aom_denoise_and_model_t *ctx,
    1478             :     int32_t bit_depth,
    1479             :     int32_t block_size,
    1480             :     float noise_level) {
    1481             : 
    1482           0 :     ctx->block_size = block_size;
    1483           0 :     ctx->noise_level = noise_level;
    1484           0 :     ctx->bit_depth = bit_depth;
    1485             : 
    1486           0 :     EB_MALLOC_ARRAY(ctx->noise_psd[0], block_size * block_size);
    1487           0 :     EB_MALLOC_ARRAY(ctx->noise_psd[1], block_size * block_size);
    1488           0 :     EB_MALLOC_ARRAY(ctx->noise_psd[2], block_size * block_size);
    1489           0 :     return EB_ErrorNone;
    1490             : }
    1491             : 
    1492           0 : void denoise_and_model_dctor(EbPtr p) {
    1493           0 :     aom_denoise_and_model_t *obj = (aom_denoise_and_model_t*)p;
    1494             : 
    1495           0 :     free(obj->flat_blocks);
    1496           0 :     for (int32_t i = 0; i < 3; ++i) {
    1497           0 :         EB_FREE_ARRAY(obj->denoised[i]);
    1498           0 :         EB_FREE_ARRAY(obj->noise_psd[i]);
    1499           0 :         EB_FREE_ARRAY(obj->packed[i]);
    1500             :     }
    1501           0 :     eb_aom_noise_model_free(&obj->noise_model);
    1502           0 :     eb_aom_flat_block_finder_free(&obj->flat_block_finder);
    1503           0 : }
    1504             : 
    1505             : 
    1506           0 : EbErrorType denoise_and_model_ctor(aom_denoise_and_model_t *object_ptr,
    1507             :     EbPtr object_init_data_ptr) {
    1508           0 :     denoise_and_model_init_data_t *init_data_ptr = (denoise_and_model_init_data_t*)object_init_data_ptr;
    1509           0 :     EbErrorType return_error = EB_ErrorNone;
    1510           0 :     uint32_t use_highbd = init_data_ptr->encoder_bit_depth > EB_8BIT ? 10 : 8;
    1511             : 
    1512           0 :     int32_t chroma_sub_log2[2] = { 1, 1 };  //todo: send chroma subsampling
    1513           0 :     chroma_sub_log2[0] = (init_data_ptr->encoder_color_format == EB_YUV444 ? 1 : 2) - 1;
    1514           0 :     chroma_sub_log2[1] = (init_data_ptr->encoder_color_format >= EB_YUV422 ? 1 : 2) - 1;
    1515             : 
    1516           0 :     object_ptr->dctor = denoise_and_model_dctor;
    1517             : 
    1518           0 :     return_error = eb_aom_denoise_and_model_alloc(object_ptr,
    1519           0 :         init_data_ptr->encoder_bit_depth > EB_8BIT ? 10 : 8,
    1520             :         DENOISING_BlockSize,
    1521           0 :         (float)(init_data_ptr->noise_level / 10.0));
    1522           0 :     if (return_error != EB_ErrorNone)
    1523           0 :         return return_error;
    1524           0 :     object_ptr->width = init_data_ptr->width;
    1525           0 :     object_ptr->height = init_data_ptr->height;
    1526           0 :     object_ptr->y_stride = init_data_ptr->stride_y;
    1527           0 :     object_ptr->uv_stride = init_data_ptr->stride_cb;
    1528             : 
    1529             :     //todo: consider replacing with EbPictureBuffersDesc
    1530             : 
    1531           0 :     EB_MALLOC_ARRAY(object_ptr->denoised[0], (object_ptr->y_stride * object_ptr->height) << use_highbd);
    1532           0 :     EB_MALLOC_ARRAY(object_ptr->denoised[1], (object_ptr->uv_stride * (object_ptr->height >> chroma_sub_log2[0])) << use_highbd);
    1533           0 :     EB_MALLOC_ARRAY(object_ptr->denoised[2], (object_ptr->uv_stride * (object_ptr->height >> chroma_sub_log2[0])) << use_highbd);
    1534             : 
    1535           0 :     if (use_highbd) {
    1536           0 :         EB_MALLOC_ARRAY(object_ptr->packed[0], (object_ptr->y_stride * object_ptr->height));
    1537           0 :         EB_MALLOC_ARRAY(object_ptr->packed[1], (object_ptr->uv_stride * (object_ptr->height >> chroma_sub_log2[0])));
    1538           0 :         EB_MALLOC_ARRAY(object_ptr->packed[2], (object_ptr->uv_stride * (object_ptr->height >> chroma_sub_log2[0])));
    1539             :     }
    1540             : 
    1541           0 :     return return_error;
    1542             : }
    1543             : 
    1544           0 : static int32_t denoise_and_model_realloc_if_necessary(struct aom_denoise_and_model_t *ctx,
    1545             :     EbPictureBufferDesc *sd, int32_t use_highbd) {
    1546           0 :     int32_t chroma_sub_log2[2] = { 1, 1 };  //todo: send chroma subsampling
    1547             : 
    1548           0 :     const int32_t block_size = ctx->block_size;
    1549             : 
    1550           0 :     free(ctx->flat_blocks);
    1551           0 :     ctx->flat_blocks = NULL;
    1552             : 
    1553           0 :     ctx->num_blocks_w = (sd->width + ctx->block_size - 1) / ctx->block_size;
    1554           0 :     ctx->num_blocks_h = (sd->height + ctx->block_size - 1) / ctx->block_size;
    1555           0 :     ctx->flat_blocks = malloc(ctx->num_blocks_w * ctx->num_blocks_h);
    1556             : 
    1557           0 :     eb_aom_flat_block_finder_free(&ctx->flat_block_finder);
    1558           0 :     if (!eb_aom_flat_block_finder_init(&ctx->flat_block_finder, ctx->block_size,
    1559             :         ctx->bit_depth, use_highbd)) {
    1560           0 :         fprintf(stderr, "Unable to init flat block finder\n");
    1561           0 :         return 0;
    1562             :     }
    1563             : 
    1564           0 :     const aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3,
    1565           0 :                                               ctx->bit_depth, use_highbd };
    1566             :     //  eb_aom_noise_model_free(&ctx->noise_model);
    1567           0 :     if (!eb_aom_noise_model_init(&ctx->noise_model, params)) {
    1568           0 :         fprintf(stderr, "Unable to init noise model\n");
    1569           0 :         return 0;
    1570             :     }
    1571             : 
    1572             :     // Simply use a flat PSD (although we could use the flat blocks to estimate
    1573             :     // PSD) those to estimate an actual noise PSD)
    1574             :     const float y_noise_level =
    1575           0 :         eb_aom_noise_psd_get_default_value(ctx->block_size, ctx->noise_level);
    1576           0 :     const float uv_noise_level = eb_aom_noise_psd_get_default_value(
    1577           0 :         ctx->block_size >> chroma_sub_log2[1], ctx->noise_level);
    1578           0 :     for (int32_t i = 0; i < block_size * block_size; ++i) {
    1579           0 :         ctx->noise_psd[0][i] = y_noise_level;
    1580           0 :         ctx->noise_psd[1][i] = ctx->noise_psd[2][i] = uv_noise_level;
    1581             :     }
    1582           0 :     return 1;
    1583             : }
    1584             : 
    1585           0 : static void pack_2d_pic(EbPictureBufferDesc *inputPicture,
    1586             :     uint16_t *packed[3]) {
    1587           0 :     const uint32_t input_luma_offset = ((inputPicture->origin_y)      * inputPicture->stride_y) + (inputPicture->origin_x);
    1588           0 :     const uint32_t inputBitIncLumaOffset = ((inputPicture->origin_y)      * inputPicture->stride_bit_inc_y) + (inputPicture->origin_x);
    1589           0 :     const uint32_t input_cb_offset = (((inputPicture->origin_y) >> 1) * inputPicture->stride_cb) + ((inputPicture->origin_x) >> 1);
    1590           0 :     const uint32_t inputBitIncCbOffset = (((inputPicture->origin_y) >> 1) * inputPicture->stride_bit_inc_cb) + ((inputPicture->origin_x) >> 1);
    1591           0 :     const uint32_t input_cr_offset = (((inputPicture->origin_y) >> 1) * inputPicture->stride_cr) + ((inputPicture->origin_x) >> 1);
    1592           0 :     const uint32_t inputBitIncCrOffset = (((inputPicture->origin_y) >> 1) * inputPicture->stride_bit_inc_cr) + ((inputPicture->origin_x) >> 1);
    1593             : 
    1594           0 :     pack2d_src(
    1595           0 :         inputPicture->buffer_y + input_luma_offset,
    1596           0 :         inputPicture->stride_y,
    1597           0 :         inputPicture->buffer_bit_inc_y + inputBitIncLumaOffset,
    1598           0 :         inputPicture->stride_bit_inc_y,
    1599             :         (uint16_t *)packed[0],
    1600           0 :         inputPicture->stride_y,
    1601           0 :         inputPicture->width,
    1602           0 :         inputPicture->height);
    1603             : 
    1604           0 :     pack2d_src(
    1605           0 :         inputPicture->buffer_cb + input_cb_offset,
    1606           0 :         inputPicture->stride_cr,
    1607           0 :         inputPicture->buffer_bit_inc_cb + inputBitIncCbOffset,
    1608           0 :         inputPicture->stride_bit_inc_cr,
    1609           0 :         (uint16_t *)packed[1],
    1610           0 :         inputPicture->stride_cr,
    1611           0 :         inputPicture->width >> 1,
    1612           0 :         inputPicture->height >> 1);
    1613             : 
    1614           0 :     pack2d_src(
    1615           0 :         inputPicture->buffer_cr + input_cr_offset,
    1616           0 :         inputPicture->stride_cr,
    1617           0 :         inputPicture->buffer_bit_inc_cr + inputBitIncCrOffset,
    1618           0 :         inputPicture->stride_bit_inc_cr,
    1619           0 :         (uint16_t *)packed[2],
    1620           0 :         inputPicture->stride_cr,
    1621           0 :         inputPicture->width >> 1,
    1622           0 :         inputPicture->height >> 1);
    1623           0 : }
    1624             : 
    1625           0 : static void unpack_2d_pic(uint8_t *packed[3],
    1626             :     EbPictureBufferDesc  *outputPicturePtr)
    1627             : {
    1628           0 :     uint32_t lumaBufferOffset = ((outputPicturePtr->origin_y)      * outputPicturePtr->stride_y) + (outputPicturePtr->origin_x);
    1629           0 :     uint32_t chromaBufferOffset = (((outputPicturePtr->origin_y) >> 1) * outputPicturePtr->stride_cb) + ((outputPicturePtr->origin_x) >> 1);
    1630           0 :     uint16_t luma_width = (uint16_t)(outputPicturePtr->width);
    1631           0 :     uint16_t chroma_width = luma_width >> 1;
    1632           0 :     uint16_t luma_height = (uint16_t)(outputPicturePtr->height);
    1633           0 :     uint16_t chroma_height = luma_height >> 1;
    1634             : 
    1635           0 :     un_pack2d(
    1636             :         (uint16_t*)(packed[0]),
    1637           0 :         outputPicturePtr->stride_y,
    1638           0 :         outputPicturePtr->buffer_y + lumaBufferOffset,
    1639           0 :         outputPicturePtr->stride_y,
    1640           0 :         outputPicturePtr->buffer_bit_inc_y + lumaBufferOffset,
    1641           0 :         outputPicturePtr->stride_bit_inc_y,
    1642             :         luma_width,
    1643             :         luma_height);
    1644             : 
    1645           0 :     un_pack2d(
    1646           0 :         (uint16_t*)(packed[1]),
    1647           0 :         outputPicturePtr->stride_cb,
    1648           0 :         outputPicturePtr->buffer_cb + chromaBufferOffset,
    1649           0 :         outputPicturePtr->stride_cb,
    1650           0 :         outputPicturePtr->buffer_bit_inc_cb + chromaBufferOffset,
    1651           0 :         outputPicturePtr->stride_bit_inc_cb,
    1652             :         chroma_width,
    1653             :         chroma_height);
    1654             : 
    1655           0 :     un_pack2d(
    1656           0 :         (uint16_t*)(packed[2]),
    1657           0 :         outputPicturePtr->stride_cr,
    1658           0 :         outputPicturePtr->buffer_cr + chromaBufferOffset,
    1659           0 :         outputPicturePtr->stride_cr,
    1660           0 :         outputPicturePtr->buffer_bit_inc_cr + chromaBufferOffset,
    1661           0 :         outputPicturePtr->stride_bit_inc_cr,
    1662             :         chroma_width,
    1663             :         chroma_height);
    1664           0 : }
    1665             : 
    1666           0 : int32_t eb_aom_denoise_and_model_run(struct aom_denoise_and_model_t *ctx,
    1667             :     EbPictureBufferDesc *sd,
    1668             :     aom_film_grain_t *film_grain,
    1669             :     int32_t use_highbd)
    1670             : {
    1671           0 :     const int32_t block_size = ctx->block_size;
    1672             :     uint8_t *raw_data[3];
    1673           0 :     int32_t chroma_sub_log2[2] = { 1, 1 };  //todo: send chroma subsampling
    1674           0 :     int32_t strides[3] = { sd->stride_y, sd->stride_cb, sd->stride_cr };
    1675             : 
    1676           0 :     if (!denoise_and_model_realloc_if_necessary(ctx, sd, use_highbd)) {
    1677           0 :         fprintf(stderr, "Unable to realloc buffers\n");
    1678           0 :         return 0;
    1679             :     }
    1680             : 
    1681           0 :     if (!use_highbd) {  // 8 bits input
    1682           0 :         raw_data[0] = sd->buffer_y + sd->origin_y * sd->stride_y + sd->origin_x;
    1683           0 :         raw_data[1] = sd->buffer_cb + sd->stride_cb * (sd->origin_y >> chroma_sub_log2[0])
    1684           0 :             + (sd->origin_x >> chroma_sub_log2[1]);
    1685           0 :         raw_data[2] = sd->buffer_cr + sd->stride_cr * (sd->origin_y >> chroma_sub_log2[0])
    1686           0 :             + (sd->origin_x >> chroma_sub_log2[1]);
    1687             :     }
    1688             :     else {          // 10 bits input
    1689           0 :         pack_2d_pic(sd, ctx->packed);
    1690             : 
    1691           0 :         raw_data[0] = (uint8_t *)(ctx->packed[0]);
    1692           0 :         raw_data[1] = (uint8_t *)(ctx->packed[1]);
    1693           0 :         raw_data[2] = (uint8_t *)(ctx->packed[2]);
    1694             :     }
    1695             : 
    1696           0 :     const uint8_t *const data[3] = { raw_data[0], raw_data[1], raw_data[2] };
    1697             : 
    1698           0 :     eb_aom_flat_block_finder_run(&ctx->flat_block_finder, data[0], sd->width,
    1699           0 :         sd->height, strides[0], ctx->flat_blocks);
    1700             : 
    1701           0 :     if (!eb_aom_wiener_denoise_2d(data, ctx->denoised, sd->width, sd->height,
    1702           0 :         strides, chroma_sub_log2, ctx->noise_psd,
    1703             :         block_size, ctx->bit_depth, use_highbd)) {
    1704           0 :         fprintf(stderr, "Unable to denoise image\n");
    1705           0 :         return 0;
    1706             :     }
    1707             : 
    1708           0 :     const aom_noise_status_t status = eb_aom_noise_model_update(
    1709           0 :         &ctx->noise_model, data, (const uint8_t *const *)ctx->denoised,
    1710           0 :         sd->width, sd->height, strides, chroma_sub_log2, ctx->flat_blocks,
    1711             :         block_size);
    1712             : 
    1713           0 :     int32_t have_noise_estimate = 0;
    1714           0 :     if (status == AOM_NOISE_STATUS_OK || status == AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE) {
    1715           0 :         eb_aom_noise_model_save_latest(&ctx->noise_model);
    1716           0 :         have_noise_estimate = 1;
    1717             :     }
    1718             : 
    1719           0 :     film_grain->apply_grain = 0;
    1720           0 :     if (have_noise_estimate) {
    1721           0 :         if (!eb_aom_noise_model_get_grain_parameters(&ctx->noise_model, film_grain)) {
    1722           0 :             fprintf(stderr, "Unable to get grain parameters.\n");
    1723           0 :             return 0;
    1724             :         }
    1725           0 :         film_grain->apply_grain = 1;
    1726             : 
    1727           0 :         if (!use_highbd) {
    1728           0 :             memcpy(raw_data[0], ctx->denoised[0],
    1729           0 :                 (strides[0] * sd->height) << use_highbd);
    1730           0 :             memcpy(raw_data[1], ctx->denoised[1],
    1731           0 :                 (strides[1] * (sd->height >> chroma_sub_log2[0])) << use_highbd);
    1732           0 :             memcpy(raw_data[2], ctx->denoised[2],
    1733           0 :                 (strides[2] * (sd->height >> chroma_sub_log2[0])) << use_highbd);
    1734             :         }
    1735             :         else
    1736           0 :             unpack_2d_pic(ctx->denoised, sd);
    1737             :     }
    1738           0 :     eb_aom_flat_block_finder_free(&ctx->flat_block_finder);
    1739           0 :     eb_aom_noise_model_free(&ctx->noise_model);
    1740           0 :     free(ctx->flat_blocks);
    1741           0 :     ctx->flat_blocks = NULL;
    1742             : 
    1743           0 :     return 1;
    1744             : }

Generated by: LCOV version 1.14