aom_dsp/noise_model.c

   1 /*
   2  * Copyright (c) 2017, Alliance for Open Media. All rights reserved
   3  *
   4  * This source code is subject to the terms of the BSD 2 Clause License and
   5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
   6  * was not distributed with this source code in the LICENSE file, you can
   7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
   8  * Media Patent License 1.0 was not distributed with this source code in the
   9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  10  */
  11
  12 #include <math.h>
  13 #include <stdio.h>
  14 #include <stdlib.h>
  15 #include <string.h>
  16
  17 #include "aom_dsp/aom_dsp_common.h"
  18 #include "aom_dsp/mathutils.h"
  19 #include "aom_dsp/noise_model.h"
  20 #include "aom_dsp/noise_util.h"
  21 #include "aom_mem/aom_mem.h"
  22
  23 #define kLowPolyNumParams 3
  24
  25 static const int kMaxLag = 4;
  26
  27 // Defines a function that can be used to obtain the mean of a block for the
  28 // provided data type (uint8_t, or uint16_t)
  29 #define GET_BLOCK_MEAN(INT_TYPE, suffix)                                    \
  30   static double get_block_mean_##suffix(const INT_TYPE *data, int w, int h, \
  31                                         int stride, int x_o, int y_o,       \
  32                                         int block_size) {                   \
  33     const int max_h = AOMMIN(h - y_o, block_size);                          \
  34     const int max_w = AOMMIN(w - x_o, block_size);                          \
  35     double block_mean = 0;                                                  \
  36     for (int y = 0; y < max_h; ++y) {                                       \
  37       for (int x = 0; x < max_w; ++x) {                                     \
  38         block_mean += data[(y_o + y) * stride + x_o + x];                   \
  39       }                                                                     \
  40     }                                                                       \
  41     return block_mean / (max_w * max_h);                                    \
  42   }
  43
  44 GET_BLOCK_MEAN(uint8_t, lowbd);
  45 GET_BLOCK_MEAN(uint16_t, highbd);
  46
  47 static INLINE double get_block_mean(const uint8_t *data, int w, int h,
  48                                     int stride, int x_o, int y_o,
  49                                     int block_size, int use_highbd) {
  50   if (use_highbd)
  51     return get_block_mean_highbd((const uint16_t *)data, w, h, stride, x_o, y_o,
  52                                  block_size);
  53   return get_block_mean_lowbd(data, w, h, stride, x_o, y_o, block_size);
  54 }
  55
  56 // Defines a function that can be used to obtain the variance of a block
  57 // for the provided data type (uint8_t, or uint16_t)
  58 #define GET_NOISE_VAR(INT_TYPE, suffix)                                  \
  59   static double get_noise_var_##suffix(                                  \
  60       const INT_TYPE *data, const INT_TYPE *denoised, int stride, int w, \
  61       int h, int x_o, int y_o, int block_size_x, int block_size_y) {     \
  62     const int max_h = AOMMIN(h - y_o, block_size_y);                     \
  63     const int max_w = AOMMIN(w - x_o, block_size_x);                     \
  64     double noise_var = 0;                                                \
  65     double noise_mean = 0;                                               \
  66     for (int y = 0; y < max_h; ++y) {                                    \
  67       for (int x = 0; x < max_w; ++x) {                                  \
  68         double noise = (double)data[(y_o + y) * stride + x_o + x] -      \
  69                        denoised[(y_o + y) * stride + x_o + x];           \
  70         noise_mean += noise;                                             \
  71         noise_var += noise * noise;                                      \
  72       }                                                                  \
  73     }                                                                    \
  74     noise_mean /= (max_w * max_h);                                       \
  75     return noise_var / (max_w * max_h) - noise_mean * noise_mean;        \
  76   }
  77
  78 GET_NOISE_VAR(uint8_t, lowbd);
  79 GET_NOISE_VAR(uint16_t, highbd);
  80
  81 static INLINE double get_noise_var(const uint8_t *data, const uint8_t *denoised,
  82                                    int w, int h, int stride, int x_o, int y_o,
  83                                    int block_size_x, int block_size_y,
  84                                    int use_highbd) {
  85   if (use_highbd)
  86     return get_noise_var_highbd((const uint16_t *)data,
  87                                 (const uint16_t *)denoised, w, h, stride, x_o,
  88                                 y_o, block_size_x, block_size_y);
  89   return get_noise_var_lowbd(data, denoised, w, h, stride, x_o, y_o,
  90                              block_size_x, block_size_y);
  91 }
  92
  93 static void equation_system_clear(aom_equation_system_t *eqns) {
  94   const int n = eqns->n;
  95   memset(eqns->A, 0, sizeof(*eqns->A) * n * n);
  96   memset(eqns->x, 0, sizeof(*eqns->x) * n);
  97   memset(eqns->b, 0, sizeof(*eqns->b) * n);
  98 }
  99
 100 static void equation_system_copy(aom_equation_system_t *dst,
 101                                  const aom_equation_system_t *src) {
 102   const int n = dst->n;
 103   memcpy(dst->A, src->A, sizeof(*dst->A) * n * n);
 104   memcpy(dst->x, src->x, sizeof(*dst->x) * n);
 105   memcpy(dst->b, src->b, sizeof(*dst->b) * n);
 106 }
 107
 108 static int equation_system_init(aom_equation_system_t *eqns, int n) {
 109   eqns->A = (double *)aom_malloc(sizeof(*eqns->A) * n * n);
 110   eqns->b = (double *)aom_malloc(sizeof(*eqns->b) * n);
 111   eqns->x = (double *)aom_malloc(sizeof(*eqns->x) * n);
 112   eqns->n = n;
 113   if (!eqns->A || !eqns->b || !eqns->x) {
 114     fprintf(stderr, "Failed to allocate system of equations of size %d\n", n);
 115     aom_free(eqns->A);
 116     aom_free(eqns->b);
 117     aom_free(eqns->x);
 118     memset(eqns, 0, sizeof(*eqns));
 119     return 0;
 120   }
 121   equation_system_clear(eqns);
 122   return 1;
 123 }
 124
 125 static int equation_system_solve(aom_equation_system_t *eqns) {
 126   const int n = eqns->n;
 127   double *b = (double *)aom_malloc(sizeof(*b) * n);
 128   double *A = (double *)aom_malloc(sizeof(*A) * n * n);
 129   int ret = 0;
 130   if (A == NULL || b == NULL) {
 131     fprintf(stderr, "Unable to allocate temp values of size %dx%d\n", n, n);
 132     aom_free(b);
 133     aom_free(A);
 134     return 0;
 135   }
 136   memcpy(A, eqns->A, sizeof(*eqns->A) * n * n);
 137   memcpy(b, eqns->b, sizeof(*eqns->b) * n);
 138   ret = linsolve(n, A, eqns->n, b, eqns->x);
 139   aom_free(b);
 140   aom_free(A);
 141
 142   if (ret == 0) {
 143     return 0;
 144   }
 145   return 1;
 146 }
 147
 148 static void equation_system_add(aom_equation_system_t *dest,
 149                                 aom_equation_system_t *src) {
 150   const int n = dest->n;
 151   int i, j;
 152   for (i = 0; i < n; ++i) {
 153     for (j = 0; j < n; ++j) {
 154       dest->A[i * n + j] += src->A[i * n + j];
 155     }
 156     dest->b[i] += src->b[i];
 157   }
 158 }
 159
 160 static void equation_system_free(aom_equation_system_t *eqns) {
 161   if (!eqns) return;
 162   aom_free(eqns->A);
 163   aom_free(eqns->b);
 164   aom_free(eqns->x);
 165   memset(eqns, 0, sizeof(*eqns));
 166 }
 167
 168 static void noise_strength_solver_clear(aom_noise_strength_solver_t *solver) {
 169   equation_system_clear(&solver->eqns);
 170   solver->num_equations = 0;
 171   solver->total = 0;
 172 }
 173
 174 static void noise_strength_solver_add(aom_noise_strength_solver_t *dest,
 175                                       aom_noise_strength_solver_t *src) {
 176   equation_system_add(&dest->eqns, &src->eqns);
 177   dest->num_equations += src->num_equations;
 178   dest->total += src->total;
 179 }
 180
 181 // Return the number of coefficients required for the given parameters
 182 static int num_coeffs(const aom_noise_model_params_t params) {
 183   const int n = 2 * params.lag + 1;
 184   switch (params.shape) {
 185     case AOM_NOISE_SHAPE_DIAMOND: return params.lag * (params.lag + 1);
 186     case AOM_NOISE_SHAPE_SQUARE: return (n * n) / 2;
 187   }
 188   return 0;
 189 }
 190
 191 static int noise_state_init(aom_noise_state_t *state, int n, int bit_depth) {
 192   const int kNumBins = 20;
 193   if (!equation_system_init(&state->eqns, n)) {
 194     fprintf(stderr, "Failed initialization noise state with size %d\n", n);
 195     return 0;
 196   }
 197   state->ar_gain = 1.0;
 198   state->num_observations = 0;
 199   return aom_noise_strength_solver_init(&state->strength_solver, kNumBins,
 200                                         bit_depth);
 201 }
 202
 203 static void set_chroma_coefficient_fallback_soln(aom_equation_system_t *eqns) {
 204   const double kTolerance = 1e-6;
 205   const int last = eqns->n - 1;
 206   // Set all of the AR coefficients to zero, but try to solve for correlation
 207   // with the luma channel
 208   memset(eqns->x, 0, sizeof(*eqns->x) * eqns->n);
 209   if (fabs(eqns->A[last * eqns->n + last]) > kTolerance) {
 210     eqns->x[last] = eqns->b[last] / eqns->A[last * eqns->n + last];
 211   }
 212 }
 213
 214 int aom_noise_strength_lut_init(aom_noise_strength_lut_t *lut, int num_points) {
 215   if (!lut) return 0;
 216   if (num_points <= 0) return 0;
 217   lut->num_points = 0;
 218   lut->points = (double(*)[2])aom_malloc(num_points * sizeof(*lut->points));
 219   if (!lut->points) return 0;
 220   lut->num_points = num_points;
 221   memset(lut->points, 0, sizeof(*lut->points) * num_points);
 222   return 1;
 223 }
 224
 225 void aom_noise_strength_lut_free(aom_noise_strength_lut_t *lut) {
 226   if (!lut) return;
 227   aom_free(lut->points);
 228   memset(lut, 0, sizeof(*lut));
 229 }
 230
 231 double aom_noise_strength_lut_eval(const aom_noise_strength_lut_t *lut,
 232                                    double x) {
 233   int i = 0;
 234   // Constant extrapolation for x <  x_0.
 235   if (x < lut->points[0][0]) return lut->points[0][1];
 236   for (i = 0; i < lut->num_points - 1; ++i) {
 237     if (x >= lut->points[i][0] && x <= lut->points[i + 1][0]) {
 238       const double a =
 239           (x - lut->points[i][0]) / (lut->points[i + 1][0] - lut->points[i][0]);
 240       return lut->points[i + 1][1] * a + lut->points[i][1] * (1.0 - a);
 241     }
 242   }
 243   // Constant extrapolation for x > x_{n-1}
 244   return lut->points[lut->num_points - 1][1];
 245 }
 246
 247 static double noise_strength_solver_get_bin_index(
 248     const aom_noise_strength_solver_t *solver, double value) {
 249   const double val =
 250       fclamp(value, solver->min_intensity, solver->max_intensity);
 251   const double range = solver->max_intensity - solver->min_intensity;
 252   return (solver->num_bins - 1) * (val - solver->min_intensity) / range;
 253 }
 254
 255 static double noise_strength_solver_get_value(
 256     const aom_noise_strength_solver_t *solver, double x) {
 257   const double bin = noise_strength_solver_get_bin_index(solver, x);
 258   const int bin_i0 = (int)floor(bin);
 259   const int bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
 260   const double a = bin - bin_i0;
 261   return (1.0 - a) * solver->eqns.x[bin_i0] + a * solver->eqns.x[bin_i1];
 262 }
 263
 264 void aom_noise_strength_solver_add_measurement(
 265     aom_noise_strength_solver_t *solver, double block_mean, double noise_std) {
 266   const double bin = noise_strength_solver_get_bin_index(solver, block_mean);
 267   const int bin_i0 = (int)floor(bin);
 268   const int bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
 269   const double a = bin - bin_i0;
 270   const int n = solver->num_bins;
 271   solver->eqns.A[bin_i0 * n + bin_i0] += (1.0 - a) * (1.0 - a);
 272   solver->eqns.A[bin_i1 * n + bin_i0] += a * (1.0 - a);
 273   solver->eqns.A[bin_i1 * n + bin_i1] += a * a;
 274   solver->eqns.A[bin_i0 * n + bin_i1] += a * (1.0 - a);
 275   solver->eqns.b[bin_i0] += (1.0 - a) * noise_std;
 276   solver->eqns.b[bin_i1] += a * noise_std;
 277   solver->total += noise_std;
 278   solver->num_equations++;
 279 }
 280
 281 int aom_noise_strength_solver_solve(aom_noise_strength_solver_t *solver) {
 282   // Add regularization proportional to the number of constraints
 283   const int n = solver->num_bins;
 284   const double kAlpha = 2.0 * (double)(solver->num_equations) / n;
 285   int result = 0;
 286   double mean = 0;
 287
 288   // Do this in a non-destructive manner so it is not confusing to the caller
 289   double *old_A = solver->eqns.A;
 290   double *A = (double *)aom_malloc(sizeof(*A) * n * n);
 291   if (!A) {
 292     fprintf(stderr, "Unable to allocate copy of A\n");
 293     return 0;
 294   }
 295   memcpy(A, old_A, sizeof(*A) * n * n);
 296
 297   for (int i = 0; i < n; ++i) {
 298     const int i_lo = AOMMAX(0, i - 1);
 299     const int i_hi = AOMMIN(n - 1, i + 1);
 300     A[i * n + i_lo] -= kAlpha;
 301     A[i * n + i] += 2 * kAlpha;
 302     A[i * n + i_hi] -= kAlpha;
 303   }
 304
 305   // Small regularization to give average noise strength
 306   mean = solver->total / solver->num_equations;
 307   for (int i = 0; i < n; ++i) {
 308     A[i * n + i] += 1.0 / 8192.;
 309     solver->eqns.b[i] += mean / 8192.;
 310   }
 311   solver->eqns.A = A;
 312   result = equation_system_solve(&solver->eqns);
 313   solver->eqns.A = old_A;
 314
 315   aom_free(A);
 316   return result;
 317 }
 318
 319 int aom_noise_strength_solver_init(aom_noise_strength_solver_t *solver,
 320                                    int num_bins, int bit_depth) {
 321   if (!solver) return 0;
 322   memset(solver, 0, sizeof(*solver));
 323   solver->num_bins = num_bins;
 324   solver->min_intensity = 0;
 325   solver->max_intensity = (1 << bit_depth) - 1;
 326   solver->total = 0;
 327   solver->num_equations = 0;
 328   return equation_system_init(&solver->eqns, num_bins);
 329 }
 330
 331 void aom_noise_strength_solver_free(aom_noise_strength_solver_t *solver) {
 332   if (!solver) return;
 333   equation_system_free(&solver->eqns);
 334 }
 335
 336 double aom_noise_strength_solver_get_center(
 337     const aom_noise_strength_solver_t *solver, int i) {
 338   const double range = solver->max_intensity - solver->min_intensity;
 339   const int n = solver->num_bins;
 340   return ((double)i) / (n - 1) * range + solver->min_intensity;
 341 }
 342
 343 // Computes the residual if a point were to be removed from the lut. This is
 344 // calculated as the area between the output of the solver and the line segment
 345 // that would be formed between [x_{i - 1}, x_{i + 1}).
 346 static void update_piecewise_linear_residual(
 347     const aom_noise_strength_solver_t *solver,
 348     const aom_noise_strength_lut_t *lut, double *residual, int start, int end) {
 349   const double dx = 255. / solver->num_bins;
 350   for (int i = AOMMAX(start, 1); i < AOMMIN(end, lut->num_points - 1); ++i) {
 351     const int lower = AOMMAX(0, (int)floor(noise_strength_solver_get_bin_index(
 352                                     solver, lut->points[i - 1][0])));
 353     const int upper = AOMMIN(solver->num_bins - 1,
 354                              (int)ceil(noise_strength_solver_get_bin_index(
 355                                  solver, lut->points[i + 1][0])));
 356     double r = 0;
 357     for (int j = lower; j <= upper; ++j) {
 358       const double x = aom_noise_strength_solver_get_center(solver, j);
 359       if (x < lut->points[i - 1][0]) continue;
 360       if (x >= lut->points[i + 1][0]) continue;
 361       const double y = solver->eqns.x[j];
 362       const double a = (x - lut->points[i - 1][0]) /
 363                        (lut->points[i + 1][0] - lut->points[i - 1][0]);
 364       const double estimate_y =
 365           lut->points[i - 1][1] * (1.0 - a) + lut->points[i + 1][1] * a;
 366       r += fabs(y - estimate_y);
 367     }
 368     residual[i] = r * dx;
 369   }
 370 }
 371
 372 int aom_noise_strength_solver_fit_piecewise(
 373     const aom_noise_strength_solver_t *solver, int max_output_points,
 374     aom_noise_strength_lut_t *lut) {
 375   // The tolerance is normalized to be give consistent results between
 376   // different bit-depths.
 377   const double kTolerance = solver->max_intensity * 0.00625 / 255.0;
 378   if (!aom_noise_strength_lut_init(lut, solver->num_bins)) {
 379     fprintf(stderr, "Failed to init lut\n");
 380     return 0;
 381   }
 382   for (int i = 0; i < solver->num_bins; ++i) {
 383     lut->points[i][0] = aom_noise_strength_solver_get_center(solver, i);
 384     lut->points[i][1] = solver->eqns.x[i];
 385   }
 386   if (max_output_points < 0) {
 387     max_output_points = solver->num_bins;
 388   }
 389
 390   double *residual = aom_malloc(solver->num_bins * sizeof(*residual));
 391   memset(residual, 0, sizeof(*residual) * solver->num_bins);
 392
 393   update_piecewise_linear_residual(solver, lut, residual, 0, solver->num_bins);
 394
 395   // Greedily remove points if there are too many or if it doesn't hurt local
 396   // approximation (never remove the end points)
 397   while (lut->num_points > 2) {
 398     int min_index = 1;
 399     for (int j = 1; j < lut->num_points - 1; ++j) {
 400       if (residual[j] < residual[min_index]) {
 401         min_index = j;
 402       }
 403     }
 404     const double dx =
 405         lut->points[min_index + 1][0] - lut->points[min_index - 1][0];
 406     const double avg_residual = residual[min_index] / dx;
 407     if (lut->num_points <= max_output_points && avg_residual > kTolerance) {
 408       break;
 409     }
 410
 411     const int num_remaining = lut->num_points - min_index - 1;
 412     memmove(lut->points + min_index, lut->points + min_index + 1,
 413             sizeof(lut->points[0]) * num_remaining);
 414     lut->num_points--;
 415
 416     update_piecewise_linear_residual(solver, lut, residual, min_index - 1,
 417                                      min_index + 1);
 418   }
 419   aom_free(residual);
 420   return 1;
 421 }
 422
 423 int aom_flat_block_finder_init(aom_flat_block_finder_t *block_finder,
 424                                int block_size, int bit_depth, int use_highbd) {
 425   const int n = block_size * block_size;
 426   aom_equation_system_t eqns;
 427   double *AtA_inv = 0;
 428   double *A = 0;
 429   int x = 0, y = 0, i = 0, j = 0;
 430   block_finder->A = NULL;
 431   block_finder->AtA_inv = NULL;
 432
 433   if (!equation_system_init(&eqns, kLowPolyNumParams)) {
 434     fprintf(stderr, "Failed to init equation system for block_size=%d\n",
 435             block_size);
 436     return 0;
 437   }
 438
 439   AtA_inv = (double *)aom_malloc(kLowPolyNumParams * kLowPolyNumParams *
 440                                  sizeof(*AtA_inv));
 441   A = (double *)aom_malloc(kLowPolyNumParams * n * sizeof(*A));
 442   if (AtA_inv == NULL || A == NULL) {
 443     fprintf(stderr, "Failed to alloc A or AtA_inv for block_size=%d\n",
 444             block_size);
 445     aom_free(AtA_inv);
 446     aom_free(A);
 447     equation_system_free(&eqns);
 448     return 0;
 449   }
 450
 451   block_finder->A = A;
 452   block_finder->AtA_inv = AtA_inv;
 453   block_finder->block_size = block_size;
 454   block_finder->normalization = (1 << bit_depth) - 1;
 455   block_finder->use_highbd = use_highbd;
 456
 457   for (y = 0; y < block_size; ++y) {
 458     const double yd = ((double)y - block_size / 2.) / (block_size / 2.);
 459     for (x = 0; x < block_size; ++x) {
 460       const double xd = ((double)x - block_size / 2.) / (block_size / 2.);
 461       const double coords[3] = { yd, xd, 1 };
 462       const int row = y * block_size + x;
 463       A[kLowPolyNumParams * row + 0] = yd;
 464       A[kLowPolyNumParams * row + 1] = xd;
 465       A[kLowPolyNumParams * row + 2] = 1;
 466
 467       for (i = 0; i < kLowPolyNumParams; ++i) {
 468         for (j = 0; j < kLowPolyNumParams; ++j) {
 469           eqns.A[kLowPolyNumParams * i + j] += coords[i] * coords[j];
 470         }
 471       }
 472     }
 473   }
 474
 475   // Lazy inverse using existing equation solver.
 476   for (i = 0; i < kLowPolyNumParams; ++i) {
 477     memset(eqns.b, 0, sizeof(*eqns.b) * kLowPolyNumParams);
 478     eqns.b[i] = 1;
 479     equation_system_solve(&eqns);
 480
 481     for (j = 0; j < kLowPolyNumParams; ++j) {
 482       AtA_inv[j * kLowPolyNumParams + i] = eqns.x[j];
 483     }
 484   }
 485   equation_system_free(&eqns);
 486   return 1;
 487 }
 488
 489 void aom_flat_block_finder_free(aom_flat_block_finder_t *block_finder) {
 490   if (!block_finder) return;
 491   aom_free(block_finder->A);
 492   aom_free(block_finder->AtA_inv);
 493   memset(block_finder, 0, sizeof(*block_finder));
 494 }
 495
 496 void aom_flat_block_finder_extract_block(
 497     const aom_flat_block_finder_t *block_finder, const uint8_t *const data,
 498     int w, int h, int stride, int offsx, int offsy, double *plane,
 499     double *block) {
 500   const int block_size = block_finder->block_size;
 501   const int n = block_size * block_size;
 502   const double *A = block_finder->A;
 503   const double *AtA_inv = block_finder->AtA_inv;
 504   double plane_coords[kLowPolyNumParams];
 505   double AtA_inv_b[kLowPolyNumParams];
 506   int xi, yi, i;
 507
 508   if (block_finder->use_highbd) {
 509     const uint16_t *const data16 = (const uint16_t *const)data;
 510     for (yi = 0; yi < block_size; ++yi) {
 511       const int y = clamp(offsy + yi, 0, h - 1);
 512       for (xi = 0; xi < block_size; ++xi) {
 513         const int x = clamp(offsx + xi, 0, w - 1);
 514         block[yi * block_size + xi] =
 515             ((double)data16[y * stride + x]) / block_finder->normalization;
 516       }
 517     }
 518   } else {
 519     for (yi = 0; yi < block_size; ++yi) {
 520       const int y = clamp(offsy + yi, 0, h - 1);
 521       for (xi = 0; xi < block_size; ++xi) {
 522         const int x = clamp(offsx + xi, 0, w - 1);
 523         block[yi * block_size + xi] =
 524             ((double)data[y * stride + x]) / block_finder->normalization;
 525       }
 526     }
 527   }
 528   multiply_mat(block, A, AtA_inv_b, 1, n, kLowPolyNumParams);
 529   multiply_mat(AtA_inv, AtA_inv_b, plane_coords, kLowPolyNumParams,
 530                kLowPolyNumParams, 1);
 531   multiply_mat(A, plane_coords, plane, n, kLowPolyNumParams, 1);
 532
 533   for (i = 0; i < n; ++i) {
 534     block[i] -= plane[i];
 535   }
 536 }
 537
 538 typedef struct {
 539   int index;
 540   float score;
 541 } index_and_score_t;
 542
 543 static int compare_scores(const void *a, const void *b) {
 544   const float diff =
 545       ((index_and_score_t *)a)->score - ((index_and_score_t *)b)->score;
 546   if (diff < 0)
 547     return -1;
 548   else if (diff > 0)
 549     return 1;
 550   return 0;
 551 }
 552
 553 int aom_flat_block_finder_run(const aom_flat_block_finder_t *block_finder,
 554                               const uint8_t *const data, int w, int h,
 555                               int stride, uint8_t *flat_blocks) {
 556   // The gradient-based features used in this code are based on:
 557   //  A. Kokaram, D. Kelly, H. Denman and A. Crawford, "Measuring noise
 558   //  correlation for improved video denoising," 2012 19th, ICIP.
 559   // The thresholds are more lenient to allow for correct grain modeling
 560   // if extreme cases.
 561   const int block_size = block_finder->block_size;
 562   const int n = block_size * block_size;
 563   const double kTraceThreshold = 0.15 / (32 * 32);
 564   const double kRatioThreshold = 1.25;
 565   const double kNormThreshold = 0.08 / (32 * 32);
 566   const double kVarThreshold = 0.005 / (double)n;
 567   const int num_blocks_w = (w + block_size - 1) / block_size;
 568   const int num_blocks_h = (h + block_size - 1) / block_size;
 569   int num_flat = 0;
 570   int bx = 0, by = 0;
 571   double *plane = (double *)aom_malloc(n * sizeof(*plane));
 572   double *block = (double *)aom_malloc(n * sizeof(*block));
 573   index_and_score_t *scores = (index_and_score_t *)aom_malloc(
 574       num_blocks_w * num_blocks_h * sizeof(*scores));
 575   if (plane == NULL || block == NULL || scores == NULL) {
 576     fprintf(stderr, "Failed to allocate memory for block of size %d\n", n);
 577     aom_free(plane);
 578     aom_free(block);
 579     aom_free(scores);
 580     return -1;
 581   }
 582
 583 #ifdef NOISE_MODEL_LOG_SCORE
 584   fprintf(stderr, "score = [");
 585 #endif
 586   for (by = 0; by < num_blocks_h; ++by) {
 587     for (bx = 0; bx < num_blocks_w; ++bx) {
 588       // Compute gradient covariance matrix.
 589       double Gxx = 0, Gxy = 0, Gyy = 0;
 590       double var = 0;
 591       double mean = 0;
 592       int xi, yi;
 593       aom_flat_block_finder_extract_block(block_finder, data, w, h, stride,
 594                                           bx * block_size, by * block_size,
 595                                           plane, block);
 596
 597       for (yi = 1; yi < block_size - 1; ++yi) {
 598         for (xi = 1; xi < block_size - 1; ++xi) {
 599           const double gx = (block[yi * block_size + xi + 1] -
 600                              block[yi * block_size + xi - 1]) /
 601                             2;
 602           const double gy = (block[yi * block_size + xi + block_size] -
 603                              block[yi * block_size + xi - block_size]) /
 604                             2;
 605           Gxx += gx * gx;
 606           Gxy += gx * gy;
 607           Gyy += gy * gy;
 608
 609           mean += block[yi * block_size + xi];
 610           var += block[yi * block_size + xi] * block[yi * block_size + xi];
 611         }
 612       }
 613       mean /= (block_size - 2) * (block_size - 2);
 614
 615       // Normalize gradients by block_size.
 616       Gxx /= ((block_size - 2) * (block_size - 2));
 617       Gxy /= ((block_size - 2) * (block_size - 2));
 618       Gyy /= ((block_size - 2) * (block_size - 2));
 619       var = var / ((block_size - 2) * (block_size - 2)) - mean * mean;
 620
 621       {
 622         const double trace = Gxx + Gyy;
 623         const double det = Gxx * Gyy - Gxy * Gxy;
 624         const double e1 = (trace + sqrt(trace * trace - 4 * det)) / 2.;
 625         const double e2 = (trace - sqrt(trace * trace - 4 * det)) / 2.;
 626         const double norm = e1;  // Spectral norm
 627         const double ratio = (e1 / AOMMAX(e2, 1e-6));
 628         const int is_flat = (trace < kTraceThreshold) &&
 629                             (ratio < kRatioThreshold) &&
 630                             (norm < kNormThreshold) && (var > kVarThreshold);
 631         // The following weights are used to combine the above features to give
 632         // a sigmoid score for flatness. If the input was normalized to [0,100]
 633         // the magnitude of these values would be close to 1 (e.g., weights
 634         // corresponding to variance would be a factor of 10000x smaller).
 635         // The weights are given in the following order:
 636         //    [{var}, {ratio}, {trace}, {norm}, offset]
 637         // with one of the most discriminative being simply the variance.
 638         const double weights[5] = { -6682, -0.2056, 13087, -12434, 2.5694 };
 639         double sum_weights = weights[0] * var + weights[1] * ratio +
 640                              weights[2] * trace + weights[3] * norm +
 641                              weights[4];
 642         // clamp the value to [-25.0, 100.0] to prevent overflow
 643         sum_weights = fclamp(sum_weights, -25.0, 100.0);
 644         const float score = (float)(1.0 / (1 + exp(-sum_weights)));
 645         flat_blocks[by * num_blocks_w + bx] = is_flat ? 255 : 0;
 646         scores[by * num_blocks_w + bx].score = var > kVarThreshold ? score : 0;
 647         scores[by * num_blocks_w + bx].index = by * num_blocks_w + bx;
 648 #ifdef NOISE_MODEL_LOG_SCORE
 649         fprintf(stderr, "%g %g %g %g %g %d ", score, var, ratio, trace, norm,
 650                 is_flat);
 651 #endif
 652         num_flat += is_flat;
 653       }
 654     }
 655 #ifdef NOISE_MODEL_LOG_SCORE
 656     fprintf(stderr, "\n");
 657 #endif
 658   }
 659 #ifdef NOISE_MODEL_LOG_SCORE
 660   fprintf(stderr, "];\n");
 661 #endif
 662   // Find the top-scored blocks (most likely to be flat) and set the flat blocks
 663   // be the union of the thresholded results and the top 10th percentile of the
 664   // scored results.
 665   qsort(scores, num_blocks_w * num_blocks_h, sizeof(*scores), &compare_scores);
 666   const int top_nth_percentile = num_blocks_w * num_blocks_h * 90 / 100;
 667   const float score_threshold = scores[top_nth_percentile].score;
 668   for (int i = 0; i < num_blocks_w * num_blocks_h; ++i) {
 669     if (scores[i].score >= score_threshold) {
 670       num_flat += flat_blocks[scores[i].index] == 0;
 671       flat_blocks[scores[i].index] |= 1;
 672     }
 673   }
 674   aom_free(block);
 675   aom_free(plane);
 676   aom_free(scores);
 677   return num_flat;
 678 }
 679
 680 int aom_noise_model_init(aom_noise_model_t *model,
 681                          const aom_noise_model_params_t params) {
 682   const int n = num_coeffs(params);
 683   const int lag = params.lag;
 684   const int bit_depth = params.bit_depth;
 685   int x = 0, y = 0, i = 0, c = 0;
 686
 687   memset(model, 0, sizeof(*model));
 688   if (params.lag < 1) {
 689     fprintf(stderr, "Invalid noise param: lag = %d must be >= 1\n", params.lag);
 690     return 0;
 691   }
 692   if (params.lag > kMaxLag) {
 693     fprintf(stderr, "Invalid noise param: lag = %d must be <= %d\n", params.lag,
 694             kMaxLag);
 695     return 0;
 696   }
 697
 698   memcpy(&model->params, &params, sizeof(params));
 699   for (c = 0; c < 3; ++c) {
 700     if (!noise_state_init(&model->combined_state[c], n + (c > 0), bit_depth)) {
 701       fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
 702       aom_noise_model_free(model);
 703       return 0;
 704     }
 705     if (!noise_state_init(&model->latest_state[c], n + (c > 0), bit_depth)) {
 706       fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
 707       aom_noise_model_free(model);
 708       return 0;
 709     }
 710   }
 711   model->n = n;
 712   model->coords = (int(*)[2])aom_malloc(sizeof(*model->coords) * n);
 713
 714   for (y = -lag; y <= 0; ++y) {
 715     const int max_x = y == 0 ? -1 : lag;
 716     for (x = -lag; x <= max_x; ++x) {
 717       switch (params.shape) {
 718         case AOM_NOISE_SHAPE_DIAMOND:
 719           if (abs(x) <= y + lag) {
 720             model->coords[i][0] = x;
 721             model->coords[i][1] = y;
 722             ++i;
 723           }
 724           break;
 725         case AOM_NOISE_SHAPE_SQUARE:
 726           model->coords[i][0] = x;
 727           model->coords[i][1] = y;
 728           ++i;
 729           break;
 730         default:
 731           fprintf(stderr, "Invalid shape\n");
 732           aom_noise_model_free(model);
 733           return 0;
 734       }
 735     }
 736   }
 737   assert(i == n);
 738   return 1;
 739 }
 740
 741 void aom_noise_model_free(aom_noise_model_t *model) {
 742   int c = 0;
 743   if (!model) return;
 744
 745   aom_free(model->coords);
 746   for (c = 0; c < 3; ++c) {
 747     equation_system_free(&model->latest_state[c].eqns);
 748     equation_system_free(&model->combined_state[c].eqns);
 749
 750     equation_system_free(&model->latest_state[c].strength_solver.eqns);
 751     equation_system_free(&model->combined_state[c].strength_solver.eqns);
 752   }
 753   memset(model, 0, sizeof(*model));
 754 }
 755
 756 // Extracts the neighborhood defined by coords around point (x, y) from
 757 // the difference between the data and denoised images. Also extracts the
 758 // entry (possibly downsampled) for (x, y) in the alt_data (e.g., luma).
 759 #define EXTRACT_AR_ROW(INT_TYPE, suffix)                                   \
 760   static double extract_ar_row_##suffix(                                   \
 761       int(*coords)[2], int num_coords, const INT_TYPE *const data,         \
 762       const INT_TYPE *const denoised, int stride, int sub_log2[2],         \
 763       const INT_TYPE *const alt_data, const INT_TYPE *const alt_denoised,  \
 764       int alt_stride, int x, int y, double *buffer) {                      \
 765     for (int i = 0; i < num_coords; ++i) {                                 \
 766       const int x_i = x + coords[i][0], y_i = y + coords[i][1];            \
 767       buffer[i] =                                                          \
 768           (double)data[y_i * stride + x_i] - denoised[y_i * stride + x_i]; \
 769     }                                                                      \
 770     const double val =                                                     \
 771         (double)data[y * stride + x] - denoised[y * stride + x];           \
 772                                                                            \
 773     if (alt_data && alt_denoised) {                                        \
 774       double avg_data = 0, avg_denoised = 0;                               \
 775       int num_samples = 0;                                                 \
 776       for (int dy_i = 0; dy_i < (1 << sub_log2[1]); dy_i++) {              \
 777         const int y_up = (y << sub_log2[1]) + dy_i;                        \
 778         for (int dx_i = 0; dx_i < (1 << sub_log2[0]); dx_i++) {            \
 779           const int x_up = (x << sub_log2[0]) + dx_i;                      \
 780           avg_data += alt_data[y_up * alt_stride + x_up];                  \
 781           avg_denoised += alt_denoised[y_up * alt_stride + x_up];          \
 782           num_samples++;                                                   \
 783         }                                                                  \
 784       }                                                                    \
 785       buffer[num_coords] = (avg_data - avg_denoised) / num_samples;        \
 786     }                                                                      \
 787     return val;                                                            \
 788   }
 789
 790 EXTRACT_AR_ROW(uint8_t, lowbd);
 791 EXTRACT_AR_ROW(uint16_t, highbd);
 792
 793 static int add_block_observations(
 794     aom_noise_model_t *noise_model, int c, const uint8_t *const data,
 795     const uint8_t *const denoised, int w, int h, int stride, int sub_log2[2],
 796     const uint8_t *const alt_data, const uint8_t *const alt_denoised,
 797     int alt_stride, const uint8_t *const flat_blocks, int block_size,
 798     int num_blocks_w, int num_blocks_h) {
 799   const int lag = noise_model->params.lag;
 800   const int num_coords = noise_model->n;
 801   const double normalization = (1 << noise_model->params.bit_depth) - 1;
 802   double *A = noise_model->latest_state[c].eqns.A;
 803   double *b = noise_model->latest_state[c].eqns.b;
 804   double *buffer = (double *)aom_malloc(sizeof(*buffer) * (num_coords + 1));
 805   const int n = noise_model->latest_state[c].eqns.n;
 806
 807   if (!buffer) {
 808     fprintf(stderr, "Unable to allocate buffer of size %d\n", num_coords + 1);
 809     return 0;
 810   }
 811   for (int by = 0; by < num_blocks_h; ++by) {
 812     const int y_o = by * (block_size >> sub_log2[1]);
 813     for (int bx = 0; bx < num_blocks_w; ++bx) {
 814       const int x_o = bx * (block_size >> sub_log2[0]);
 815       if (!flat_blocks[by * num_blocks_w + bx]) {
 816         continue;
 817       }
 818       int y_start =
 819           (by > 0 && flat_blocks[(by - 1) * num_blocks_w + bx]) ? 0 : lag;
 820       int x_start =
 821           (bx > 0 && flat_blocks[by * num_blocks_w + bx - 1]) ? 0 : lag;
 822       int y_end = AOMMIN((h >> sub_log2[1]) - by * (block_size >> sub_log2[1]),
 823                          block_size >> sub_log2[1]);
 824       int x_end = AOMMIN(
 825           (w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]) - lag,
 826           (bx + 1 < num_blocks_w && flat_blocks[by * num_blocks_w + bx + 1])
 827               ? (block_size >> sub_log2[0])
 828               : ((block_size >> sub_log2[0]) - lag));
 829       for (int y = y_start; y < y_end; ++y) {
 830         for (int x = x_start; x < x_end; ++x) {
 831           const double val =
 832               noise_model->params.use_highbd
 833                   ? extract_ar_row_highbd(noise_model->coords, num_coords,
 834                                           (const uint16_t *const)data,
 835                                           (const uint16_t *const)denoised,
 836                                           stride, sub_log2,
 837                                           (const uint16_t *const)alt_data,
 838                                           (const uint16_t *const)alt_denoised,
 839                                           alt_stride, x + x_o, y + y_o, buffer)
 840                   : extract_ar_row_lowbd(noise_model->coords, num_coords, data,
 841                                          denoised, stride, sub_log2, alt_data,
 842                                          alt_denoised, alt_stride, x + x_o,
 843                                          y + y_o, buffer);
 844           for (int i = 0; i < n; ++i) {
 845             for (int j = 0; j < n; ++j) {
 846               A[i * n + j] +=
 847                   (buffer[i] * buffer[j]) / (normalization * normalization);
 848             }
 849             b[i] += (buffer[i] * val) / (normalization * normalization);
 850           }
 851           noise_model->latest_state[c].num_observations++;
 852         }
 853       }
 854     }
 855   }
 856   aom_free(buffer);
 857   return 1;
 858 }
 859
 860 static void add_noise_std_observations(
 861     aom_noise_model_t *noise_model, int c, const double *coeffs,
 862     const uint8_t *const data, const uint8_t *const denoised, int w, int h,
 863     int stride, int sub_log2[2], const uint8_t *const alt_data, int alt_stride,
 864     const uint8_t *const flat_blocks, int block_size, int num_blocks_w,
 865     int num_blocks_h) {
 866   const int num_coords = noise_model->n;
 867   aom_noise_strength_solver_t *noise_strength_solver =
 868       &noise_model->latest_state[c].strength_solver;
 869
 870   const aom_noise_strength_solver_t *noise_strength_luma =
 871       &noise_model->latest_state[0].strength_solver;
 872   const double luma_gain = noise_model->latest_state[0].ar_gain;
 873   const double noise_gain = noise_model->latest_state[c].ar_gain;
 874   for (int by = 0; by < num_blocks_h; ++by) {
 875     const int y_o = by * (block_size >> sub_log2[1]);
 876     for (int bx = 0; bx < num_blocks_w; ++bx) {
 877       const int x_o = bx * (block_size >> sub_log2[0]);
 878       if (!flat_blocks[by * num_blocks_w + bx]) {
 879         continue;
 880       }
 881       const int num_samples_h =
 882           AOMMIN((h >> sub_log2[1]) - by * (block_size >> sub_log2[1]),
 883                  block_size >> sub_log2[1]);
 884       const int num_samples_w =
 885           AOMMIN((w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]),
 886                  (block_size >> sub_log2[0]));
 887       // Make sure that we have a reasonable amount of samples to consider the
 888       // block
 889       if (num_samples_w * num_samples_h > block_size) {
 890         const double block_mean = get_block_mean(
 891             alt_data ? alt_data : data, w, h, alt_data ? alt_stride : stride,
 892             x_o << sub_log2[0], y_o << sub_log2[1], block_size,
 893             noise_model->params.use_highbd);
 894         const double noise_var = get_noise_var(
 895             data, denoised, stride, w >> sub_log2[0], h >> sub_log2[1], x_o,
 896             y_o, block_size >> sub_log2[0], block_size >> sub_log2[1],
 897             noise_model->params.use_highbd);
 898         // We want to remove the part of the noise that came from being
 899         // correlated with luma. Note that the noise solver for luma must
 900         // have already been run.
 901         const double luma_strength =
 902             c > 0 ? luma_gain * noise_strength_solver_get_value(
 903                                     noise_strength_luma, block_mean)
 904                   : 0;
 905         const double corr = c > 0 ? coeffs[num_coords] : 0;
 906         // Chroma noise:
 907         //    N(0, noise_var) = N(0, uncorr_var) + corr * N(0, luma_strength^2)
 908         // The uncorrelated component:
 909         //   uncorr_var = noise_var - (corr * luma_strength)^2
 910         // But don't allow fully correlated noise (hence the max), since the
 911         // synthesis cannot model it.
 912         const double uncorr_std = sqrt(
 913             AOMMAX(noise_var / 16, noise_var - pow(corr * luma_strength, 2)));
 914         // After we've removed correlation with luma, undo the gain that will
 915         // come from running the IIR filter.
 916         const double adjusted_strength = uncorr_std / noise_gain;
 917         aom_noise_strength_solver_add_measurement(
 918             noise_strength_solver, block_mean, adjusted_strength);
 919       }
 920     }
 921   }
 922 }
 923
 924 // Return true if the noise estimate appears to be different from the combined
 925 // (multi-frame) estimate. The difference is measured by checking whether the
 926 // AR coefficients have diverged (using a threshold on normalized cross
 927 // correlation), or whether the noise strength has changed.
 928 static int is_noise_model_different(aom_noise_model_t *const noise_model) {
 929   // These thresholds are kind of arbitrary and will likely need further tuning
 930   // (or exported as parameters). The threshold on noise strength is a weighted
 931   // difference between the noise strength histograms
 932   const double kCoeffThreshold = 0.9;
 933   const double kStrengthThreshold =
 934       0.005 * (1 << (noise_model->params.bit_depth - 8));
 935   for (int c = 0; c < 1; ++c) {
 936     const double corr =
 937         aom_normalized_cross_correlation(noise_model->latest_state[c].eqns.x,
 938                                          noise_model->combined_state[c].eqns.x,
 939                                          noise_model->combined_state[c].eqns.n);
 940     if (corr < kCoeffThreshold) return 1;
 941
 942     const double dx =
 943         1.0 / noise_model->latest_state[c].strength_solver.num_bins;
 944
 945     const aom_equation_system_t *latest_eqns =
 946         &noise_model->latest_state[c].strength_solver.eqns;
 947     const aom_equation_system_t *combined_eqns =
 948         &noise_model->combined_state[c].strength_solver.eqns;
 949     double diff = 0;
 950     double total_weight = 0;
 951     for (int j = 0; j < latest_eqns->n; ++j) {
 952       double weight = 0;
 953       for (int i = 0; i < latest_eqns->n; ++i) {
 954         weight += latest_eqns->A[i * latest_eqns->n + j];
 955       }
 956       weight = sqrt(weight);
 957       diff += weight * fabs(latest_eqns->x[j] - combined_eqns->x[j]);
 958       total_weight += weight;
 959     }
 960     if (diff * dx / total_weight > kStrengthThreshold) return 1;
 961   }
 962   return 0;
 963 }
 964
 965 static int ar_equation_system_solve(aom_noise_state_t *state, int is_chroma) {
 966   const int ret = equation_system_solve(&state->eqns);
 967   state->ar_gain = 1.0;
 968   if (!ret) return ret;
 969
 970   // Update the AR gain from the equation system as it will be used to fit
 971   // the noise strength as a function of intensity.  In the Yule-Walker
 972   // equations, the diagonal should be the variance of the correlated noise.
 973   // In the case of the least squares estimate, there will be some variability
 974   // in the diagonal. So use the mean of the diagonal as the estimate of
 975   // overall variance (this works for least squares or Yule-Walker formulation).
 976   double var = 0;
 977   const int n = state->eqns.n;
 978   for (int i = 0; i < (state->eqns.n - is_chroma); ++i) {
 979     var += state->eqns.A[i * n + i] / state->num_observations;
 980   }
 981   var /= (n - is_chroma);
 982
 983   // Keep track of E(Y^2) = <b, x> + E(X^2)
 984   // In the case that we are using chroma and have an estimate of correlation
 985   // with luma we adjust that estimate slightly to remove the correlated bits by
 986   // subtracting out the last column of a scaled by our correlation estimate
 987   // from b. E(y^2) = <b - A(:, end)*x(end), x>
 988   double sum_covar = 0;
 989   for (int i = 0; i < state->eqns.n - is_chroma; ++i) {
 990     double bi = state->eqns.b[i];
 991     if (is_chroma) {
 992       bi -= state->eqns.A[i * n + (n - 1)] * state->eqns.x[n - 1];
 993     }
 994     sum_covar += (bi * state->eqns.x[i]) / state->num_observations;
 995   }
 996   // Now, get an estimate of the variance of uncorrelated noise signal and use
 997   // it to determine the gain of the AR filter.
 998   const double noise_var = AOMMAX(var - sum_covar, 1e-6);
 999   state->ar_gain = AOMMAX(1, sqrt(AOMMAX(var / noise_var, 1e-6)));
1000   return ret;
1001 }
1002
1003 aom_noise_status_t aom_noise_model_update(
1004     aom_noise_model_t *const noise_model, const uint8_t *const data[3],
1005     const uint8_t *const denoised[3], int w, int h, int stride[3],
1006     int chroma_sub_log2[2], const uint8_t *const flat_blocks, int block_size) {
1007   const int num_blocks_w = (w + block_size - 1) / block_size;
1008   const int num_blocks_h = (h + block_size - 1) / block_size;
1009   int y_model_different = 0;
1010   int num_blocks = 0;
1011   int i = 0, channel = 0;
1012
1013   if (block_size <= 1) {
1014     fprintf(stderr, "block_size = %d must be > 1\n", block_size);
1015     return AOM_NOISE_STATUS_INVALID_ARGUMENT;
1016   }
1017
1018   if (block_size < noise_model->params.lag * 2 + 1) {
1019     fprintf(stderr, "block_size = %d must be >= %d\n", block_size,
1020             noise_model->params.lag * 2 + 1);
1021     return AOM_NOISE_STATUS_INVALID_ARGUMENT;
1022   }
1023
1024   // Clear the latest equation system
1025   for (i = 0; i < 3; ++i) {
1026     equation_system_clear(&noise_model->latest_state[i].eqns);
1027     noise_model->latest_state[i].num_observations = 0;
1028     noise_strength_solver_clear(&noise_model->latest_state[i].strength_solver);
1029   }
1030
1031   // Check that we have enough flat blocks
1032   for (i = 0; i < num_blocks_h * num_blocks_w; ++i) {
1033     if (flat_blocks[i]) {
1034       num_blocks++;
1035     }
1036   }
1037
1038   if (num_blocks <= 1) {
1039     fprintf(stderr, "Not enough flat blocks to update noise estimate\n");
1040     return AOM_NOISE_STATUS_INSUFFICIENT_FLAT_BLOCKS;
1041   }
1042
1043   for (channel = 0; channel < 3; ++channel) {
1044     int no_subsampling[2] = { 0, 0 };
1045     const uint8_t *alt_data = channel > 0 ? data[0] : 0;
1046     const uint8_t *alt_denoised = channel > 0 ? denoised[0] : 0;
1047     int *sub = channel > 0 ? chroma_sub_log2 : no_subsampling;
1048     const int is_chroma = channel != 0;
1049     if (!data[channel] || !denoised[channel]) break;
1050     if (!add_block_observations(noise_model, channel, data[channel],
1051                                 denoised[channel], w, h, stride[channel], sub,
1052                                 alt_data, alt_denoised, stride[0], flat_blocks,
1053                                 block_size, num_blocks_w, num_blocks_h)) {
1054       fprintf(stderr, "Adding block observation failed\n");
1055       return AOM_NOISE_STATUS_INTERNAL_ERROR;
1056     }
1057
1058     if (!ar_equation_system_solve(&noise_model->latest_state[channel],
1059                                   is_chroma)) {
1060       if (is_chroma) {
1061         set_chroma_coefficient_fallback_soln(
1062             &noise_model->latest_state[channel].eqns);
1063       } else {
1064         fprintf(stderr, "Solving latest noise equation system failed %d!\n",
1065                 channel);
1066         return AOM_NOISE_STATUS_INTERNAL_ERROR;
1067       }
1068     }
1069
1070     add_noise_std_observations(
1071         noise_model, channel, noise_model->latest_state[channel].eqns.x,
1072         data[channel], denoised[channel], w, h, stride[channel], sub, alt_data,
1073         stride[0], flat_blocks, block_size, num_blocks_w, num_blocks_h);
1074
1075     if (!aom_noise_strength_solver_solve(
1076             &noise_model->latest_state[channel].strength_solver)) {
1077       fprintf(stderr, "Solving latest noise strength failed!\n");
1078       return AOM_NOISE_STATUS_INTERNAL_ERROR;
1079     }
1080
1081     // Check noise characteristics and return if error.
1082     if (channel == 0 &&
1083         noise_model->combined_state[channel].strength_solver.num_equations >
1084             0 &&
1085         is_noise_model_different(noise_model)) {
1086       y_model_different = 1;
1087     }
1088
1089     // Don't update the combined stats if the y model is different.
1090     if (y_model_different) continue;
1091
1092     noise_model->combined_state[channel].num_observations +=
1093         noise_model->latest_state[channel].num_observations;
1094     equation_system_add(&noise_model->combined_state[channel].eqns,
1095                         &noise_model->latest_state[channel].eqns);
1096     if (!ar_equation_system_solve(&noise_model->combined_state[channel],
1097                                   is_chroma)) {
1098       if (is_chroma) {
1099         set_chroma_coefficient_fallback_soln(
1100             &noise_model->combined_state[channel].eqns);
1101       } else {
1102         fprintf(stderr, "Solving combined noise equation system failed %d!\n",
1103                 channel);
1104         return AOM_NOISE_STATUS_INTERNAL_ERROR;
1105       }
1106     }
1107
1108     noise_strength_solver_add(
1109         &noise_model->combined_state[channel].strength_solver,
1110         &noise_model->latest_state[channel].strength_solver);
1111
1112     if (!aom_noise_strength_solver_solve(
1113             &noise_model->combined_state[channel].strength_solver)) {
1114       fprintf(stderr, "Solving combined noise strength failed!\n");
1115       return AOM_NOISE_STATUS_INTERNAL_ERROR;
1116     }
1117   }
1118
1119   return y_model_different ? AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE
1120                            : AOM_NOISE_STATUS_OK;
1121 }
1122
1123 void aom_noise_model_save_latest(aom_noise_model_t *noise_model) {
1124   for (int c = 0; c < 3; c++) {
1125     equation_system_copy(&noise_model->combined_state[c].eqns,
1126                          &noise_model->latest_state[c].eqns);
1127     equation_system_copy(&noise_model->combined_state[c].strength_solver.eqns,
1128                          &noise_model->latest_state[c].strength_solver.eqns);
1129     noise_model->combined_state[c].strength_solver.num_equations =
1130         noise_model->latest_state[c].strength_solver.num_equations;
1131     noise_model->combined_state[c].num_observations =
1132         noise_model->latest_state[c].num_observations;
1133     noise_model->combined_state[c].ar_gain =
1134         noise_model->latest_state[c].ar_gain;
1135   }
1136 }
1137
1138 int aom_noise_model_get_grain_parameters(aom_noise_model_t *const noise_model,
1139                                          aom_film_grain_t *film_grain) {
1140   if (noise_model->params.lag > 3) {
1141     fprintf(stderr, "params.lag = %d > 3\n", noise_model->params.lag);
1142     return 0;
1143   }
1144   uint16_t random_seed = film_grain->random_seed;
1145   memset(film_grain, 0, sizeof(*film_grain));
1146   film_grain->random_seed = random_seed;
1147
1148   film_grain->apply_grain = 1;
1149   film_grain->update_parameters = 1;
1150
1151   film_grain->ar_coeff_lag = noise_model->params.lag;
1152
1153   // Convert the scaling functions to 8 bit values
1154   aom_noise_strength_lut_t scaling_points[3];
1155   if (!aom_noise_strength_solver_fit_piecewise(
1156           &noise_model->combined_state[0].strength_solver, 14,
1157           scaling_points + 0)) {
1158     return 0;
1159   }
1160   if (!aom_noise_strength_solver_fit_piecewise(
1161           &noise_model->combined_state[1].strength_solver, 10,
1162           scaling_points + 1)) {
1163     aom_noise_strength_lut_free(scaling_points + 0);
1164     return 0;
1165   }
1166   if (!aom_noise_strength_solver_fit_piecewise(
1167           &noise_model->combined_state[2].strength_solver, 10,
1168           scaling_points + 2)) {
1169     aom_noise_strength_lut_free(scaling_points + 0);
1170     aom_noise_strength_lut_free(scaling_points + 1);
1171     return 0;
1172   }
1173
1174   // Both the domain and the range of the scaling functions in the film_grain
1175   // are normalized to 8-bit (e.g., they are implicitly scaled during grain
1176   // synthesis).
1177   const double strength_divisor = 1 << (noise_model->params.bit_depth - 8);
1178   double max_scaling_value = 1e-4;
1179   for (int c = 0; c < 3; ++c) {
1180     for (int i = 0; i < scaling_points[c].num_points; ++i) {
1181       scaling_points[c].points[i][0] =
1182           AOMMIN(255, scaling_points[c].points[i][0] / strength_divisor);
1183       scaling_points[c].points[i][1] =
1184           AOMMIN(255, scaling_points[c].points[i][1] / strength_divisor);
1185       max_scaling_value =
1186           AOMMAX(scaling_points[c].points[i][1], max_scaling_value);
1187     }
1188   }
1189
1190   // Scaling_shift values are in the range [8,11]
1191   const int max_scaling_value_log2 =
1192       clamp((int)floor(log2(max_scaling_value) + 1), 2, 5);
1193   film_grain->scaling_shift = 5 + (8 - max_scaling_value_log2);
1194
1195   const double scale_factor = 1 << (8 - max_scaling_value_log2);
1196   film_grain->num_y_points = scaling_points[0].num_points;
1197   film_grain->num_cb_points = scaling_points[1].num_points;
1198   film_grain->num_cr_points = scaling_points[2].num_points;
1199
1200   int(*film_grain_scaling[3])[2] = {
1201     film_grain->scaling_points_y,
1202     film_grain->scaling_points_cb,
1203     film_grain->scaling_points_cr,
1204   };
1205   for (int c = 0; c < 3; c++) {
1206     for (int i = 0; i < scaling_points[c].num_points; ++i) {
1207       film_grain_scaling[c][i][0] = (int)(scaling_points[c].points[i][0] + 0.5);
1208       film_grain_scaling[c][i][1] = clamp(
1209           (int)(scale_factor * scaling_points[c].points[i][1] + 0.5), 0, 255);
1210     }
1211   }
1212   aom_noise_strength_lut_free(scaling_points + 0);
1213   aom_noise_strength_lut_free(scaling_points + 1);
1214   aom_noise_strength_lut_free(scaling_points + 2);
1215
1216   // Convert the ar_coeffs into 8-bit values
1217   const int n_coeff = noise_model->combined_state[0].eqns.n;
1218   double max_coeff = 1e-4, min_coeff = -1e-4;
1219   double y_corr[2] = { 0, 0 };
1220   double avg_luma_strength = 0;
1221   for (int c = 0; c < 3; c++) {
1222     aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
1223     for (int i = 0; i < n_coeff; ++i) {
1224       max_coeff = AOMMAX(max_coeff, eqns->x[i]);
1225       min_coeff = AOMMIN(min_coeff, eqns->x[i]);
1226     }
1227     // Since the correlation between luma/chroma was computed in an already
1228     // scaled space, we adjust it in the un-scaled space.
1229     aom_noise_strength_solver_t *solver =
1230         &noise_model->combined_state[c].strength_solver;
1231     // Compute a weighted average of the strength for the channel.
1232     double average_strength = 0, total_weight = 0;
1233     for (int i = 0; i < solver->eqns.n; ++i) {
1234       double w = 0;
1235       for (int j = 0; j < solver->eqns.n; ++j) {
1236         w += solver->eqns.A[i * solver->eqns.n + j];
1237       }
1238       w = sqrt(w);
1239       average_strength += solver->eqns.x[i] * w;
1240       total_weight += w;
1241     }
1242     if (total_weight == 0)
1243       average_strength = 1;
1244     else
1245       average_strength /= total_weight;
1246     if (c == 0) {
1247       avg_luma_strength = average_strength;
1248     } else {
1249       y_corr[c - 1] = avg_luma_strength * eqns->x[n_coeff] / average_strength;
1250       max_coeff = AOMMAX(max_coeff, y_corr[c - 1]);
1251       min_coeff = AOMMIN(min_coeff, y_corr[c - 1]);
1252     }
1253   }
1254   // Shift value: AR coeffs range (values 6-9)
1255   // 6: [-2, 2),  7: [-1, 1), 8: [-0.5, 0.5), 9: [-0.25, 0.25)
1256   film_grain->ar_coeff_shift =
1257       clamp(7 - (int)AOMMAX(1 + floor(log2(max_coeff)), ceil(log2(-min_coeff))),
1258             6, 9);
1259   double scale_ar_coeff = 1 << film_grain->ar_coeff_shift;
1260   int *ar_coeffs[3] = {
1261     film_grain->ar_coeffs_y,
1262     film_grain->ar_coeffs_cb,
1263     film_grain->ar_coeffs_cr,
1264   };
1265   for (int c = 0; c < 3; ++c) {
1266     aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
1267     for (int i = 0; i < n_coeff; ++i) {
1268       ar_coeffs[c][i] =
1269           clamp((int)round(scale_ar_coeff * eqns->x[i]), -128, 127);
1270     }
1271     if (c > 0) {
1272       ar_coeffs[c][n_coeff] =
1273           clamp((int)round(scale_ar_coeff * y_corr[c - 1]), -128, 127);
1274     }
1275   }
1276
1277   // At the moment, the noise modeling code assumes that the chroma scaling
1278   // functions are a function of luma.
1279   film_grain->cb_mult = 128;       // 8 bits
1280   film_grain->cb_luma_mult = 192;  // 8 bits
1281   film_grain->cb_offset = 256;     // 9 bits
1282
1283   film_grain->cr_mult = 128;       // 8 bits
1284   film_grain->cr_luma_mult = 192;  // 8 bits
1285   film_grain->cr_offset = 256;     // 9 bits
1286
1287   film_grain->chroma_scaling_from_luma = 0;
1288   film_grain->grain_scale_shift = 0;
1289   film_grain->overlap_flag = 1;
1290   return 1;
1291 }
1292
1293 static void pointwise_multiply(const float *a, float *b, int n) {
1294   for (int i = 0; i < n; ++i) {
1295     b[i] *= a[i];
1296   }
1297 }
1298
1299 static float *get_half_cos_window(int block_size) {
1300   float *window_function =
1301       (float *)aom_malloc(block_size * block_size * sizeof(*window_function));
1302   for (int y = 0; y < block_size; ++y) {
1303     const double cos_yd = cos((.5 + y) * PI / block_size - PI / 2);
1304     for (int x = 0; x < block_size; ++x) {
1305       const double cos_xd = cos((.5 + x) * PI / block_size - PI / 2);
1306       window_function[y * block_size + x] = (float)(cos_yd * cos_xd);
1307     }
1308   }
1309   return window_function;
1310 }
1311
1312 #define DITHER_AND_QUANTIZE(INT_TYPE, suffix)                               \
1313   static void dither_and_quantize_##suffix(                                 \
1314       float *result, int result_stride, INT_TYPE *denoised, int w, int h,   \
1315       int stride, int chroma_sub_w, int chroma_sub_h, int block_size,       \
1316       float block_normalization) {                                          \
1317     for (int y = 0; y < (h >> chroma_sub_h); ++y) {                         \
1318       for (int x = 0; x < (w >> chroma_sub_w); ++x) {                       \
1319         const int result_idx =                                              \
1320             (y + (block_size >> chroma_sub_h)) * result_stride + x +        \
1321             (block_size >> chroma_sub_w);                                   \
1322         INT_TYPE new_val = (INT_TYPE)AOMMIN(                                \
1323             AOMMAX(result[result_idx] * block_normalization + 0.5f, 0),     \
1324             block_normalization);                                           \
1325         const float err =                                                   \
1326             -(((float)new_val) / block_normalization - result[result_idx]); \
1327         denoised[y * stride + x] = new_val;                                 \
1328         if (x + 1 < (w >> chroma_sub_w)) {                                  \
1329           result[result_idx + 1] += err * 7.0f / 16.0f;                     \
1330         }                                                                   \
1331         if (y + 1 < (h >> chroma_sub_h)) {                                  \
1332           if (x > 0) {                                                      \
1333             result[result_idx + result_stride - 1] += err * 3.0f / 16.0f;   \
1334           }                                                                 \
1335           result[result_idx + result_stride] += err * 5.0f / 16.0f;         \
1336           if (x + 1 < (w >> chroma_sub_w)) {                                \
1337             result[result_idx + result_stride + 1] += err * 1.0f / 16.0f;   \
1338           }                                                                 \
1339         }                                                                   \
1340       }                                                                     \
1341     }                                                                       \
1342   }
1343
1344 DITHER_AND_QUANTIZE(uint8_t, lowbd);
1345 DITHER_AND_QUANTIZE(uint16_t, highbd);
1346
1347 int aom_wiener_denoise_2d(const uint8_t *const data[3], uint8_t *denoised[3],
1348                           int w, int h, int stride[3], int chroma_sub[2],
1349                           float *noise_psd[3], int block_size, int bit_depth,
1350                           int use_highbd) {
1351   float *plane = NULL, *block = NULL, *window_full = NULL,
1352         *window_chroma = NULL;
1353   double *block_d = NULL, *plane_d = NULL;
1354   struct aom_noise_tx_t *tx_full = NULL;
1355   struct aom_noise_tx_t *tx_chroma = NULL;
1356   const int num_blocks_w = (w + block_size - 1) / block_size;
1357   const int num_blocks_h = (h + block_size - 1) / block_size;
1358   const int result_stride = (num_blocks_w + 2) * block_size;
1359   const int result_height = (num_blocks_h + 2) * block_size;
1360   float *result = NULL;
1361   int init_success = 1;
1362   aom_flat_block_finder_t block_finder_full;
1363   aom_flat_block_finder_t block_finder_chroma;
1364   const float kBlockNormalization = (float)((1 << bit_depth) - 1);
1365   if (chroma_sub[0] != chroma_sub[1]) {
1366     fprintf(stderr,
1367             "aom_wiener_denoise_2d doesn't handle different chroma "
1368             "subsampling\n");
1369     return 0;
1370   }
1371   init_success &= aom_flat_block_finder_init(&block_finder_full, block_size,
1372                                              bit_depth, use_highbd);
1373   result = (float *)aom_malloc((num_blocks_h + 2) * block_size * result_stride *
1374                                sizeof(*result));
1375   plane = (float *)aom_malloc(block_size * block_size * sizeof(*plane));
1376   block =
1377       (float *)aom_memalign(32, 2 * block_size * block_size * sizeof(*block));
1378   block_d = (double *)aom_malloc(block_size * block_size * sizeof(*block_d));
1379   plane_d = (double *)aom_malloc(block_size * block_size * sizeof(*plane_d));
1380   window_full = get_half_cos_window(block_size);
1381   tx_full = aom_noise_tx_malloc(block_size);
1382
1383   if (chroma_sub[0] != 0) {
1384     init_success &= aom_flat_block_finder_init(&block_finder_chroma,
1385                                                block_size >> chroma_sub[0],
1386                                                bit_depth, use_highbd);
1387     window_chroma = get_half_cos_window(block_size >> chroma_sub[0]);
1388     tx_chroma = aom_noise_tx_malloc(block_size >> chroma_sub[0]);
1389   } else {
1390     window_chroma = window_full;
1391     tx_chroma = tx_full;
1392   }
1393
1394   init_success &= (tx_full != NULL) && (tx_chroma != NULL) && (plane != NULL) &&
1395                   (plane_d != NULL) && (block != NULL) && (block_d != NULL) &&
1396                   (window_full != NULL) && (window_chroma != NULL) &&
1397                   (result != NULL);
1398   for (int c = init_success ? 0 : 3; c < 3; ++c) {
1399     float *window_function = c == 0 ? window_full : window_chroma;
1400     aom_flat_block_finder_t *block_finder = &block_finder_full;
1401     const int chroma_sub_h = c > 0 ? chroma_sub[1] : 0;
1402     const int chroma_sub_w = c > 0 ? chroma_sub[0] : 0;
1403     struct aom_noise_tx_t *tx =
1404         (c > 0 && chroma_sub[0] > 0) ? tx_chroma : tx_full;
1405     if (!data[c] || !denoised[c]) continue;
1406     if (c > 0 && chroma_sub[0] != 0) {
1407       block_finder = &block_finder_chroma;
1408     }
1409     memset(result, 0, sizeof(*result) * result_stride * result_height);
1410     // Do overlapped block processing (half overlapped). The block rows can
1411     // easily be done in parallel
1412     for (int offsy = 0; offsy < (block_size >> chroma_sub_h);
1413          offsy += (block_size >> chroma_sub_h) / 2) {
1414       for (int offsx = 0; offsx < (block_size >> chroma_sub_w);
1415            offsx += (block_size >> chroma_sub_w) / 2) {
1416         // Pad the boundary when processing each block-set.
1417         for (int by = -1; by < num_blocks_h; ++by) {
1418           for (int bx = -1; bx < num_blocks_w; ++bx) {
1419             const int pixels_per_block =
1420                 (block_size >> chroma_sub_w) * (block_size >> chroma_sub_h);
1421             aom_flat_block_finder_extract_block(
1422                 block_finder, data[c], w >> chroma_sub_w, h >> chroma_sub_h,
1423                 stride[c], bx * (block_size >> chroma_sub_w) + offsx,
1424                 by * (block_size >> chroma_sub_h) + offsy, plane_d, block_d);
1425             for (int j = 0; j < pixels_per_block; ++j) {
1426               block[j] = (float)block_d[j];
1427               plane[j] = (float)plane_d[j];
1428             }
1429             pointwise_multiply(window_function, block, pixels_per_block);
1430             aom_noise_tx_forward(tx, block);
1431             aom_noise_tx_filter(tx, noise_psd[c]);
1432             aom_noise_tx_inverse(tx, block);
1433
1434             // Apply window function to the plane approximation (we will apply
1435             // it to the sum of plane + block when composing the results).
1436             pointwise_multiply(window_function, plane, pixels_per_block);
1437
1438             for (int y = 0; y < (block_size >> chroma_sub_h); ++y) {
1439               const int y_result =
1440                   y + (by + 1) * (block_size >> chroma_sub_h) + offsy;
1441               for (int x = 0; x < (block_size >> chroma_sub_w); ++x) {
1442                 const int x_result =
1443                     x + (bx + 1) * (block_size >> chroma_sub_w) + offsx;
1444                 result[y_result * result_stride + x_result] +=
1445                     (block[y * (block_size >> chroma_sub_w) + x] +
1446                      plane[y * (block_size >> chroma_sub_w) + x]) *
1447                     window_function[y * (block_size >> chroma_sub_w) + x];
1448               }
1449             }
1450           }
1451         }
1452       }
1453     }
1454     if (use_highbd) {
1455       dither_and_quantize_highbd(result, result_stride, (uint16_t *)denoised[c],
1456                                  w, h, stride[c], chroma_sub_w, chroma_sub_h,
1457                                  block_size, kBlockNormalization);
1458     } else {
1459       dither_and_quantize_lowbd(result, result_stride, denoised[c], w, h,
1460                                 stride[c], chroma_sub_w, chroma_sub_h,
1461                                 block_size, kBlockNormalization);
1462     }
1463   }
1464   aom_free(result);
1465   aom_free(plane);
1466   aom_free(block);
1467   aom_free(plane_d);
1468   aom_free(block_d);
1469   aom_free(window_full);
1470
1471   aom_noise_tx_free(tx_full);
1472
1473   aom_flat_block_finder_free(&block_finder_full);
1474   if (chroma_sub[0] != 0) {
1475     aom_flat_block_finder_free(&block_finder_chroma);
1476     aom_free(window_chroma);
1477     aom_noise_tx_free(tx_chroma);
1478   }
1479   return init_success;
1480 }
1481
1482 struct aom_denoise_and_model_t {
1483   int block_size;
1484   int bit_depth;
1485   float noise_level;
1486
1487   // Size of current denoised buffer and flat_block buffer
1488   int width;
1489   int height;
1490   int y_stride;
1491   int uv_stride;
1492   int num_blocks_w;
1493   int num_blocks_h;
1494
1495   // Buffers for image and noise_psd allocated on the fly
1496   float *noise_psd[3];
1497   uint8_t *denoised[3];
1498   uint8_t *flat_blocks;
1499
1500   aom_flat_block_finder_t flat_block_finder;
1501   aom_noise_model_t noise_model;
1502 };
1503
1504 struct aom_denoise_and_model_t *aom_denoise_and_model_alloc(int bit_depth,
1505                                                             int block_size,
1506                                                             float noise_level) {
1507   struct aom_denoise_and_model_t *ctx =
1508       (struct aom_denoise_and_model_t *)aom_malloc(
1509           sizeof(struct aom_denoise_and_model_t));
1510   if (!ctx) {
1511     fprintf(stderr, "Unable to allocate denoise_and_model struct\n");
1512     return NULL;
1513   }
1514   memset(ctx, 0, sizeof(*ctx));
1515
1516   ctx->block_size = block_size;
1517   ctx->noise_level = noise_level;
1518   ctx->bit_depth = bit_depth;
1519
1520   ctx->noise_psd[0] =
1521       aom_malloc(sizeof(*ctx->noise_psd[0]) * block_size * block_size);
1522   ctx->noise_psd[1] =
1523       aom_malloc(sizeof(*ctx->noise_psd[1]) * block_size * block_size);
1524   ctx->noise_psd[2] =
1525       aom_malloc(sizeof(*ctx->noise_psd[2]) * block_size * block_size);
1526   if (!ctx->noise_psd[0] || !ctx->noise_psd[1] || !ctx->noise_psd[2]) {
1527     fprintf(stderr, "Unable to allocate noise PSD buffers\n");
1528     aom_denoise_and_model_free(ctx);
1529     return NULL;
1530   }
1531   return ctx;
1532 }
1533
1534 void aom_denoise_and_model_free(struct aom_denoise_and_model_t *ctx) {
1535   aom_free(ctx->flat_blocks);
1536   for (int i = 0; i < 3; ++i) {
1537     aom_free(ctx->denoised[i]);
1538     aom_free(ctx->noise_psd[i]);
1539   }
1540   aom_noise_model_free(&ctx->noise_model);
1541   aom_flat_block_finder_free(&ctx->flat_block_finder);
1542   aom_free(ctx);
1543 }
1544
1545 static int denoise_and_model_realloc_if_necessary(
1546     struct aom_denoise_and_model_t *ctx, YV12_BUFFER_CONFIG *sd) {
1547   if (ctx->width == sd->y_width && ctx->height == sd->y_height &&
1548       ctx->y_stride == sd->y_stride && ctx->uv_stride == sd->uv_stride)
1549     return 1;
1550   const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
1551   const int block_size = ctx->block_size;
1552
1553   ctx->width = sd->y_width;
1554   ctx->height = sd->y_height;
1555   ctx->y_stride = sd->y_stride;
1556   ctx->uv_stride = sd->uv_stride;
1557
1558   for (int i = 0; i < 3; ++i) {
1559     aom_free(ctx->denoised[i]);
1560     ctx->denoised[i] = NULL;
1561   }
1562   aom_free(ctx->flat_blocks);
1563   ctx->flat_blocks = NULL;
1564
1565   ctx->denoised[0] = aom_malloc((sd->y_stride * sd->y_height) << use_highbd);
1566   ctx->denoised[1] = aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
1567   ctx->denoised[2] = aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
1568   if (!ctx->denoised[0] || !ctx->denoised[1] || !ctx->denoised[2]) {
1569     fprintf(stderr, "Unable to allocate denoise buffers\n");
1570     return 0;
1571   }
1572   ctx->num_blocks_w = (sd->y_width + ctx->block_size - 1) / ctx->block_size;
1573   ctx->num_blocks_h = (sd->y_height + ctx->block_size - 1) / ctx->block_size;
1574   ctx->flat_blocks = aom_malloc(ctx->num_blocks_w * ctx->num_blocks_h);
1575
1576   aom_flat_block_finder_free(&ctx->flat_block_finder);
1577   if (!aom_flat_block_finder_init(&ctx->flat_block_finder, ctx->block_size,
1578                                   ctx->bit_depth, use_highbd)) {
1579     fprintf(stderr, "Unable to init flat block finder\n");
1580     return 0;
1581   }
1582
1583   const aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3,
1584                                             ctx->bit_depth, use_highbd };
1585   aom_noise_model_free(&ctx->noise_model);
1586   if (!aom_noise_model_init(&ctx->noise_model, params)) {
1587     fprintf(stderr, "Unable to init noise model\n");
1588     return 0;
1589   }
1590
1591   // Simply use a flat PSD (although we could use the flat blocks to estimate
1592   // PSD) those to estimate an actual noise PSD)
1593   const float y_noise_level =
1594       aom_noise_psd_get_default_value(ctx->block_size, ctx->noise_level);
1595   const float uv_noise_level = aom_noise_psd_get_default_value(
1596       ctx->block_size >> sd->subsampling_x, ctx->noise_level);
1597   for (int i = 0; i < block_size * block_size; ++i) {
1598     ctx->noise_psd[0][i] = y_noise_level;
1599     ctx->noise_psd[1][i] = ctx->noise_psd[2][i] = uv_noise_level;
1600   }
1601   return 1;
1602 }
1603
1604 int aom_denoise_and_model_run(struct aom_denoise_and_model_t *ctx,
1605                               YV12_BUFFER_CONFIG *sd,
1606                               aom_film_grain_t *film_grain, int apply_denoise) {
1607   const int block_size = ctx->block_size;
1608   const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
1609   uint8_t *raw_data[3] = {
1610     use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->y_buffer) : sd->y_buffer,
1611     use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->u_buffer) : sd->u_buffer,
1612     use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->v_buffer) : sd->v_buffer,
1613   };
1614   const uint8_t *const data[3] = { raw_data[0], raw_data[1], raw_data[2] };
1615   int strides[3] = { sd->y_stride, sd->uv_stride, sd->uv_stride };
1616   int chroma_sub_log2[2] = { sd->subsampling_x, sd->subsampling_y };
1617
1618   if (!denoise_and_model_realloc_if_necessary(ctx, sd)) {
1619     fprintf(stderr, "Unable to realloc buffers\n");
1620     return 0;
1621   }
1622
1623   aom_flat_block_finder_run(&ctx->flat_block_finder, data[0], sd->y_width,
1624                             sd->y_height, strides[0], ctx->flat_blocks);
1625
1626   if (!aom_wiener_denoise_2d(data, ctx->denoised, sd->y_width, sd->y_height,
1627                              strides, chroma_sub_log2, ctx->noise_psd,
1628                              block_size, ctx->bit_depth, use_highbd)) {
1629     fprintf(stderr, "Unable to denoise image\n");
1630     return 0;
1631   }
1632
1633   const aom_noise_status_t status = aom_noise_model_update(
1634       &ctx->noise_model, data, (const uint8_t *const *)ctx->denoised,
1635       sd->y_width, sd->y_height, strides, chroma_sub_log2, ctx->flat_blocks,
1636       block_size);
1637   int have_noise_estimate = 0;
1638   if (status == AOM_NOISE_STATUS_OK) {
1639     have_noise_estimate = 1;
1640   } else if (status == AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE) {
1641     aom_noise_model_save_latest(&ctx->noise_model);
1642     have_noise_estimate = 1;
1643   } else {
1644     // Unable to update noise model; proceed if we have a previous estimate.
1645     have_noise_estimate =
1646         (ctx->noise_model.combined_state[0].strength_solver.num_equations > 0);
1647   }
1648
1649   film_grain->apply_grain = 0;
1650   if (have_noise_estimate) {
1651     if (!aom_noise_model_get_grain_parameters(&ctx->noise_model, film_grain)) {
1652       fprintf(stderr, "Unable to get grain parameters.\n");
1653       return 0;
1654     }
1655     if (!film_grain->random_seed) {
1656       film_grain->random_seed = 7391;
1657     }
1658     if (apply_denoise) {
1659       memcpy(raw_data[0], ctx->denoised[0],
1660              (strides[0] * sd->y_height) << use_highbd);
1661       memcpy(raw_data[1], ctx->denoised[1],
1662              (strides[1] * sd->uv_height) << use_highbd);
1663       memcpy(raw_data[2], ctx->denoised[2],
1664              (strides[2] * sd->uv_height) << use_highbd);
1665     }
1666   }
1667   return 1;
1668 }