LCOV - coverage.info - Codec/EbCdef.c

LCOV - code coverage report

Current view:	top level - Codec - EbCdef.c (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	382	1249	30.6 %
Date:	2019-11-25 17:38:06	Functions:	20	41	48.8 %

          Line data    Source code

       1             : /*
       2             :  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : #include <stdio.h>
      12             : #include <stdlib.h>
      13             : #include <math.h>
      14             : #include <string.h>
      15             : 
      16             : #include "EbCdef.h"
      17             : #include "stdint.h"
      18             : #include "EbCodingUnit.h"
      19             : #include "EbEncDecProcess.h"
      20             : #include "aom_dsp_rtcd.h"
      21             : 
      22             : extern int16_t eb_av1_ac_quant_Q3(int32_t qindex, int32_t delta, AomBitDepth bit_depth);
      23             : 
      24             : //-------memory stuff
      25             : 
      #define ADDRESS_STORAGE_SIZE sizeof(size_t)
      #define DEFAULT_ALIGNMENT (2 * sizeof(void *))
      #define AOM_MAX_ALLOCABLE_MEMORY 8589934592  // 8 GB
      /* Rounds addr up to the next multiple of align. The mask arithmetic only
       * yields a multiple of align when align is a power of two. */
      #define align_addr(addr, align) \
        (void *)(((size_t)(addr) + ((align)-1)) & ~(size_t)((align)-1))

      /* Returns 1 when nmemb * size stays within AOM_MAX_ALLOCABLE_MEMORY and is
       * representable as size_t, 0 otherwise. nmemb == 0 is always accepted. */
      static int32_t check_size_argument_overflow(uint64_t nmemb, uint64_t size) {
          if (nmemb == 0) return 1;
          if (size > AOM_MAX_ALLOCABLE_MEMORY / nmemb) return 0;
          /* The product cannot wrap here: it is bounded by the limit above. */
          const uint64_t total_size = nmemb * size;
          if (total_size != (size_t)total_size) return 0;
          return 1;
      }

      /* Number of bytes to request from malloc() so that `size` bytes aligned to
       * `align`, plus one slot holding the raw malloc address, always fit.
       * The caller is responsible for making sure the sum does not wrap. */
      static size_t GetAlignedMallocSize(size_t size, size_t align) {
          return size + align - 1 + ADDRESS_STORAGE_SIZE;
      }

      /* The raw malloc address is stored in the size_t slot just below `mem`. */
      static size_t *GetMallocAddressLocation(void *const mem) {
          return ((size_t *)mem) - 1;
      }

      /* Records the raw malloc address in the slot preceding the aligned block. */
      static void SetActualMallocAddress(void *const mem,
          const void *const malloc_addr) {
          size_t *const malloc_addr_location = GetMallocAddressLocation(mem);
          *malloc_addr_location = (size_t)malloc_addr;
      }

      /* Reads back the raw malloc address stored by SetActualMallocAddress(). */
      static void *GetActualMallocAddress(void *const mem) {
          const size_t *const malloc_addr_location = GetMallocAddressLocation(mem);
          return (void *)(*malloc_addr_location);
      }

      /* Allocates `size` bytes whose address is a multiple of `align` (expected to
       * be a power of two). Returns NULL when align is 0, when the padded request
       * would overflow size_t or exceed AOM_MAX_ALLOCABLE_MEMORY, or when malloc()
       * fails. The result must be released with eb_aom_free(), not free(). */
      void *eb_aom_memalign(size_t align, size_t size) {
          /* align_addr() maps every address to 0 when align is 0; writing the
           * bookkeeping slot below that address would be invalid. */
          if (align == 0) return NULL;
          /* Without these checks size + align - 1 + ADDRESS_STORAGE_SIZE can wrap
           * to a small value, so malloc() would succeed with a block that is
           * smaller than the `size` bytes the caller is about to use. */
          if (align - 1 > SIZE_MAX - ADDRESS_STORAGE_SIZE) return NULL;
          if (size > SIZE_MAX - (align - 1) - ADDRESS_STORAGE_SIZE) return NULL;
          const size_t aligned_size = GetAlignedMallocSize(size, align);
      #if defined(AOM_MAX_ALLOCABLE_MEMORY)
          if (!check_size_argument_overflow(1, aligned_size)) return NULL;
      #endif
          void *const addr = malloc(aligned_size);
          if (!addr) return NULL;
          /* Skip the bookkeeping slot first, then round up to the alignment. */
          void *const x = align_addr((uint8_t *)addr + ADDRESS_STORAGE_SIZE, align);
          SetActualMallocAddress(x, addr);
          return x;
      }

      /* Allocates `size` bytes aligned to DEFAULT_ALIGNMENT. */
      void *eb_aom_malloc(size_t size) { return eb_aom_memalign(DEFAULT_ALIGNMENT, size); }

      /* Releases a block obtained from eb_aom_memalign()/eb_aom_malloc().
       * NULL is accepted and ignored; the guard is required because the raw
       * address is read from the slot in front of memblk. */
      void eb_aom_free(void *memblk) {
          if (memblk) {
              void *addr = GetActualMallocAddress(memblk);
              free(addr);
          }
      }
      83             : 
      /* Writes the low 16 bits of val into `length` consecutive uint16_t elements
       * starting at dest, and returns dest. */
      void *eb_aom_memset16(void *dest, int32_t val, size_t length) {
          uint16_t *const out = (uint16_t *)dest;
          const uint16_t fill = (uint16_t)val;
          for (size_t n = 0; n != length; ++n) {
              out[n] = fill;
          }
          return dest;
      }
      90             : //-------------------------------
      91             : 
      92             : extern INLINE int32_t get_msb(uint32_t n);
      93             : 
      94           0 : static INLINE int32_t sign(int32_t i) { return i < 0 ? -1 : 1; }
      95           0 : static INLINE int32_t constrain(int32_t diff, int32_t threshold, int32_t damping) {
      96           0 :     if (!threshold) return 0;
      97             : 
      98           0 :     const int32_t shift = AOMMAX(0, damping - get_msb(threshold));
      99           0 :     return sign(diff) *
     100           0 :         AOMMIN(abs(diff), AOMMAX(0, threshold - (abs(diff) >> shift)));
     101             : }
     102             : 
     103             : /* Generated from gen_filter_tables.c. Row d holds the two sample offsets (first and second tap) used by eb_cdef_filter_block_c for direction d, in elements of a buffer whose row stride is CDEF_BSTRIDE; the filter applies each offset with both signs. */
     104             : DECLARE_ALIGNED(16, const int32_t, eb_cdef_directions[8][2]) = {
     105             :     { -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2 }, /* 0: one row up, one column right per tap */
     106             :     { 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2 }, /* 1 */
     107             :     { 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2 }, /* 2: same row (horizontal) */
     108             :     { 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2 }, /* 3 */
     109             :     { 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2 }, /* 4: one row down, one column right per tap */
     110             :     { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1 }, /* 5 */
     111             :     { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0 }, /* 6: same column (vertical) */
     112             :     { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1 } /* 7 */
     113             : };
     114             : 
     /* Detects the dominant direction of an 8x8 block. 0 means 45-degree
     up-right, 2 is horizontal, and so on. The search minimizes the weighted
     variance along all the lines in a particular direction, i.e. the squared
     error between the input and a "predicted" block where each pixel is
     replaced by the average along a line in that direction. Since every
     direction has the same sum(x^2) term, that term is never computed.
     See Section 2, step 2, of: http://jmvalin.ca/notes/intra_paint.pdf

     img points at the top-left sample of the block and stride is its row pitch
     in samples; each sample is shifted right by coeff_shift before use. The
     scaled difference between the best cost and the cost of the orthogonal
     direction is written to *var. Returns the direction index (0..7). */
     int32_t eb_cdef_find_dir_c(const uint16_t *img, int32_t stride, int32_t *var,
         int32_t coeff_shift) {
         /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
         The output is then 840 times larger, but that does not matter when
         only the maximum is needed. */
         static const int32_t div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 };
         int32_t cost[8] = { 0 };
         int32_t partial[8][15] = { { 0 } };

         /* Accumulate, for every direction, the sum of the pixels on each line. */
         for (int32_t row = 0; row < 8; row++) {
             for (int32_t col = 0; col < 8; col++) {
                 /* Subtracting 128 reduces the maximum range of the squared
                 partial sums. */
                 const int32_t x = (img[row * stride + col] >> coeff_shift) - 128;
                 partial[0][row + col] += x;
                 partial[1][row + col / 2] += x;
                 partial[2][row] += x;
                 partial[3][3 + row - col / 2] += x;
                 partial[4][7 + row - col] += x;
                 partial[5][3 - row / 2 + col] += x;
                 partial[6][col] += x;
                 partial[7][row / 2 + col] += x;
             }
         }

         /* Directions 2 and 6: eight lines of eight pixels each. */
         for (int32_t n = 0; n < 8; n++) {
             cost[2] += partial[2][n] * partial[2][n];
             cost[6] += partial[6][n] * partial[6][n];
         }
         cost[2] *= div_table[8];
         cost[6] *= div_table[8];

         /* Directions 0 and 4: lines n and 14 - n both contain n + 1 pixels. */
         for (int32_t n = 0; n < 7; n++) {
             const int32_t weight = div_table[n + 1];
             cost[0] += (partial[0][n] * partial[0][n] +
                 partial[0][14 - n] * partial[0][14 - n]) * weight;
             cost[4] += (partial[4][n] * partial[4][n] +
                 partial[4][14 - n] * partial[4][14 - n]) * weight;
         }
         cost[0] += partial[0][7] * partial[0][7] * div_table[8];
         cost[4] += partial[4][7] * partial[4][7] * div_table[8];

         /* Odd directions: five full lines (3..7) plus three shorter pairs. */
         for (int32_t d = 1; d < 8; d += 2) {
             const int32_t *line = partial[d];
             int32_t acc = 0;
             for (int32_t n = 3; n <= 7; n++)
                 acc += line[n] * line[n];
             acc *= div_table[8];
             for (int32_t n = 0; n < 3; n++) {
                 acc += (line[n] * line[n] + line[10 - n] * line[10 - n]) *
                     div_table[2 * n + 2];
             }
             cost[d] = acc;
         }

         /* Pick the first direction with the strictly largest cost. */
         int32_t best_dir = 0;
         int32_t best_cost = 0;
         for (int32_t d = 0; d < 8; d++) {
             if (cost[d] > best_cost) {
                 best_cost = cost[d];
                 best_dir = d;
             }
         }

         /* Difference between the optimal variance and the variance along the
         orthogonal direction. Again, the sum(x^2) terms cancel out. We'd
         normally divide by 840, but dividing by 1024 is close enough for how
         the value is used. */
         *var = (best_cost - cost[(best_dir + 4) & 7]) >> 10;
         return best_dir;
     }
     191             : 
     192             : const int32_t eb_cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } };
     193             : const int32_t eb_cdef_sec_taps[2][2] = { { 2, 1 }, { 2, 1 } };
     194             : 
     195             : /* Smooth in the direction detected. */
     196           0 : void eb_cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int32_t dstride,
     197             :     const uint16_t *in, int32_t pri_strength, int32_t sec_strength,
     198             :     int32_t dir, int32_t pri_damping, int32_t sec_damping, int32_t bsize,
     199             :     int32_t coeff_shift) {
     200             :     int32_t i, j, k;
     201           0 :     const int32_t s = CDEF_BSTRIDE;
     202           0 :     const int32_t *pri_taps = eb_cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
     203           0 :     const int32_t *sec_taps = eb_cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
     204             : 
     205           0 :     for (i = 0; i < (4 << (int32_t)(bsize == BLOCK_8X8 || bsize == BLOCK_4X8)); i++) {
     206           0 :         for (j = 0; j < (4 << (int32_t)(bsize == BLOCK_8X8 || bsize == BLOCK_8X4)); j++) {
     207           0 :             int16_t sum = 0;
     208             :             int16_t y;
     209           0 :             int16_t x = in[i * s + j];
     210           0 :             int32_t max = x;
     211           0 :             int32_t min = x;
     212           0 :             for (k = 0; k < 2; k++) {
     213           0 :                 int16_t p0 = in[i * s + j + eb_cdef_directions[dir][k]];
     214           0 :                 int16_t p1 = in[i * s + j - eb_cdef_directions[dir][k]];
     215           0 :                 sum += (int16_t)(pri_taps[k] * constrain(p0 - x, pri_strength, pri_damping));
     216           0 :                 sum += (int16_t)(pri_taps[k] * constrain(p1 - x, pri_strength, pri_damping));
     217           0 :                 if (p0 != CDEF_VERY_LARGE) max = AOMMAX(p0, max);
     218           0 :                 if (p1 != CDEF_VERY_LARGE) max = AOMMAX(p1, max);
     219           0 :                 min = AOMMIN(p0, min);
     220           0 :                 min = AOMMIN(p1, min);
     221           0 :                 int16_t s0 = in[i * s + j + eb_cdef_directions[(dir + 2) & 7][k]];
     222           0 :                 int16_t s1 = in[i * s + j - eb_cdef_directions[(dir + 2) & 7][k]];
     223           0 :                 int16_t s2 = in[i * s + j + eb_cdef_directions[(dir + 6) & 7][k]];
     224           0 :                 int16_t s3 = in[i * s + j - eb_cdef_directions[(dir + 6) & 7][k]];
     225           0 :                 if (s0 != CDEF_VERY_LARGE) max = AOMMAX(s0, max);
     226           0 :                 if (s1 != CDEF_VERY_LARGE) max = AOMMAX(s1, max);
     227           0 :                 if (s2 != CDEF_VERY_LARGE) max = AOMMAX(s2, max);
     228           0 :                 if (s3 != CDEF_VERY_LARGE) max = AOMMAX(s3, max);
     229           0 :                 min = AOMMIN(s0, min);
     230           0 :                 min = AOMMIN(s1, min);
     231           0 :                 min = AOMMIN(s2, min);
     232           0 :                 min = AOMMIN(s3, min);
     233           0 :                 sum += (int16_t)(sec_taps[k] * constrain(s0 - x, sec_strength, sec_damping));
     234           0 :                 sum += (int16_t)(sec_taps[k] * constrain(s1 - x, sec_strength, sec_damping));
     235           0 :                 sum += (int16_t)(sec_taps[k] * constrain(s2 - x, sec_strength, sec_damping));
     236           0 :                 sum += (int16_t)(sec_taps[k] * constrain(s3 - x, sec_strength, sec_damping));
     237             :             }
     238           0 :             y = (int16_t)clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), min, max);
     239           0 :             if (dst8)
     240           0 :                 dst8[i * dstride + j] = (uint8_t)y;
     241             :             else
     242           0 :                 dst16[i * dstride + j] = (uint16_t)y;
     243             :         }
     244             :     }
     245           0 : }
     /* Maps cdef_filter_mode to a step: 1 -> 1, 2 -> 4, 3 -> 8, 4 -> 16, and any
      * other mode -> 64. */
     int32_t get_cdef_gi_step(
         int8_t   cdef_filter_mode) {
         switch (cdef_filter_mode) {
         case 1: return 1;
         case 2: return 4;
         case 3: return 8;
         case 4: return 16;
         default: return 64;
         }
     }
     251             : /* Compute the primary filter strength for an 8x8 block based on the
     252             : directional variance difference. A high variance difference means
     253             : that we have a highly directional pattern (e.g. a high contrast
     254             : edge), so we can apply more deringing. A low variance means that we
     255             : either have a low contrast edge, or a non-directional texture, so
     256             : we want to be careful not to blur. */
     257     1172370 : static INLINE int32_t adjust_strength(int32_t strength, int32_t var) {
     258     1172370 :     const int32_t i = var >> 6 ? AOMMIN(get_msb(var >> 6), 12) : 0;
     259             :     /* We use the variance of 8x8 blocks to adjust the strength. */
     260     1169690 :     return var ? (strength * (4 + i) + 8) >> 4 : 0;
     261             : }
     262             : 
     263      106007 : void eb_cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int32_t dstride, uint16_t *in,
     264             :     int32_t xdec, int32_t ydec, int32_t dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
     265             :     int32_t *dirinit, int32_t var[CDEF_NBLOCKS][CDEF_NBLOCKS], int32_t pli,
     266             :     cdef_list *dlist, int32_t cdef_count, int32_t level,
     267             :     int32_t sec_strength, int32_t pri_damping, int32_t sec_damping,
     268             :     int32_t coeff_shift) {
     269             :     int32_t bi;
     270             :     int32_t bx;
     271             :     int32_t by;
     272             :     int32_t bsize, bsizex, bsizey;
     273             : 
     274      106007 :     int32_t pri_strength = level << coeff_shift;
     275      106007 :     sec_strength <<= coeff_shift;
     276      106007 :     sec_damping += coeff_shift - (pli != AOM_PLANE_Y);
     277      106007 :     pri_damping += coeff_shift - (pli != AOM_PLANE_Y);
     278      106007 :     bsize =
     279      106007 :         ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
     280      106007 :     bsizex = 3 - xdec;
     281      106007 :     bsizey = 3 - ydec;
     282      106007 :     if (dirinit && pri_strength == 0 && sec_strength == 0) {
     283             :         // If we're here, both primary and secondary strengths are 0, and
     284             :         // we still haven't written anything to y[] yet, so we just copy
     285             :         // the input to y[]. This is necessary only for eb_av1_cdef_search()
     286             :         // and only eb_av1_cdef_search() sets dirinit.
     287      119198 :         for (bi = 0; bi < cdef_count; bi++) {
     288      114898 :             by = dlist[bi].by << bsizey;
     289      114898 :             bx = dlist[bi].bx << bsizex;
     290             :             int32_t iy, ix;
     291             :             // TODO(stemidts/jmvalin): SIMD optimisations
     292      114898 :             if (dst8) {
     293      718659 :                 for (iy = 0; iy < 1 << bsizey; iy++)
     294     4177440 :                     for (ix = 0; ix < 1 << bsizex; ix++)
     295     3573680 :                         dst8[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
     296     3573680 :                         (uint8_t)in[(by + iy) * CDEF_BSTRIDE + bx + ix];
     297             :             }
     298             :             else {
     299           0 :                 for (iy = 0; iy < 1 << bsizey; iy++)
     300           0 :                     for (ix = 0; ix < 1 << bsizex; ix++)
     301           0 :                         dst16[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
     302           0 :                         in[(by + iy) * CDEF_BSTRIDE + bx + ix];
     303             :             }
     304             :         }
     305        4300 :         return;
     306             :     }
     307             : 
     308      101707 :     if (pli == 0) {
     309       33927 :         if (!dirinit || !*dirinit) {
     310       88955 :             for (bi = 0; bi < cdef_count; bi++) {
     311       85796 :                 by = dlist[bi].by;
     312       85796 :                 bx = dlist[bi].bx;
     313             : 
     314       85800 :                 dir[by][bx] = eb_cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx],
     315       85796 :                     CDEF_BSTRIDE, &var[by][bx], coeff_shift);
     316             :             }
     317        3159 :             if (dirinit) *dirinit = 1;
     318             :         }
     319             :     }
     320      101711 :     if (pli == 1 && xdec != ydec) {
     321           0 :         for (bi = 0; bi < cdef_count; bi++) {
     322           0 :             /*static*/ const int32_t conv422[8] = { 7, 0, 2, 4, 5, 6, 6, 6 };
     323           0 :             /*static*/ const int32_t conv440[8] = { 1, 2, 2, 2, 3, 4, 6, 0 };
     324           0 :             by = dlist[bi].by;
     325           0 :             bx = dlist[bi].bx;
     326           0 :             dir[by][bx] = (xdec ? conv422 : conv440)[dir[by][bx]];
     327             :         }
     328             :     }
     329             : 
     330     3561860 :     for (bi = 0; bi < cdef_count; bi++) {
     331     3413200 :         int32_t t = dlist[bi].skip ? 0 : pri_strength;
     332     3413200 :         int32_t s = dlist[bi].skip ? 0 : sec_strength;
     333     3413200 :         by = dlist[bi].by;
     334     3413200 :         bx = dlist[bi].bx;
     335     3413200 :         if (dst8)
     336    13462200 :             eb_cdef_filter_block(
     337     3449740 :                 &dst8[dirinit ? bi << (bsizex + bsizey) : (by << bsizey) * dstride + (bx << bsizex)],
     338             :                 NULL,
     339             :                 dirinit ? 1 << bsizex : dstride,
     340     3449740 :                 &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
     341     1172840 :                 (pli ? t : adjust_strength(t, var[by][bx])), s,
     342     3098780 :                 t ? dir[by][bx] : 0, pri_damping, sec_damping, bsize,
     343             :                 coeff_shift);
     344             :         else
     345           0 :             eb_cdef_filter_block(
     346             :                 NULL,
     347           0 :                 &dst16[dirinit ? bi << (bsizex + bsizey)
     348           0 :                 : (by << bsizey) * dstride + (bx << bsizex)],
     349             :                 dirinit ? 1 << bsizex : dstride,
     350           0 :                 &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
     351           0 :                 (pli ? t : adjust_strength(t, var[by][bx])), s, t ? dir[by][bx] : 0,
     352             :                 pri_damping, sec_damping, bsize, coeff_shift);
     353             :     }
     354             : }
     355             : 
     356       14333 : int32_t eb_sb_all_skip(PictureControlSet   *picture_control_set_ptr, const Av1Common *const cm, int32_t mi_row, int32_t mi_col) {
     357             :     int32_t maxc, maxr;
     358       14333 :     int32_t skip = 1;
     359       14333 :     maxc = cm->mi_cols - mi_col;
     360       14333 :     maxr = cm->mi_rows - mi_row;
     361             : 
     362       14333 :     maxr = AOMMIN(maxr, MI_SIZE_64X64);
     363       14333 :     maxc = AOMMIN(maxc, MI_SIZE_64X64);
     364             : 
     365      221944 :     for (int32_t r = 0; r < maxr; r++) {
     366     3320370 :         for (int32_t c = 0; c < maxc; c++) {
     367     3112760 :             skip =
     368     5664950 :                 skip &&
     369     2552190 :                 picture_control_set_ptr->mi_grid_base[(mi_row + r) * picture_control_set_ptr->mi_stride + mi_col + c]->mbmi.block_mi.skip;
     370             :             /// cm->mi_grid_visible[(mi_row + r) * cm->mi_stride + mi_col + c]->skip;
     371             :         }
     372             :     }
     373       14333 :     return skip;
     374             : }
     375             : 
     376      267206 : static int32_t is_8x8_block_skip(ModeInfo **grid, int32_t mi_row, int32_t mi_col,
     377             :     int32_t mi_stride) {
     378      267206 :     int32_t is_skip = 1;
     379      800405 :     for (int32_t r = 0; r < mi_size_high[BLOCK_8X8]; ++r)
     380     1598740 :         for (int32_t c = 0; c < mi_size_wide[BLOCK_8X8]; ++c)
     381     1065540 :             is_skip &= (int32_t)(grid[(mi_row + r) * mi_stride + (mi_col + c)]->mbmi.block_mi.skip);
     382             : 
     383      267206 :     return is_skip;
     384             : }
     385             : 
     386        4557 : int32_t eb_sb_compute_cdef_list(PictureControlSet            *picture_control_set_ptr, const Av1Common *const cm, int32_t mi_row, int32_t mi_col,
     387             :     cdef_list *dlist, BlockSize bs)
     388             : {
     389             :     //MbModeInfo **grid = cm->mi_grid_visible;
     390        4557 :     ModeInfo **grid = picture_control_set_ptr->mi_grid_base;
     391             : 
     392        4557 :     int32_t maxc = cm->mi_cols - mi_col;
     393        4557 :     int32_t maxr = cm->mi_rows - mi_row;
     394             : 
     395        4557 :     if (bs == BLOCK_128X128 || bs == BLOCK_128X64)
     396           0 :         maxc = AOMMIN(maxc, MI_SIZE_128X128);
     397             :     else
     398        4559 :         maxc = AOMMIN(maxc, MI_SIZE_64X64);
     399        4557 :     if (bs == BLOCK_128X128 || bs == BLOCK_64X128)
     400           0 :         maxr = AOMMIN(maxr, MI_SIZE_128X128);
     401             :     else
     402        4557 :         maxr = AOMMIN(maxr, MI_SIZE_64X64);
     403             : 
     404        4557 :     const int32_t r_step = mi_size_high[BLOCK_8X8];
     405        4557 :     const int32_t c_step = mi_size_wide[BLOCK_8X8];
     406        4557 :     const int32_t r_shift = (r_step == 2);
     407        4557 :     const int32_t c_shift = (c_step == 2);
     408             : 
     409        4557 :     assert(r_step == 1 || r_step == 2);
     410        4557 :     assert(c_step == 1 || c_step == 2);
     411             : 
     412        4557 :     int32_t count = 0;
     413             : 
     414       39748 :     for (int32_t r = 0; r < maxr; r += r_step) {
     415      301174 :         for (int32_t c = 0; c < maxc; c += c_step) {
     416      265983 :             if (!is_8x8_block_skip(grid, mi_row + r, mi_col + c, picture_control_set_ptr->mi_stride)) {
     417       70986 :                 dlist[count].by = (uint8_t)(r >> r_shift);
     418       70986 :                 dlist[count].bx = (uint8_t)(c >> c_shift);
     419       70986 :                 dlist[count].skip = 0;
     420       70986 :                 count++;
     421             :             }
     422             :         }
     423             :     }
     424        5945 :     return count;
     425             : }
     /* Widens a v-row by h-column rectangle of 8-bit samples into 16-bit samples.
      * dstride and sstride are the row pitches of dst and src, in elements. */
     void eb_copy_rect8_8bit_to_16bit_c(uint16_t *dst, int32_t dstride, const uint8_t *src,
         int32_t sstride, int32_t v, int32_t h) {
         for (int32_t row = 0; row < v; row++) {
             uint16_t *out = dst + row * dstride;
             const uint8_t *inp = src + row * sstride;
             for (int32_t col = 0; col < h; col++) {
                 out[col] = inp[col];
             }
         }
     }
     433             : 
     /* Copies a vsize x hsize rectangle of 8-bit samples, starting at row
      * src_voffset and column src_hoffset of src, into the 16-bit buffer dst via
      * eb_copy_rect8_8bit_to_16bit(). */
     void copy_sb8_16(uint16_t *dst, int32_t dstride,
         const uint8_t *src, int32_t src_voffset, int32_t src_hoffset,
         int32_t sstride, int32_t vsize, int32_t hsize) {
         const uint8_t *base = src + (src_voffset * sstride + src_hoffset);

         eb_copy_rect8_8bit_to_16bit(dst, dstride, base, sstride, vsize, hsize);
     }
     443             : 
     /* Sets every element of a v-row by h-column rectangle in dst to x.
      * dstride is the row pitch of dst in elements. */
     void fill_rect(uint16_t *dst, int32_t dstride, int32_t v, int32_t h,
         uint16_t x) {
         for (int32_t row = 0; row < v; row++) {
             uint16_t *out = dst + row * dstride;
             for (int32_t col = 0; col < h; col++) {
                 out[col] = x;
             }
         }
     }
     451             : 
     /* Copies a v-row by h-column rectangle of 16-bit samples from src to dst.
      * dstride and sstride are the row pitches of dst and src, in elements. */
     void copy_rect(uint16_t *dst, int32_t dstride, const uint16_t *src,
         int32_t sstride, int32_t v, int32_t h) {
         for (int32_t row = 0; row < v; row++) {
             uint16_t *out = dst + row * dstride;
             const uint16_t *inp = src + row * sstride;
             for (int32_t col = 0; col < h; col++) {
                 out[col] = inp[col];
             }
         }
     }
     459             : 
     460          90 : void eb_av1_cdef_frame(
     461             :     EncDecContext                *context_ptr,
     462             :     SequenceControlSet           *sequence_control_set_ptr,
     463             :     PictureControlSet            *pCs){
     464             :     (void)context_ptr;
     465             : 
     466          90 :     struct PictureParentControlSet     *pPcs = pCs->parent_pcs_ptr;
     467          90 :     Av1Common*   cm = pPcs->av1_cm;
     468          90 :     FrameHeader *frm_hdr = &pPcs->frm_hdr;
     469             : 
     470             :     EbPictureBufferDesc  * recon_picture_ptr;
     471             : 
     472          90 :     if (pPcs->is_used_as_reference_flag == EB_TRUE)
     473          68 :         recon_picture_ptr = ((EbReferenceObject*)pCs->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture;
     474             :     else
     475          22 :         recon_picture_ptr = pCs->recon_picture_ptr;
     476             : 
     477          90 :     EbByte  reconBufferY = &((recon_picture_ptr->buffer_y)[recon_picture_ptr->origin_x + recon_picture_ptr->origin_y * recon_picture_ptr->stride_y]);
     478          90 :     EbByte  reconBufferCb = &((recon_picture_ptr->buffer_cb)[recon_picture_ptr->origin_x / 2 + recon_picture_ptr->origin_y / 2 * recon_picture_ptr->stride_cb]);
     479          90 :     EbByte  reconBufferCr = &((recon_picture_ptr->buffer_cr)[recon_picture_ptr->origin_x / 2 + recon_picture_ptr->origin_y / 2 * recon_picture_ptr->stride_cr]);
     480             : 
     481          90 :     const int32_t num_planes = 3;// av1_num_planes(cm);
     482             :     DECLARE_ALIGNED(16, uint16_t, src[CDEF_INBUF_SIZE]);
     483             :     uint16_t *linebuf[3];
     484             :     uint16_t *colbuf[3];
     485             :     cdef_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
     486             :     uint8_t *row_cdef, *prev_row_cdef, *curr_row_cdef;
     487             :     int32_t cdef_count;
     488          90 :     int32_t dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
     489          90 :     int32_t var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
     490             :     int32_t mi_wide_l2[3];
     491             :     int32_t mi_high_l2[3];
     492             :     int32_t xdec[3];
     493             :     int32_t ydec[3];
     494          90 :     int32_t coeff_shift = AOMMAX(sequence_control_set_ptr->static_config.encoder_bit_depth/*cm->bit_depth*/ - 8, 0);
     495          90 :     const int32_t nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
     496          90 :     const int32_t nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
     497             :     //eb_av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0, num_planes);
     498          90 :     row_cdef = (uint8_t *)eb_aom_malloc(sizeof(*row_cdef) * (nhfb + 2) * 2);
     499          90 :     assert(row_cdef != NULL);
     500          90 :     memset(row_cdef, 1, sizeof(*row_cdef) * (nhfb + 2) * 2);
     501          90 :     prev_row_cdef = row_cdef + 1;
     502          90 :     curr_row_cdef = prev_row_cdef + nhfb + 2;
     503         360 :     for (int32_t pli = 0; pli < num_planes; pli++) {
     504         270 :         int32_t subsampling_x = (pli == 0) ? 0 : 1;
     505         270 :         int32_t subsampling_y = (pli == 0) ? 0 : 1;
     506             : 
     507         270 :         xdec[pli] = subsampling_x; //CHKN xd->plane[pli].subsampling_x;
     508         270 :         ydec[pli] = subsampling_y; //CHKN  xd->plane[pli].subsampling_y;
     509         270 :         mi_wide_l2[pli] = MI_SIZE_LOG2 - subsampling_x; //CHKN xd->plane[pli].subsampling_x;
     510         270 :         mi_high_l2[pli] = MI_SIZE_LOG2 - subsampling_y; //CHKN xd->plane[pli].subsampling_y;
     511             :     }
     512             : 
     513          90 :     const int32_t stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER;
     514         360 :     for (int32_t pli = 0; pli < num_planes; pli++) {
     515         270 :         linebuf[pli] = (uint16_t *)eb_aom_malloc(sizeof(*linebuf) * CDEF_VBORDER * stride);
     516         270 :         colbuf[pli] = (uint16_t *)eb_aom_malloc(sizeof(*colbuf)  * ((CDEF_BLOCKSIZE << mi_high_l2[pli]) + 2 * CDEF_VBORDER) * CDEF_HBORDER);
     517             :     }
     518             : 
     519         630 :     for (int32_t fbr = 0; fbr < nvfb; fbr++) {
     520        2160 :         for (int32_t pli = 0; pli < num_planes; pli++) {
     521        1620 :             const int32_t block_height =
     522        1620 :                 (MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * CDEF_VBORDER;
     523        1620 :             fill_rect(colbuf[pli], CDEF_HBORDER, block_height, CDEF_HBORDER,
     524             :                 CDEF_VERY_LARGE);
     525             :         }
     526             : 
     527         540 :         int32_t cdef_left = 1;
     528        5940 :         for (int32_t fbc = 0; fbc < nhfb; fbc++) {
     529             :             int32_t level, sec_strength;
     530             :             int32_t uv_level, uv_sec_strength;
     531             :             int32_t nhb, nvb;
     532        5400 :             int32_t cstart = 0;
     533        5400 :             curr_row_cdef[fbc] = 0;
     534             : 
     535             :             //CHKN skip this filter block when its top-left mi entry is missing or its cdef_strength is -1
     536        5400 :             if (pCs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc] == NULL ||
     537        5400 :                 pCs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]->mbmi.cdef_strength == -1) {
     538           0 :                 cdef_left = 0;
     539           0 :                 printf("\n\n\nCDEF ERROR: Skipping Current FB\n\n\n");
     540           0 :                 continue;
     541             :             }
     542             : 
     543        5400 :             if (!cdef_left) cstart = -CDEF_HBORDER;  //CHKN if the left block has not been filtered, then we can use samples on the left as input.
     544             : 
     545        5400 :             nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
     546        5400 :             nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
     547             :             int32_t frame_top, frame_left, frame_bottom, frame_right;
     548             : 
     549        5400 :             int32_t mi_row = MI_SIZE_64X64 * fbr;
     550        5400 :             int32_t mi_col = MI_SIZE_64X64 * fbc;
     551             :             // for the current filter block, its top-left corner mi structure (mi_tl)
     552             :             // is first accessed to check whether the top and left boundaries are
     553             :             // frame boundaries. Then bottom-left and top-right mi structures are
     554             :             // accessed to check whether the bottom and right boundaries
     555             :             // (respectively) are frame boundaries.
     556             :             //
     557             :             // Note that we can't just check the bottom-right mi structure - eg. if
     558             :             // we're at the right-hand edge of the frame but not the bottom, then
     559             :             // the bottom-right mi is NULL but the bottom-left is not.
     560        5400 :             frame_top = (mi_row == 0) ? 1 : 0;
     561        5400 :             frame_left = (mi_col == 0) ? 1 : 0;
     562             : 
     563        5400 :             if (fbr != nvfb - 1)
     564        4500 :                 frame_bottom = (mi_row + MI_SIZE_64X64 == cm->mi_rows) ? 1 : 0;
     565             :             else
     566         900 :                 frame_bottom = 1;
     567             : 
     568        5400 :             if (fbc != nhfb - 1)
     569        4860 :                 frame_right = (mi_col + MI_SIZE_64X64 == cm->mi_cols) ? 1 : 0;
     570             :             else
     571         540 :                 frame_right = 1;
     572             : 
     573        5400 :             const int32_t mbmi_cdef_strength = pCs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]->mbmi.cdef_strength;
     574        5400 :             level = frm_hdr->CDEF_params.cdef_y_strength[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
     575        5400 :             sec_strength = frm_hdr->CDEF_params.cdef_y_strength[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
     576        5400 :             sec_strength += sec_strength == 3;
     577        5400 :             uv_level = frm_hdr->CDEF_params.cdef_uv_strength[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
     578        5400 :             uv_sec_strength = frm_hdr->CDEF_params.cdef_uv_strength[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
     579        5400 :             uv_sec_strength += uv_sec_strength == 3;
     580        8527 :             if ((level == 0 && sec_strength == 0 && uv_level == 0 && uv_sec_strength == 0) ||
     581        3127 :                 (cdef_count = eb_sb_compute_cdef_list(pCs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, BLOCK_64X64)) == 0) {
     582        4255 :                 cdef_left = 0;
     583        4255 :                 continue;
     584             :             }
     585             : 
     586        1145 :             curr_row_cdef[fbc] = 1;
     587        4580 :             for (int32_t pli = 0; pli < num_planes; pli++) {
     588             :                 int32_t coffset;
     589             :                 int32_t rend, cend;
     590        3435 :                 int32_t pri_damping = frm_hdr->CDEF_params.cdef_damping;
     591        3435 :                 int32_t sec_damping = frm_hdr->CDEF_params.cdef_damping;
     592        3435 :                 int32_t hsize = nhb << mi_wide_l2[pli];
     593        3435 :                 int32_t vsize = nvb << mi_high_l2[pli];
     594             : 
     595        3435 :                 if (pli) {
     596        2290 :                     level = uv_level;
     597        2290 :                     sec_strength = uv_sec_strength;
     598             :                 }
     599             : 
     600        3435 :                 if (fbc == nhfb - 1)
     601         528 :                     cend = hsize;
     602             :                 else
     603        2907 :                     cend = hsize + CDEF_HBORDER;
     604             : 
     605        3435 :                 if (fbr == nvfb - 1)
     606         678 :                     rend = vsize;
     607             :                 else
     608        2757 :                     rend = vsize + CDEF_VBORDER;
     609             : 
     610        3435 :                 coffset = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
     611        3435 :                 if (fbc == nhfb - 1) {
     612             :                     /* On the last superblock column, fill in the right border with
     613             :                        CDEF_VERY_LARGE to avoid filtering with the outside. */
     614         528 :                     fill_rect(&src[cend + CDEF_HBORDER], CDEF_BSTRIDE,
     615         528 :                         rend + CDEF_VBORDER, hsize + CDEF_HBORDER - cend,
     616             :                         CDEF_VERY_LARGE);
     617             :                 }
     618        3435 :                 if (fbr == nvfb - 1) {
     619             :                     /* On the last superblock row, fill in the bottom border with
     620             :                        CDEF_VERY_LARGE to avoid filtering with the outside. */
     621         678 :                     fill_rect(&src[(rend + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE,
     622             :                         CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
     623             :                 }
     624             : 
     625        3435 :                 uint8_t* recBuff = 0;
     626        3435 :                 uint32_t recStride = 0;
     627             : 
     628        3435 :                 switch (pli) {
     629        1145 :                 case 0:
     630        1145 :                     recBuff = reconBufferY;
     631        1145 :                     recStride = recon_picture_ptr->stride_y;
     632        1145 :                     break;
     633        1145 :                 case 1:
     634        1145 :                     recBuff = reconBufferCb;
     635        1145 :                     recStride = recon_picture_ptr->stride_cb;
     636             : 
     637        1145 :                     break;
     638        1145 :                 case 2:
     639        1145 :                     recBuff = reconBufferCr;
     640        1145 :                     recStride = recon_picture_ptr->stride_cr;
     641        1145 :                     break;
     642             :                 }
     643             : 
     644             :                 /* Copy in the pixels we need from the current superblock for
     645             :                    deringing.*/
     646        3435 :                 copy_sb8_16(//cm,
     647        3435 :                     &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart],
     648             :                     CDEF_BSTRIDE, recBuff/*xd->plane[pli].dst.buf*/,
     649        3435 :                     (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr, coffset + cstart,
     650             :                     recStride/*xd->plane[pli].dst.stride*/, rend, cend - cstart);
     651        3435 :                 if (!prev_row_cdef[fbc]) {
     652         807 :                     copy_sb8_16(//cm,
     653             :                         &src[CDEF_HBORDER], CDEF_BSTRIDE,
     654             :                         recBuff/*xd->plane[pli].dst.buf*/,
     655         807 :                         (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
     656             :                         coffset, recStride/*xd->plane[pli].dst.stride*/, CDEF_VBORDER, hsize);
     657             :                 }
     658        2628 :                 else if (fbr > 0) {
     659        1968 :                     copy_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, &linebuf[pli][coffset],
     660             :                         stride, CDEF_VBORDER, hsize);
     661             :                 }
     662             :                 else {
     663         660 :                     fill_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hsize,
     664             :                         CDEF_VERY_LARGE);
     665             :                 }
     666             : 
     667        3435 :                 if (!prev_row_cdef[fbc - 1]) {
     668         627 :                     copy_sb8_16(//cm,
     669             :                         src, CDEF_BSTRIDE, recBuff/*xd->plane[pli].dst.buf*/,
     670         627 :                         (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
     671             :                         coffset - CDEF_HBORDER, recStride/*xd->plane[pli].dst.stride*/,
     672             :                         CDEF_VBORDER, CDEF_HBORDER);
     673             :                 }
     674        2808 :                 else if (fbr > 0 && fbc > 0) {
     675        1356 :                     copy_rect(src, CDEF_BSTRIDE, &linebuf[pli][coffset - CDEF_HBORDER],
     676             :                         stride, CDEF_VBORDER, CDEF_HBORDER);
     677             :                 }
     678             :                 else {
     679        1452 :                     fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
     680             :                         CDEF_VERY_LARGE);
     681             :                 }
     682             : 
     683        3435 :                 if (!prev_row_cdef[fbc + 1]) {
     684        1131 :                     copy_sb8_16(//cm,
     685        1131 :                         &src[CDEF_HBORDER + (nhb << mi_wide_l2[pli])],
     686             :                         CDEF_BSTRIDE, recBuff/*xd->plane[pli].dst.buf*/,
     687        1131 :                         (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
     688             :                         coffset + hsize, recStride/*xd->plane[pli].dst.stride*/, CDEF_VBORDER,
     689             :                         CDEF_HBORDER);
     690             :                 }
     691        2304 :                 else if (fbr > 0 && fbc < nhfb - 1) {
     692        1242 :                     copy_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE,
     693        1242 :                         &linebuf[pli][coffset + hsize], stride, CDEF_VBORDER,
     694             :                         CDEF_HBORDER);
     695             :                 }
     696             :                 else {
     697        1062 :                     fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER,
     698             :                         CDEF_HBORDER, CDEF_VERY_LARGE);
     699             :                 }
     700             : 
     701        3435 :                 if (cdef_left) {
     702             :                     /* If we deringed the superblock on the left then we need to copy in
     703             :                        saved pixels. */
     704        2661 :                     copy_rect(src, CDEF_BSTRIDE, colbuf[pli], CDEF_HBORDER,
     705             :                         rend + CDEF_VBORDER, CDEF_HBORDER);
     706             :                 }
     707             : 
     708             :                 /* Saving pixels in case we need to dering the superblock on the
     709             :                     right. */
     710        3435 :                 if (fbc < nhfb - 1)
     711        2907 :                     copy_rect(colbuf[pli], CDEF_HBORDER, src + hsize, CDEF_BSTRIDE,
     712             :                         rend + CDEF_VBORDER, CDEF_HBORDER);
     713             : 
     714        3435 :                 if (fbr < nvfb - 1)
     715        2757 :                     copy_sb8_16(
     716             :                         //cm,
     717        2757 :                         &linebuf[pli][coffset], stride, recBuff/*xd->plane[pli].dst.buf*/,
     718        2757 :                         (MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER,
     719             :                         coffset, recStride/*xd->plane[pli].dst.stride*/, CDEF_VBORDER, hsize);
     720             : 
     721        3435 :                 if (frame_top) {
     722         660 :                     fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER,
     723             :                         CDEF_VERY_LARGE);
     724             :                 }
     725        3435 :                 if (frame_left) {
     726         972 :                     fill_rect(src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER,
     727             :                         CDEF_VERY_LARGE);
     728             :                 }
     729        3435 :                 if (frame_bottom) {
     730         678 :                     fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE,
     731             :                         CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
     732             :                 }
     733        3435 :                 if (frame_right) {
     734         528 :                     fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE,
     735             :                         vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
     736             :                 }
     737             : 
     738             :                 //if (cm->use_highbitdepth) {
     739             :                 //  eb_cdef_filter_fb(
     740             :                 //      NULL,
     741             :                 //      &CONVERT_TO_SHORTPTR(
     742             :                 //          xd->plane[pli]
     743             :                 //              .dst.buf)[xd->plane[pli].dst.stride *
     744             :                 //                            (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
     745             :                 //                        (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
     746             :                 //      xd->plane[pli].dst.stride,
     747             :                 //      &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
     748             :                 //      ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
     749             :                 //      sec_strength, pri_damping, sec_damping, coeff_shift);
     750             :                 //} else
     751             :                 {
     752        3435 :                     eb_cdef_filter_fb(
     753        3435 :                         &recBuff[recStride *(MI_SIZE_64X64 * fbr << mi_high_l2[pli]) + (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
     754             :                         //&xd->plane[pli].dst.buf[xd->plane[pli].dst.stride *(MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +(fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
     755             :                         NULL, recStride/*xd->plane[pli].dst.stride*/,
     756             :                         &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
     757             :                         ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
     758             :                         sec_strength, pri_damping, sec_damping, coeff_shift);
     759             :                 }
     760             :             }
     761        1145 :             cdef_left = 1;  //CHKN filtered data is written back directly to recFrame.
     762             :         }
     763             :         {
     764         540 :             uint8_t *tmp = prev_row_cdef;
     765         540 :             prev_row_cdef = curr_row_cdef;
     766         540 :             curr_row_cdef = tmp;
     767             :         }
     768             :     }
     769          90 :     eb_aom_free(row_cdef);
     770         360 :     for (int32_t pli = 0; pli < num_planes; pli++) {
     771         270 :         eb_aom_free(linebuf[pli]);
     772         270 :         eb_aom_free(colbuf[pli]);
     773             :     }
     774          90 : }
     775             : 
     776           0 : void av1_cdef_frame16bit(
     777             :     EncDecContext                *context_ptr,
     778             :     SequenceControlSet           *sequence_control_set_ptr,
     779             :     PictureControlSet            *pCs){
     780             :     (void)context_ptr;
     781           0 :     struct PictureParentControlSet     *pPcs = pCs->parent_pcs_ptr;
     782           0 :     Av1Common*   cm = pPcs->av1_cm;
     783           0 :     FrameHeader *frm_hdr = &pPcs->frm_hdr;
     784             : 
     785             :     EbPictureBufferDesc  * recon_picture_ptr;
     786             : 
     787           0 :     if (pPcs->is_used_as_reference_flag == EB_TRUE)
     788           0 :         recon_picture_ptr = ((EbReferenceObject*)pCs->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture16bit;
     789             : 
     790             :     else
     791           0 :         recon_picture_ptr = pCs->recon_picture16bit_ptr;
     792             : 
     793           0 :     uint16_t*  reconBufferY = (uint16_t*)recon_picture_ptr->buffer_y + (recon_picture_ptr->origin_x + recon_picture_ptr->origin_y     * recon_picture_ptr->stride_y);
     794           0 :     uint16_t*  reconBufferCb = (uint16_t*)recon_picture_ptr->buffer_cb + (recon_picture_ptr->origin_x / 2 + recon_picture_ptr->origin_y / 2 * recon_picture_ptr->stride_cb);
     795           0 :     uint16_t*  reconBufferCr = (uint16_t*)recon_picture_ptr->buffer_cr + (recon_picture_ptr->origin_x / 2 + recon_picture_ptr->origin_y / 2 * recon_picture_ptr->stride_cr);
     796             : 
     797           0 :     const int32_t num_planes = 3;// av1_num_planes(cm);
     798             :     DECLARE_ALIGNED(16, uint16_t, src[CDEF_INBUF_SIZE]);
     799             :     uint16_t *linebuf[3];
     800             :     uint16_t *colbuf[3];
     801             :     cdef_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
     802             :     uint8_t *row_cdef, *prev_row_cdef, *curr_row_cdef;
     803             :     int32_t cdef_count;
     804           0 :     int32_t dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
     805           0 :     int32_t var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
     806             :     int32_t mi_wide_l2[3];
     807             :     int32_t mi_high_l2[3];
     808             :     int32_t xdec[3];
     809             :     int32_t ydec[3];
     810           0 :     int32_t coeff_shift = AOMMAX(sequence_control_set_ptr->static_config.encoder_bit_depth/*cm->bit_depth*/ - 8, 0);
     811           0 :     const int32_t nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
     812           0 :     const int32_t nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
     813             :     //eb_av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0, num_planes);
     814           0 :     row_cdef = (uint8_t *)eb_aom_malloc(sizeof(*row_cdef) * (nhfb + 2) * 2);
     815           0 :     assert(row_cdef);
     816           0 :     memset(row_cdef, 1, sizeof(*row_cdef) * (nhfb + 2) * 2);
     817           0 :     prev_row_cdef = row_cdef + 1;
     818           0 :     curr_row_cdef = prev_row_cdef + nhfb + 2;
     819           0 :     for (int32_t pli = 0; pli < num_planes; pli++) {
     820           0 :         int32_t subsampling_x = (pli == 0) ? 0 : 1;
     821           0 :         int32_t subsampling_y = (pli == 0) ? 0 : 1;
     822             : 
     823           0 :         xdec[pli] = subsampling_x; //CHKN xd->plane[pli].subsampling_x;
     824           0 :         ydec[pli] = subsampling_y; //CHKN  xd->plane[pli].subsampling_y;
     825           0 :         mi_wide_l2[pli] = MI_SIZE_LOG2 - subsampling_x; //CHKN xd->plane[pli].subsampling_x;
     826           0 :         mi_high_l2[pli] = MI_SIZE_LOG2 - subsampling_y; //CHKN xd->plane[pli].subsampling_y;
     827             :     }
     828             : 
     829           0 :     const int32_t stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER;
     830           0 :     for (int32_t pli = 0; pli < num_planes; pli++) {
     831           0 :         linebuf[pli] = (uint16_t *)eb_aom_malloc(sizeof(*linebuf) * CDEF_VBORDER * stride);
     832           0 :         colbuf[pli] = (uint16_t *)eb_aom_malloc(sizeof(*colbuf)  * ((CDEF_BLOCKSIZE << mi_high_l2[pli]) + 2 * CDEF_VBORDER) * CDEF_HBORDER);
     833             :     }
     834             : 
     835           0 :     for (int32_t fbr = 0; fbr < nvfb; fbr++) {
     836           0 :         for (int32_t pli = 0; pli < num_planes; pli++) {
     837           0 :             const int32_t block_height =
     838           0 :                 (MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * CDEF_VBORDER;
     839           0 :             fill_rect(colbuf[pli], CDEF_HBORDER, block_height, CDEF_HBORDER,
     840             :                 CDEF_VERY_LARGE);
     841             :         }
     842             : 
     843           0 :         int32_t cdef_left = 1;
     844           0 :         for (int32_t fbc = 0; fbc < nhfb; fbc++) {
     845             :             int32_t level, sec_strength;
     846             :             int32_t uv_level, uv_sec_strength;
     847             :             int32_t nhb, nvb;
     848           0 :             int32_t cstart = 0;
     849           0 :             curr_row_cdef[fbc] = 0;
     850             : 
     851             :             //CHKN skip this filter block when its top-left mi entry is missing or its cdef_strength is -1
     852           0 :             if (pCs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc] == NULL ||
     853           0 :                 pCs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]->mbmi.cdef_strength == -1) {
     854           0 :                 cdef_left = 0;
     855           0 :                 printf("\n\n\nCDEF ERROR: Skipping Current FB\n\n\n");
     856           0 :                 continue;
     857             :             }
     858             : 
     859           0 :             if (!cdef_left) cstart = -CDEF_HBORDER;  //CHKN if the left block has not been filtered, then we can use samples on the left as input.
     860             : 
     861           0 :             nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
     862           0 :             nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
     863             :             int32_t frame_top, frame_left, frame_bottom, frame_right;
     864             : 
     865           0 :             int32_t mi_row = MI_SIZE_64X64 * fbr;
     866           0 :             int32_t mi_col = MI_SIZE_64X64 * fbc;
     867             :             // for the current filter block, its top-left corner mi structure (mi_tl)
     868             :             // is first accessed to check whether the top and left boundaries are
     869             :             // frame boundaries. Then bottom-left and top-right mi structures are
     870             :             // accessed to check whether the bottom and right boundaries
     871             :             // (respectively) are frame boundaries.
     872             :             //
     873             :             // Note that we can't just check the bottom-right mi structure - eg. if
     874             :             // we're at the right-hand edge of the frame but not the bottom, then
     875             :             // the bottom-right mi is NULL but the bottom-left is not.
     876           0 :             frame_top = (mi_row == 0) ? 1 : 0;
     877           0 :             frame_left = (mi_col == 0) ? 1 : 0;
     878             : 
     879           0 :             if (fbr != nvfb - 1)
     880           0 :                 frame_bottom = (mi_row + MI_SIZE_64X64 == cm->mi_rows) ? 1 : 0;
     881             :             else
     882           0 :                 frame_bottom = 1;
     883             : 
     884           0 :             if (fbc != nhfb - 1)
     885           0 :                 frame_right = (mi_col + MI_SIZE_64X64 == cm->mi_cols) ? 1 : 0;
     886             :             else
     887           0 :                 frame_right = 1;
     888             : 
     889           0 :             const int32_t mbmi_cdef_strength = pCs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]->mbmi.cdef_strength;
     890           0 :             level = frm_hdr->CDEF_params.cdef_y_strength[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
     891           0 :             sec_strength = frm_hdr->CDEF_params.cdef_y_strength[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
     892           0 :             sec_strength += sec_strength == 3;
     893           0 :             uv_level = frm_hdr->CDEF_params.cdef_uv_strength[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
     894           0 :             uv_sec_strength = frm_hdr->CDEF_params.cdef_uv_strength[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
     895           0 :             uv_sec_strength += uv_sec_strength == 3;
     896           0 :             if ((level == 0 && sec_strength == 0 && uv_level == 0 && uv_sec_strength == 0) ||
     897           0 :                 (cdef_count = eb_sb_compute_cdef_list(pCs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, BLOCK_64X64)) == 0) {
     898           0 :                 cdef_left = 0;
     899           0 :                 continue;
     900             :             }
     901             : 
     902           0 :             curr_row_cdef[fbc] = 1;
     903           0 :             for (int32_t pli = 0; pli < num_planes; pli++) {
     904             :                 int32_t coffset;
     905             :                 int32_t rend, cend;
     906           0 :                 int32_t pri_damping = frm_hdr->CDEF_params.cdef_damping;
     907           0 :                 int32_t sec_damping = frm_hdr->CDEF_params.cdef_damping;
     908           0 :                 int32_t hsize = nhb << mi_wide_l2[pli];
     909           0 :                 int32_t vsize = nvb << mi_high_l2[pli];
     910             : 
     911           0 :                 if (pli) {
     912           0 :                     level = uv_level;
     913           0 :                     sec_strength = uv_sec_strength;
     914             :                 }
     915             : 
     916           0 :                 if (fbc == nhfb - 1)
     917           0 :                     cend = hsize;
     918             :                 else
     919           0 :                     cend = hsize + CDEF_HBORDER;
     920             : 
     921           0 :                 if (fbr == nvfb - 1)
     922           0 :                     rend = vsize;
     923             :                 else
     924           0 :                     rend = vsize + CDEF_VBORDER;
     925             : 
     926           0 :                 coffset = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
     927           0 :                 if (fbc == nhfb - 1) {
     928             :                     /* On the last superblock column, fill in the right border with
     929             :                     CDEF_VERY_LARGE to avoid filtering with the outside. */
     930           0 :                     fill_rect(&src[cend + CDEF_HBORDER], CDEF_BSTRIDE,
     931           0 :                         rend + CDEF_VBORDER, hsize + CDEF_HBORDER - cend,
     932             :                         CDEF_VERY_LARGE);
     933             :                 }
     934           0 :                 if (fbr == nvfb - 1) {
     935             :                     /* On the last superblock row, fill in the bottom border with
     936             :                     CDEF_VERY_LARGE to avoid filtering with the outside. */
     937           0 :                     fill_rect(&src[(rend + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE,
     938             :                         CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
     939             :                 }
     940             : 
     941           0 :                 uint16_t* recBuff = 0;
     942           0 :                 uint32_t recStride = 0;
     943             : 
     944           0 :                 switch (pli) {
     945           0 :                 case 0:
     946           0 :                     recBuff = reconBufferY;
     947           0 :                     recStride = recon_picture_ptr->stride_y;
     948           0 :                     break;
     949           0 :                 case 1:
     950           0 :                     recBuff = reconBufferCb;
     951           0 :                     recStride = recon_picture_ptr->stride_cb;
     952             : 
     953           0 :                     break;
     954           0 :                 case 2:
     955           0 :                     recBuff = reconBufferCr;
     956           0 :                     recStride = recon_picture_ptr->stride_cr;
     957           0 :                     break;
     958             :                 }
     959             : 
     960             :                 //--ok
     961             :                                 /* Copy in the pixels we need from the current superblock for
     962             :                                 deringing.*/
     963             : 
     964           0 :                 copy_sb16_16(//cm,
     965           0 :                     &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart],
     966             :                     CDEF_BSTRIDE, recBuff/*xd->plane[pli].dst.buf*/,
     967           0 :                     (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr, coffset + cstart,
     968             :                     recStride/*xd->plane[pli].dst.stride*/, rend, cend - cstart);
     969             : 
     970           0 :                 if (!prev_row_cdef[fbc]) {
     971           0 :                     copy_sb16_16(//cm,
     972             :                         &src[CDEF_HBORDER], CDEF_BSTRIDE,
     973             :                         recBuff/*xd->plane[pli].dst.buf*/,
     974           0 :                         (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
     975             :                         coffset, recStride/*xd->plane[pli].dst.stride*/, CDEF_VBORDER, hsize);
     976             :                 }
     977           0 :                 else if (fbr > 0) {
     978           0 :                     copy_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, &linebuf[pli][coffset],
     979             :                         stride, CDEF_VBORDER, hsize);
     980             :                 }
     981             :                 else {
     982           0 :                     fill_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hsize,
     983             :                         CDEF_VERY_LARGE);
     984             :                 }
     985             : 
     986           0 :                 if (!prev_row_cdef[fbc - 1]) {
     987           0 :                     copy_sb16_16(//cm,
     988             :                         src, CDEF_BSTRIDE, recBuff/*xd->plane[pli].dst.buf*/,
     989           0 :                         (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
     990             :                         coffset - CDEF_HBORDER, recStride/*xd->plane[pli].dst.stride*/,
     991             :                         CDEF_VBORDER, CDEF_HBORDER);
     992             :                 }
     993           0 :                 else if (fbr > 0 && fbc > 0) {
     994           0 :                     copy_rect(src, CDEF_BSTRIDE, &linebuf[pli][coffset - CDEF_HBORDER],
     995             :                         stride, CDEF_VBORDER, CDEF_HBORDER);
     996             :                 }
     997             :                 else {
     998           0 :                     fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
     999             :                         CDEF_VERY_LARGE);
    1000             :                 }
    1001             : 
    1002           0 :                 if (!prev_row_cdef[fbc + 1]) {
    1003           0 :                     copy_sb16_16(//cm,
    1004           0 :                         &src[CDEF_HBORDER + (nhb << mi_wide_l2[pli])],
    1005             :                         CDEF_BSTRIDE, recBuff/*xd->plane[pli].dst.buf*/,
    1006           0 :                         (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
    1007             :                         coffset + hsize, recStride/*xd->plane[pli].dst.stride*/, CDEF_VBORDER,
    1008             :                         CDEF_HBORDER);
    1009             :                 }
    1010           0 :                 else if (fbr > 0 && fbc < nhfb - 1) {
    1011           0 :                     copy_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE,
    1012           0 :                         &linebuf[pli][coffset + hsize], stride, CDEF_VBORDER,
    1013             :                         CDEF_HBORDER);
    1014             :                 }
    1015             :                 else {
    1016           0 :                     fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER,
    1017             :                         CDEF_HBORDER, CDEF_VERY_LARGE);
    1018             :                 }
    1019             : 
    1020           0 :                 if (cdef_left) {
    1021             :                     /* If we deringed the superblock on the left then we need to copy in
    1022             :                     saved pixels. */
    1023           0 :                     copy_rect(src, CDEF_BSTRIDE, colbuf[pli], CDEF_HBORDER,
    1024             :                         rend + CDEF_VBORDER, CDEF_HBORDER);
    1025             :                 }
    1026             : 
    1027             :                 /* Saving pixels in case we need to dering the superblock on the
    1028             :                 right. */
    1029           0 :                 if (fbc < nhfb - 1)
    1030           0 :                     copy_rect(colbuf[pli], CDEF_HBORDER, src + hsize, CDEF_BSTRIDE,
    1031             :                         rend + CDEF_VBORDER, CDEF_HBORDER);
    1032           0 :                 if (fbr < nvfb - 1)
    1033           0 :                     copy_sb16_16(
    1034             :                         //cm,
    1035           0 :                         &linebuf[pli][coffset], stride, recBuff/*xd->plane[pli].dst.buf*/,
    1036           0 :                         (MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER,
    1037             :                         coffset, recStride/*xd->plane[pli].dst.stride*/, CDEF_VBORDER, hsize);
    1038           0 :                 if (frame_top) {
    1039           0 :                     fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER,
    1040             :                         CDEF_VERY_LARGE);
    1041             :                 }
    1042           0 :                 if (frame_left) {
    1043           0 :                     fill_rect(src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER,
    1044             :                         CDEF_VERY_LARGE);
    1045             :                 }
    1046           0 :                 if (frame_bottom) {
    1047           0 :                     fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE,
    1048             :                         CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
    1049             :                 }
    1050           0 :                 if (frame_right) {
    1051           0 :                     fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE,
    1052             :                         vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
    1053             :                 }
    1054             : 
    1055             :                 //if (cm->use_highbitdepth) {
    1056             :                 //  eb_cdef_filter_fb(
    1057             :                 //      NULL,
    1058             :                 //      &CONVERT_TO_SHORTPTR(
    1059             :                 //          xd->plane[pli]
    1060             :                 //              .dst.buf)[xd->plane[pli].dst.stride *
    1061             :                 //                            (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
    1062             :                 //                        (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
    1063             :                 //      xd->plane[pli].dst.stride,
    1064             :                 //      &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
    1065             :                 //      ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
    1066             :                 //      sec_strength, pri_damping, sec_damping, coeff_shift);
    1067             :                 //} else
    1068             :                 {
    1069           0 :                     eb_cdef_filter_fb(
    1070             :                         NULL,
    1071           0 :                         &recBuff[recStride *(MI_SIZE_64X64 * fbr << mi_high_l2[pli]) + (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
    1072             :                         //&xd->plane[pli].dst.buf[xd->plane[pli].dst.stride *(MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +(fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
    1073             :                         recStride/*xd->plane[pli].dst.stride*/,
    1074             :                         &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
    1075             :                         ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
    1076             :                         sec_strength, pri_damping, sec_damping, coeff_shift);
    1077             :                 }
    1078             :             }
    1079           0 :             cdef_left = 1;  //CHKN filtered data is written back directy to recFrame.
    1080             :         }
    1081             :         {
    1082           0 :             uint8_t *tmp = prev_row_cdef;
    1083           0 :             prev_row_cdef = curr_row_cdef;
    1084           0 :             curr_row_cdef = tmp;
    1085             :         }
    1086             :     }
    1087           0 :     eb_aom_free(row_cdef);
    1088           0 :     for (int32_t pli = 0; pli < num_planes; pli++) {
    1089           0 :         eb_aom_free(linebuf[pli]);
    1090           0 :         eb_aom_free(colbuf[pli]);
    1091             :     }
    1092           0 : }
    1093             : 
    1094             : ///------- CDEF strength search
    1095             : 
    1096             : static int32_t priconv[REDUCED_PRI_STRENGTHS] = { 0, 1, 2, 3, 5, 7, 10, 13 };  // NOTE(review): presumably maps a reduced primary-strength index to its full strength value -- confirm at the use sites
    1097             : 
    1098             : /* Search for the best strength to add as an option, knowing we
    1099             : already selected nb_strengths options. */
    1100           0 : static uint64_t search_one(int32_t *lev, int32_t nb_strengths,
    1101             :     uint64_t mse[][TOTAL_STRENGTHS], int32_t sb_count,
    1102             :     int32_t fast, int32_t start_gi, int32_t end_gi) {
    1103             :     uint64_t tot_mse[TOTAL_STRENGTHS];
    1104             :     (void)fast;
    1105           0 :     const int32_t total_strengths = end_gi;
    1106             :     int32_t i, j;
    1107           0 :     uint64_t best_tot_mse = (uint64_t)1 << 63;
    1108           0 :     int32_t best_id = 0;
    1109           0 :     memset(tot_mse, 0, sizeof(tot_mse));
    1110           0 :     for (i = 0; i < sb_count; i++) {
    1111             :         int32_t gi;
    1112           0 :         uint64_t best_mse = (uint64_t)1 << 63;
    1113             :         /* Find best mse among already selected options. */
    1114           0 :         for (gi = 0; gi < nb_strengths; gi++) {
    1115           0 :             if (mse[i][lev[gi]] < best_mse)
    1116           0 :                 best_mse = mse[i][lev[gi]];
    1117             :         }
    1118             :         /* Find best mse when adding each possible new option. */
    1119             : 
    1120           0 :         for (j = start_gi; j < total_strengths; j++) {
    1121           0 :             uint64_t best = best_mse;
    1122           0 :             if (mse[i][j] < best) best = mse[i][j];
    1123           0 :             tot_mse[j] += best;
    1124             :         }
    1125             :     }
    1126           0 :     for (j = start_gi; j < total_strengths; j++) {
    1127           0 :         if (tot_mse[j] < best_tot_mse) {
    1128           0 :             best_tot_mse = tot_mse[j];
    1129           0 :             best_id = j;
    1130             :         }
    1131             :     }
    1132           0 :     lev[nb_strengths] = best_id;
    1133           0 :     return best_tot_mse;
    1134             : }
    1135             : 
    1136             : /* Search for the best luma+chroma strength to add as an option, knowing we
    1137             : already selected nb_strengths options. */
    1138           0 : uint64_t search_one_dual_c(int *lev0, int *lev1, int nb_strengths,
    1139             :     uint64_t(**mse)[TOTAL_STRENGTHS], int sb_count,
    1140             :     int fast, int start_gi, int end_gi) {
    1141             :     uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
    1142             :     int32_t i, j;
    1143           0 :     uint64_t best_tot_mse = (uint64_t)1 << 63;
    1144           0 :     int32_t best_id0 = 0;
    1145           0 :     int32_t best_id1 = 0;
    1146             :     (void)fast;
    1147           0 :     const int32_t total_strengths = end_gi;
    1148           0 :     memset(tot_mse, 0, sizeof(tot_mse));
    1149           0 :     for (i = 0; i < sb_count; i++) {
    1150             :         int32_t gi;
    1151           0 :         uint64_t best_mse = (uint64_t)1 << 63;
    1152             :         /* Find best mse among already selected options. */
    1153           0 :         for (gi = 0; gi < nb_strengths; gi++) {
    1154           0 :             uint64_t curr = mse[0][i][lev0[gi]];
    1155           0 :             curr += mse[1][i][lev1[gi]];
    1156           0 :             if (curr < best_mse)
    1157           0 :                 best_mse = curr;
    1158             :         }
    1159             :         /* Find best mse when adding each possible new option. */
    1160           0 :         for (j = start_gi; j < total_strengths; j++) {
    1161             :             int32_t k;
    1162           0 :             for (k = start_gi; k < total_strengths; k++) {
    1163           0 :                 uint64_t best = best_mse;
    1164           0 :                 uint64_t curr = mse[0][i][j];
    1165           0 :                 curr += mse[1][i][k];
    1166           0 :                 if (curr < best) best = curr;
    1167           0 :                 tot_mse[j][k] += best;
    1168             :             }
    1169             :         }
    1170             :     }
    1171             : 
    1172           0 :     for (j = start_gi; j < total_strengths; j++) {
    1173             :         int32_t k;
    1174           0 :         for (k = start_gi; k < total_strengths; k++) {
    1175           0 :             if (tot_mse[j][k] < best_tot_mse) {
    1176           0 :                 best_tot_mse = tot_mse[j][k];
    1177           0 :                 best_id0 = j;
    1178           0 :                 best_id1 = k;
    1179             :             }
    1180             :         }
    1181             :     }
    1182           0 :     lev0[nb_strengths] = best_id0;
    1183           0 :     lev1[nb_strengths] = best_id1;
    1184           0 :     return best_tot_mse;
    1185             : }
    1186             : 
    1187             : /* Search for the set of strengths that minimizes mse. */
    1188           0 : static uint64_t joint_strength_search(int32_t *best_lev, int32_t nb_strengths,
    1189             :     uint64_t mse[][TOTAL_STRENGTHS],
    1190             :     int32_t sb_count, int32_t fast, int32_t start_gi, int32_t end_gi) {
    1191             :     uint64_t best_tot_mse;
    1192             :     int32_t i;
    1193           0 :     best_tot_mse = (uint64_t)1 << 63;
    1194             :     /* Greedy search: add one strength options at a time. */
    1195           0 :     for (i = 0; i < nb_strengths; i++)
    1196           0 :         best_tot_mse = search_one(best_lev, i, mse, sb_count, fast, start_gi, end_gi);
    1197             :     /* Trying to refine the greedy search by reconsidering each
    1198             :     already-selected option. */
    1199           0 :     if (!fast) {
    1200           0 :         for (i = 0; i < 4 * nb_strengths; i++) {
    1201             :             int32_t j;
    1202           0 :             for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
    1203             :             best_tot_mse =
    1204           0 :                 search_one(best_lev, nb_strengths - 1, mse, sb_count, fast, start_gi, end_gi);
    1205             :         }
    1206             :     }
    1207           0 :     return best_tot_mse;
    1208             : }
    1209             : 
    1210             : /* Search for the set of luma+chroma strengths that minimizes mse. */
    1211         480 : static uint64_t joint_strength_search_dual(int32_t *best_lev0, int32_t *best_lev1,
    1212             :     int32_t nb_strengths,
    1213             :     uint64_t(**mse)[TOTAL_STRENGTHS],
    1214             :     int32_t sb_count, int32_t fast, int32_t start_gi, int32_t end_gi) {
    1215             :     uint64_t best_tot_mse;
    1216             :     int32_t i;
    1217         480 :     best_tot_mse = (uint64_t)1 << 63;
    1218             :     /* Greedy search: add one strength options at a time. */
    1219        2280 :     for (i = 0; i < nb_strengths; i++)
    1220        1800 :         best_tot_mse = search_one_dual(best_lev0, best_lev1, i, mse, sb_count, fast, start_gi, end_gi);
    1221             :     /* Trying to refine the greedy search by reconsidering each
    1222             :     already-selected option. */
    1223        7679 :     for (i = 0; i < 4 * nb_strengths; i++) {
    1224             :         int32_t j;
    1225       40797 :         for (j = 0; j < nb_strengths - 1; j++) {
    1226       33597 :             best_lev0[j] = best_lev0[j + 1];
    1227       33597 :             best_lev1[j] = best_lev1[j + 1];
    1228             :         }
    1229        7200 :         best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse, sb_count, fast, start_gi, end_gi);
    1230             :     }
    1231         479 :     return best_tot_mse;
    1232             : }
    1233             : 
    1234             : /* FIXME: SSE-optimize this. */
    1235           0 :  void copy_sb16_16(uint16_t *dst, int32_t dstride, const uint16_t *src,
    1236             :     int32_t src_voffset, int32_t src_hoffset, int32_t sstride,
    1237             :     int32_t vsize, int32_t hsize) {
    1238             :     int32_t r, c;
    1239           0 :     const uint16_t *base = &src[src_voffset * sstride + src_hoffset];
    1240           0 :     for (r = 0; r < vsize; r++) {
    1241           0 :         EB_MEMCPY(dst, (void*)base, 2 * hsize);
    1242           0 :         dst += dstride;
    1243           0 :         base += sstride;
    1244             :     }
    1245             :     UNUSED(c);
    1246           0 : }
    1247             : 
    1248           0 : static INLINE uint64_t dist_8x8_16bit_c(const uint16_t *src, const uint16_t *dst, const int32_t dstride, const int32_t coeff_shift) {
    1249           0 :     uint64_t svar = 0;
    1250           0 :     uint64_t dvar = 0;
    1251           0 :     uint64_t sum_s = 0;
    1252           0 :     uint64_t sum_d = 0;
    1253           0 :     uint64_t sum_s2 = 0;
    1254           0 :     uint64_t sum_d2 = 0;
    1255           0 :     uint64_t sum_sd = 0;
    1256             :     int32_t i, j;
    1257           0 :     for (i = 0; i < 8; i++) {
    1258           0 :         for (j = 0; j < 8; j++) {
    1259           0 :             sum_s += src[8 * i + j];
    1260           0 :             sum_d += dst[i * dstride + j];
    1261           0 :             sum_s2 += src[8 * i + j] * src[8 * i + j];
    1262           0 :             sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
    1263           0 :             sum_sd += src[8 * i + j] * dst[i * dstride + j];
    1264             :         }
    1265             :     }
    1266             :     /* Compute the variance -- the calculation cannot go negative. */
    1267           0 :     svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
    1268           0 :     dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
    1269           0 :     return (uint64_t)floor(
    1270           0 :         .5 + (sum_d2 + sum_s2 - 2 * sum_sd) * .5 *
    1271           0 :         (svar + dvar + (400 << 2 * coeff_shift)) /
    1272           0 :         (sqrt((20000 << 4 * coeff_shift) + svar * (double)dvar)));
    1273             : }
    1274             : 
    1275           0 : static INLINE uint64_t mse_8_16bit(const uint16_t *src, const uint16_t *dst, const int32_t dstride, const int32_t height) {
    1276           0 :     uint64_t sum = 0;
    1277             :     int32_t i, j;
    1278           0 :     for (i = 0; i < height; i++) {
    1279           0 :         for (j = 0; j < 8; j++) {
    1280           0 :             int32_t e = dst[i * dstride + j] - src[8 * i + j];
    1281           0 :             sum += e * e;
    1282             :         }
    1283             :     }
    1284           0 :     return sum;
    1285             : }
    1286             : 
    1287           0 : static INLINE uint64_t mse_4_16bit_c(const uint16_t *src, const uint16_t *dst, const int32_t dstride, const int32_t height) {
    1288           0 :     uint64_t sum = 0;
    1289             :     int32_t i, j;
    1290           0 :     for (i = 0; i < height; i++) {
    1291           0 :         for (j = 0; j < 4; j++) {
    1292           0 :             int32_t e = dst[i * dstride + j] - src[4 * i + j];
    1293           0 :             sum += e * e;
    1294             :         }
    1295             :     }
    1296           0 :     return sum;
    1297             : }
    1298             : 
    1299           0 : static INLINE uint64_t dist_8x8_8bit_c(const uint8_t *src, const uint8_t *dst, const int32_t dstride, const int32_t coeff_shift) {
    1300           0 :     uint64_t svar = 0;
    1301           0 :     uint64_t dvar = 0;
    1302           0 :     uint64_t sum_s = 0;
    1303           0 :     uint64_t sum_d = 0;
    1304           0 :     uint64_t sum_s2 = 0;
    1305           0 :     uint64_t sum_d2 = 0;
    1306           0 :     uint64_t sum_sd = 0;
    1307             :     int32_t i, j;
    1308           0 :     for (i = 0; i < 8; i++) {
    1309           0 :         for (j = 0; j < 8; j++) {
    1310           0 :             sum_s += src[8 * i + j];
    1311           0 :             sum_d += dst[i * dstride + j];
    1312           0 :             sum_s2 += src[8 * i + j] * src[8 * i + j];
    1313           0 :             sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
    1314           0 :             sum_sd += src[8 * i + j] * dst[i * dstride + j];
    1315             :         }
    1316             :     }
    1317             :     /* Compute the variance -- the calculation cannot go negative. */
    1318           0 :     svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
    1319           0 :     dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
    1320           0 :     return (uint64_t)floor(
    1321           0 :         .5 + (sum_d2 + sum_s2 - 2 * sum_sd) * .5 *
    1322           0 :         (svar + dvar + (400 << 2 * coeff_shift)) /
    1323           0 :         (sqrt((20000 << 4 * coeff_shift) + svar * (double)dvar)));
    1324             : }
    1325             : 
    1326           0 : static INLINE uint64_t mse_8_8bit(const uint8_t *src, const uint8_t *dst, const int32_t dstride, const int32_t height) {
    1327           0 :     uint64_t sum = 0;
    1328             :     int32_t i, j;
    1329           0 :     for (i = 0; i < height; i++) {
    1330           0 :         for (j = 0; j < 8; j++) {
    1331           0 :             int32_t e = dst[i * dstride + j] - src[8 * i + j];
    1332           0 :             sum += e * e;
    1333             :         }
    1334             :     }
    1335           0 :     return sum;
    1336             : }
    1337             : 
    1338           0 : static INLINE uint64_t mse_4_8bit_c(const uint8_t *src, const uint8_t *dst, const int32_t dstride, const int32_t height) {
    1339           0 :     uint64_t sum = 0;
    1340             :     int32_t i, j;
    1341           0 :     for (i = 0; i < height; i++) {
    1342           0 :         for (j = 0; j < 4; j++) {
    1343           0 :             int32_t e = dst[i * dstride + j] - src[4 * i + j];
    1344           0 :             sum += e * e;
    1345             :         }
    1346             :     }
    1347           0 :     return sum;
    1348             : }
    1349             : 
    1350             : /* Compute MSE only on the blocks we filtered. */
    1351           0 : uint64_t compute_cdef_dist_c(const uint16_t *dst, int32_t dstride, const uint16_t *src, const cdef_list *dlist, int32_t cdef_count, BlockSize bsize, int32_t coeff_shift, int32_t pli) {
    1352           0 :     uint64_t sum = 0;
    1353             :     int32_t bi, bx, by;
    1354           0 :     if (bsize == BLOCK_8X8) {
    1355           0 :         for (bi = 0; bi < cdef_count; bi++) {
    1356           0 :             by = dlist[bi].by;
    1357           0 :             bx = dlist[bi].bx;
    1358           0 :             if (pli == 0) {
    1359           0 :                 sum += dist_8x8_16bit_c(&src[bi << (3 + 3)], &dst[(by << 3) * dstride + (bx << 3)], dstride,
    1360             :                     coeff_shift);
    1361             :             }
    1362             :             else
    1363           0 :                 sum += mse_8_16bit(&src[bi << (3 + 3)], &dst[(by << 3) * dstride + (bx << 3)], dstride, 8);
    1364             :         }
    1365             :     }
    1366           0 :     else if (bsize == BLOCK_4X8) {
    1367           0 :         for (bi = 0; bi < cdef_count; bi++) {
    1368           0 :             by = dlist[bi].by;
    1369           0 :             bx = dlist[bi].bx;
    1370           0 :             sum += mse_4_16bit_c(&src[bi << (3 + 2)], &dst[(by << 3) * dstride + (bx << 2)], dstride, 8);
    1371             :         }
    1372             :     }
    1373           0 :     else if (bsize == BLOCK_8X4) {
    1374           0 :         for (bi = 0; bi < cdef_count; bi++) {
    1375           0 :             by = dlist[bi].by;
    1376           0 :             bx = dlist[bi].bx;
    1377           0 :             sum += mse_8_16bit(&src[bi << (2 + 3)], &dst[(by << 2) * dstride + (bx << 3)], dstride, 4);
    1378             :         }
    1379             :     }
    1380             :     else {
    1381           0 :         assert(bsize == BLOCK_4X4);
    1382           0 :         for (bi = 0; bi < cdef_count; bi++) {
    1383           0 :             by = dlist[bi].by;
    1384           0 :             bx = dlist[bi].bx;
    1385           0 :             sum += mse_4_16bit_c(&src[bi << (2 + 2)], &dst[(by << 2) * dstride + (bx << 2)], dstride, 4);
    1386             :         }
    1387             :     }
    1388           0 :     return sum >> 2 * coeff_shift;
    1389             : }
    1390             : 
    1391           0 : uint64_t compute_cdef_dist_8bit_c(const uint8_t *dst8, int32_t dstride, const uint8_t *src8, const cdef_list *dlist, int32_t cdef_count, BlockSize bsize, int32_t coeff_shift, int32_t pli) {
    1392           0 :     uint64_t sum = 0;
    1393             :     int32_t bi, bx, by;
    1394           0 :     if (bsize == BLOCK_8X8) {
    1395           0 :         for (bi = 0; bi < cdef_count; bi++) {
    1396           0 :             by = dlist[bi].by;
    1397           0 :             bx = dlist[bi].bx;
    1398           0 :             if (pli == 0) {
    1399           0 :                 sum += dist_8x8_8bit_c(&src8[bi << (3 + 3)], &dst8[(by << 3) * dstride + (bx << 3)], dstride,
    1400             :                     coeff_shift);
    1401             :             }
    1402             :             else
    1403           0 :                 sum += mse_8_8bit(&src8[bi << (3 + 3)], &dst8[(by << 3) * dstride + (bx << 3)], dstride, 8);
    1404             :         }
    1405             :     }
    1406           0 :     else if (bsize == BLOCK_4X8) {
    1407           0 :         for (bi = 0; bi < cdef_count; bi++) {
    1408           0 :             by = dlist[bi].by;
    1409           0 :             bx = dlist[bi].bx;
    1410           0 :             sum += mse_4_8bit_c(&src8[bi << (3 + 2)], &dst8[(by << 3) * dstride + (bx << 2)], dstride, 8);
    1411             :         }
    1412             :     }
    1413           0 :     else if (bsize == BLOCK_8X4) {
    1414           0 :         for (bi = 0; bi < cdef_count; bi++) {
    1415           0 :             by = dlist[bi].by;
    1416           0 :             bx = dlist[bi].bx;
    1417           0 :             sum += mse_8_8bit(&src8[bi << (2 + 3)], &dst8[(by << 2) * dstride + (bx << 3)], dstride, 4);
    1418             :         }
    1419             :     }
    1420             :     else {
    1421           0 :         assert(bsize == BLOCK_4X4);
    1422           0 :         for (bi = 0; bi < cdef_count; bi++) {
    1423           0 :             by = dlist[bi].by;
    1424           0 :             bx = dlist[bi].bx;
    1425           0 :             sum += mse_4_8bit_c(&src8[bi << (2 + 2)], &dst8[(by << 2) * dstride + (bx << 2)], dstride, 4);
    1426             :         }
    1427             :     }
    1428           0 :     return sum >> 2 * coeff_shift;
    1429             : }
    1430             : 
    1431         120 : void finish_cdef_search(
    1432             :     EncDecContext                *context_ptr,
    1433             :     SequenceControlSet           *sequence_control_set_ptr,
    1434             :     PictureControlSet            *picture_control_set_ptr
    1435             :     , int32_t                      selected_strength_cnt[64]
    1436             : )
    1437             : {
    1438             :     (void)context_ptr;
    1439         120 :     int32_t fast = 0;
    1440         120 :     struct PictureParentControlSet     *pPcs = picture_control_set_ptr->parent_pcs_ptr;
    1441         120 :     FrameHeader *frm_hdr = &pPcs->frm_hdr;
    1442         120 :     Av1Common*   cm = pPcs->av1_cm;
    1443         120 :     int32_t mi_rows = pPcs->av1_cm->mi_rows;
    1444         120 :     int32_t mi_cols = pPcs->av1_cm->mi_cols;
    1445             : 
    1446             :     int32_t fbr, fbc;
    1447             : 
    1448             :     int32_t pli;
    1449             : 
    1450         120 :     uint64_t best_tot_mse = (uint64_t)1 << 63;
    1451             :     uint64_t tot_mse;
    1452             :     int32_t sb_count;
    1453         120 :     int32_t nvfb = (mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    1454         120 :     int32_t nhfb = (mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    1455         120 :     int32_t *sb_index = (int32_t *)malloc(nvfb * nhfb * sizeof(*sb_index));
    1456         120 :     int32_t *selected_strength = (int32_t *)malloc(nvfb * nhfb * sizeof(*sb_index));
    1457         120 :     int32_t best_frame_gi_cnt = 0;
    1458         120 :     const int32_t total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
    1459             :     int32_t gi_step;
    1460             :     int32_t mid_gi;
    1461             :     int32_t start_gi;
    1462             :     int32_t end_gi;
    1463             : 
    1464         120 :     assert(sb_index != NULL);
    1465         120 :     assert(selected_strength != NULL);
    1466             : 
    1467         120 :     gi_step = get_cdef_gi_step(pPcs->cdef_filter_mode);
    1468             : 
    1469         120 :     mid_gi = pPcs->cdf_ref_frame_strenght;
    1470         120 :     start_gi = pPcs->use_ref_frame_cdef_strength && pPcs->cdef_filter_mode == 1 ? (AOMMAX(0, mid_gi - gi_step)) : 0;
    1471         120 :     end_gi = pPcs->use_ref_frame_cdef_strength ? AOMMIN(total_strengths, mid_gi + gi_step) : pPcs->cdef_filter_mode == 1 ? 8 : total_strengths;
    1472             : 
    1473             :     uint64_t(*mse[2])[TOTAL_STRENGTHS];
    1474         120 :     int32_t pri_damping = 3 + (frm_hdr->quantization_params.base_q_idx >> 6);
    1475             :     //int32_t sec_damping = 3 + (frm_hdr->quantization_params.base_q_idx >> 6);
    1476             :     int32_t i;
    1477             :     int32_t nb_strengths;
    1478             :     int32_t nb_strength_bits;
    1479             :     int32_t quantizer;
    1480             :     double lambda;
    1481         120 :     const int32_t num_planes = 3;
    1482             : 
    1483         120 :     quantizer =
    1484         120 :         eb_av1_ac_quant_Q3(frm_hdr->quantization_params.base_q_idx, 0, (AomBitDepth)sequence_control_set_ptr->static_config.encoder_bit_depth) >> (sequence_control_set_ptr->static_config.encoder_bit_depth - 8);
    1485         120 :     lambda = .12 * quantizer * quantizer / 256.;
    1486             : 
    1487         120 :     mse[0] = (uint64_t(*)[64])malloc(sizeof(**mse) * nvfb * nhfb);
    1488         120 :     mse[1] = (uint64_t(*)[64])malloc(sizeof(**mse) * nvfb * nhfb);
    1489             : 
    1490         120 :     sb_count = 0;
    1491         840 :     for (fbr = 0; fbr < nvfb; ++fbr) {
    1492        7920 :         for (fbc = 0; fbc < nhfb; ++fbc) {
    1493        7200 :             ModeInfo **mi = picture_control_set_ptr->mi_grid_base + MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc;
    1494        7200 :             const MbModeInfo *mbmi = &mi[0]->mbmi;
    1495             : 
    1496        7200 :             if (((fbc & 1) &&
    1497        3600 :                 (mbmi->block_mi.sb_type == BLOCK_128X128 || mbmi->block_mi.sb_type == BLOCK_128X64)) ||
    1498        7200 :                 ((fbr & 1) &&
    1499        3600 :                 (mbmi->block_mi.sb_type == BLOCK_128X128 || mbmi->block_mi.sb_type == BLOCK_64X128)))
    1500             :             {
    1501           0 :                 continue;
    1502             :             }
    1503             : 
    1504             :             // No filtering if the entire filter block is skipped
    1505        7200 :             if (eb_sb_all_skip(picture_control_set_ptr, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64))
    1506        5765 :                 continue;
    1507             : 
    1508        5740 :             for (pli = 0; pli < num_planes; pli++) {
    1509        4305 :                 if (pli == 0)
    1510        1435 :                      memcpy(mse[0][sb_count], picture_control_set_ptr->mse_seg[0][fbr*nhfb + fbc], TOTAL_STRENGTHS * sizeof(uint64_t));
    1511        4305 :                 if (pli == 2)
    1512        1435 :                      memcpy(mse[1][sb_count], picture_control_set_ptr->mse_seg[1][fbr*nhfb + fbc], TOTAL_STRENGTHS * sizeof(uint64_t));
    1513        4305 :                 sb_index[sb_count] = MI_SIZE_64X64 * fbr * picture_control_set_ptr->mi_stride + MI_SIZE_64X64 * fbc;
    1514             :             }
    1515        1435 :             sb_count++;
    1516             :         }
    1517             :     }
    1518             : 
    1519         120 :     nb_strength_bits = 0;
    1520             :     /* Search for different number of signalling bits. */
    1521         600 :     for (i = 0; i <= 3; i++) {
    1522             :         int32_t j;
    1523             :         int32_t best_lev0[CDEF_MAX_STRENGTHS];
    1524         480 :         int32_t best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
    1525         480 :         nb_strengths = 1 << i;
    1526         480 :         if (num_planes >= 3)
    1527         480 :             tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths, mse, sb_count, fast, start_gi, end_gi);
    1528             :         else
    1529           0 :             tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count, fast, start_gi, end_gi);
    1530             :         /* Count superblock signalling cost. */
    1531         480 :         tot_mse += (uint64_t)(sb_count * lambda * i);
    1532             :         /* Count header signalling cost. */
    1533         480 :         tot_mse += (uint64_t)(nb_strengths * lambda * CDEF_STRENGTH_BITS);
    1534         480 :         if (tot_mse < best_tot_mse) {
    1535         163 :             best_tot_mse = tot_mse;
    1536         163 :             nb_strength_bits = i;
    1537         439 :             for (j = 0; j < 1 << nb_strength_bits; j++) {
    1538         276 :                 frm_hdr->CDEF_params.cdef_y_strength[j] = best_lev0[j];
    1539         276 :                 frm_hdr->CDEF_params.cdef_uv_strength[j] = best_lev1[j];
    1540             :             }
    1541             :         }
    1542             :     }
    1543         120 :     nb_strengths = 1 << nb_strength_bits;
    1544             : 
    1545         120 :     frm_hdr->CDEF_params.cdef_bits = nb_strength_bits;
    1546         120 :     pPcs->nb_cdef_strengths = nb_strengths;
    1547        1555 :     for (i = 0; i < sb_count; i++) {
    1548             :         int32_t gi;
    1549             :         int32_t best_gi;
    1550        1435 :         uint64_t best_mse = (uint64_t)1 << 63;
    1551        1435 :         best_gi = 0;
    1552        6529 :         for (gi = 0; gi < pPcs->nb_cdef_strengths; gi++) {
    1553        5094 :             uint64_t curr = mse[0][i][frm_hdr->CDEF_params.cdef_y_strength[gi]];
    1554        5094 :             if (num_planes >= 3) curr += mse[1][i][frm_hdr->CDEF_params.cdef_uv_strength[gi]];
    1555        5094 :             if (curr < best_mse) {
    1556        2276 :                 best_gi = gi;
    1557        2276 :                 best_mse = curr;
    1558             :             }
    1559             :         }
    1560        1435 :         selected_strength[i] = best_gi;
    1561        1435 :         selected_strength_cnt[best_gi]++;
    1562             : 
    1563        1435 :         picture_control_set_ptr->mi_grid_base[sb_index[i]]->mbmi.cdef_strength = (int8_t)best_gi;
    1564             :         //in case the fb is within a block=128x128 or 128x64, or 64x128, then we genrate param only for the first 64x64.
    1565             :         //since our mi map deos not have the multi pointer single data assignment, we need to duplicate data.
    1566        1435 :         BlockSize sb_type = picture_control_set_ptr->mi_grid_base[sb_index[i]]->mbmi.block_mi.sb_type;
    1567             : 
    1568        1435 :         switch (sb_type)
    1569             :         {
    1570           0 :         case BLOCK_128X128:
    1571           0 :             picture_control_set_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->mbmi.cdef_strength = (int8_t)best_gi;
    1572           0 :             picture_control_set_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * picture_control_set_ptr->mi_stride]->mbmi.cdef_strength = (int8_t)best_gi;
    1573           0 :             picture_control_set_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * picture_control_set_ptr->mi_stride + MI_SIZE_64X64]->mbmi.cdef_strength = (int8_t)best_gi;
    1574           0 :             break;
    1575           0 :         case BLOCK_128X64:
    1576           0 :             picture_control_set_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->mbmi.cdef_strength = (int8_t)best_gi;
    1577           0 :             break;
    1578           0 :         case BLOCK_64X128:
    1579           0 :             picture_control_set_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * picture_control_set_ptr->mi_stride]->mbmi.cdef_strength = (int8_t)best_gi;
    1580           0 :             break;
    1581        1435 :         default:
    1582        1435 :             break;
    1583             :         }
    1584             :     }
    1585             : 
    1586         120 :     if (fast) {
    1587           0 :         for (int32_t j = 0; j < nb_strengths; j++) {
    1588           0 :             frm_hdr->CDEF_params.cdef_y_strength[j] = priconv[frm_hdr->CDEF_params.cdef_y_strength[j] / CDEF_SEC_STRENGTHS] * CDEF_SEC_STRENGTHS + (frm_hdr->CDEF_params.cdef_y_strength[j] % CDEF_SEC_STRENGTHS);
    1589           0 :             frm_hdr->CDEF_params.cdef_uv_strength[j] = priconv[frm_hdr->CDEF_params.cdef_uv_strength[j] / CDEF_SEC_STRENGTHS] * CDEF_SEC_STRENGTHS + (frm_hdr->CDEF_params.cdef_uv_strength[j] % CDEF_SEC_STRENGTHS);
    1590             :         }
    1591             :     }
    1592             :     //cdef_pri_damping & cdef_sec_damping consolidated to cdef_damping
    1593         120 :     frm_hdr->CDEF_params.cdef_damping = pri_damping;
    1594             :     //pPcs->cdef_pri_damping = pri_damping;
    1595             :     //pPcs->cdef_sec_damping = sec_damping;
    1596        7800 :     for (int i = 0; i < total_strengths; i++)
    1597        7680 :         best_frame_gi_cnt += selected_strength_cnt[i] > best_frame_gi_cnt ? 1 : 0;
    1598         120 :     pPcs->cdef_frame_strength = ((best_frame_gi_cnt + 4) / 4) * 4;
    1599             : 
    1600         120 :     free(mse[0]);
    1601         120 :     free(mse[1]);
    1602         120 :     free(sb_index);
    1603         120 :     free(selected_strength);
    1604         120 : }
    1605             : 
    1606           0 : void eb_av1_cdef_search(
    1607             :     EncDecContext                *context_ptr,
    1608             :     SequenceControlSet           *sequence_control_set_ptr,
    1609             :     PictureControlSet            *picture_control_set_ptr
    1610             :     //Yv12BufferConfig *frame,
    1611             :     //const Yv12BufferConfig *ref,
    1612             :     //Av1Common *cm,
    1613             :     //MacroBlockD *xd,
    1614             :     //int32_t fast
    1615             : )
    1616             : {
    1617             :     (void)context_ptr;
    1618           0 :     int32_t fast = 0;
    1619           0 :     struct PictureParentControlSet     *pPcs = picture_control_set_ptr->parent_pcs_ptr;
    1620           0 :     FrameHeader *frm_hdr = &pPcs->frm_hdr;
    1621           0 :     Av1Common*   cm = pPcs->av1_cm;
    1622           0 :     int32_t mi_rows = pPcs->av1_cm->mi_rows;
    1623           0 :     int32_t mi_cols = pPcs->av1_cm->mi_cols;
    1624             : 
    1625             :     EbPictureBufferDesc  * recon_picture_ptr;
    1626           0 :     if (pPcs->is_used_as_reference_flag == EB_TRUE)
    1627           0 :         recon_picture_ptr = ((EbReferenceObject*)picture_control_set_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture;
    1628             :     else
    1629           0 :         recon_picture_ptr = picture_control_set_ptr->recon_picture_ptr;
    1630             : 
    1631           0 :     EbByte  reconBufferY = &((recon_picture_ptr->buffer_y)[recon_picture_ptr->origin_x + recon_picture_ptr->origin_y * recon_picture_ptr->stride_y]);
    1632           0 :     EbByte  reconBufferCb = &((recon_picture_ptr->buffer_cb)[recon_picture_ptr->origin_x / 2 + recon_picture_ptr->origin_y / 2 * recon_picture_ptr->stride_cb]);
    1633           0 :     EbByte  reconBufferCr = &((recon_picture_ptr->buffer_cr)[recon_picture_ptr->origin_x / 2 + recon_picture_ptr->origin_y / 2 * recon_picture_ptr->stride_cr]);
    1634             : 
    1635           0 :     EbPictureBufferDesc *input_picture_ptr = (EbPictureBufferDesc*)picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
    1636           0 :     EbByte  inputBufferY = &((input_picture_ptr->buffer_y)[input_picture_ptr->origin_x + input_picture_ptr->origin_y * input_picture_ptr->stride_y]);
    1637           0 :     EbByte  inputBufferCb = &((input_picture_ptr->buffer_cb)[input_picture_ptr->origin_x / 2 + input_picture_ptr->origin_y / 2 * input_picture_ptr->stride_cb]);
    1638           0 :     EbByte  inputBufferCr = &((input_picture_ptr->buffer_cr)[input_picture_ptr->origin_x / 2 + input_picture_ptr->origin_y / 2 * input_picture_ptr->stride_cr]);
    1639             : 
    1640             :     int32_t r, c;
    1641             :     int32_t fbr, fbc;
    1642             :     uint16_t *src[3];
    1643             :     uint16_t *ref_coeff[3];
    1644             :     /*static*/ cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
    1645           0 :     int32_t dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
    1646           0 :     int32_t var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
    1647             :     int32_t stride[3];
    1648             :     int32_t bsize[3];
    1649             :     int32_t mi_wide_l2[3];
    1650             :     int32_t mi_high_l2[3];
    1651             :     int32_t xdec[3];
    1652             :     int32_t ydec[3];
    1653             :     int32_t pli;
    1654             :     int32_t cdef_count;
    1655             : 
    1656             :     //CHKN int32_t coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
    1657           0 :     int32_t coeff_shift = AOMMAX(sequence_control_set_ptr->static_config.encoder_bit_depth - 8, 0);
    1658             : 
    1659           0 :     uint64_t best_tot_mse = (uint64_t)1 << 63;
    1660             :     uint64_t tot_mse;
    1661             :     int32_t sb_count;
    1662             : 
    1663           0 :     int32_t nvfb = (mi_rows /*cm->mi_rows*/ + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    1664           0 :     int32_t nhfb = (mi_cols/*cm->mi_cols*/ + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    1665             : 
    1666           0 :     int32_t *sb_index = (int32_t *)eb_aom_malloc(nvfb * nhfb * sizeof(*sb_index));       //CHKN add cast
    1667           0 :     int32_t *selected_strength = (int32_t *)eb_aom_malloc(nvfb * nhfb * sizeof(*sb_index));
    1668             : 
    1669           0 :     assert(sb_index != NULL);
    1670           0 :     assert(selected_strength != NULL);
    1671             : 
    1672             :     uint64_t(*mse[2])[TOTAL_STRENGTHS];
    1673           0 :     int32_t pri_damping = 3 + (frm_hdr->quantization_params.base_q_idx /*cm->quant_param.base_q_idx*/ >> 6);
    1674           0 :     int32_t sec_damping = 3 + (frm_hdr->quantization_params.base_q_idx /*cm->quant_param.base_q_idx*/ >> 6);
    1675             :     int32_t i;
    1676             :     int32_t nb_strengths;
    1677             :     int32_t nb_strength_bits;
    1678             :     int32_t quantizer;
    1679             :     double lambda;
    1680           0 :     const int32_t num_planes = 3;// av1_num_planes(cm);
    1681           0 :     const int32_t total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
    1682             :     DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
    1683             :     uint16_t *in;
    1684             :     DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
    1685             : 
    1686           0 :     int32_t selected_strength_cnt[TOTAL_STRENGTHS] = { 0 };
    1687           0 :     int32_t best_frame_gi_cnt = 0;
    1688           0 :     int32_t gi_step = get_cdef_gi_step(pPcs->cdef_filter_mode);
    1689           0 :     int32_t mid_gi = pPcs->cdf_ref_frame_strenght;
    1690           0 :     int32_t start_gi = pPcs->use_ref_frame_cdef_strength && pPcs->cdef_filter_mode == 1 ? (AOMMAX(0, mid_gi - gi_step)) : 0;
    1691           0 :     int32_t end_gi = pPcs->use_ref_frame_cdef_strength ? AOMMIN(total_strengths, mid_gi + gi_step) : pPcs->cdef_filter_mode == 1 ? 8 : total_strengths;
    1692             : 
    1693           0 :     quantizer =
    1694             :         //CHKN av1_ac_quant_Q3(cm->quant_param.base_q_idx, 0, cm->bit_depth) >> (cm->bit_depth - 8);
    1695           0 :         eb_av1_ac_quant_Q3(frm_hdr->quantization_params.base_q_idx, 0, (AomBitDepth)sequence_control_set_ptr->static_config.encoder_bit_depth) >> (sequence_control_set_ptr->static_config.encoder_bit_depth - 8);
    1696           0 :     lambda = .12 * quantizer * quantizer / 256.;
    1697             : 
    1698             :     //eb_av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,    num_planes);
    1699             : 
    1700           0 :     mse[0] = (uint64_t(*)[64])eb_aom_malloc(sizeof(**mse) * nvfb * nhfb);
    1701           0 :     mse[1] = (uint64_t(*)[64])eb_aom_malloc(sizeof(**mse) * nvfb * nhfb);
    1702             : 
    1703           0 :     for (pli = 0; pli < num_planes; pli++) {
    1704           0 :         uint8_t *in_buffer = 0;
    1705           0 :         int32_t in_stride = 0;
    1706             : 
    1707           0 :         uint8_t *ref_buffer = 0;
    1708           0 :         int32_t ref_stride = 0;
    1709           0 :         switch (pli) {
    1710           0 :         case 0:
    1711           0 :             ref_buffer = inputBufferY;
    1712           0 :             ref_stride = input_picture_ptr->stride_y;
    1713           0 :             in_buffer = reconBufferY;
    1714           0 :             in_stride = recon_picture_ptr->stride_y;
    1715           0 :             break;
    1716           0 :         case 1:
    1717           0 :             ref_buffer = inputBufferCb;
    1718           0 :             ref_stride = input_picture_ptr->stride_cb;
    1719           0 :             in_buffer = reconBufferCb;
    1720           0 :             in_stride = recon_picture_ptr->stride_cb;
    1721           0 :             break;
    1722           0 :         case 2:
    1723           0 :             ref_buffer = inputBufferCr;
    1724           0 :             ref_stride = input_picture_ptr->stride_cr;
    1725           0 :             in_buffer = reconBufferCr;
    1726           0 :             in_stride = recon_picture_ptr->stride_cr;
    1727           0 :             break;
    1728             :         }
    1729             : 
    1730             :         ///CHKN: allocate one frame 16bit for src and recon!!
    1731           0 :         src[pli] = (uint16_t*)eb_aom_memalign(32, sizeof(*src)       * mi_rows * mi_cols * MI_SIZE * MI_SIZE);
    1732           0 :         ref_coeff[pli] = (uint16_t*)eb_aom_memalign(32, sizeof(*ref_coeff) * mi_rows * mi_cols * MI_SIZE * MI_SIZE);
    1733             : 
    1734           0 :         int32_t subsampling_x = (pli == 0) ? 0 : 1;
    1735           0 :         int32_t subsampling_y = (pli == 0) ? 0 : 1;
    1736             : 
    1737           0 :         xdec[pli] = subsampling_x; //CHKN  xd->plane[pli].subsampling_x;
    1738           0 :         ydec[pli] = subsampling_y; //CHKN  xd->plane[pli].subsampling_y;
    1739           0 :         bsize[pli] = ydec[pli] ? (xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
    1740           0 :             : (xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
    1741             : 
    1742           0 :         stride[pli] = cm->mi_cols << MI_SIZE_LOG2;
    1743           0 :         mi_wide_l2[pli] = MI_SIZE_LOG2 - subsampling_x;  //CHKN MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
    1744           0 :         mi_high_l2[pli] = MI_SIZE_LOG2 - subsampling_y;  //CHKN MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
    1745             : 
    1746           0 :         const int32_t frame_height = (cm->mi_rows * MI_SIZE) >> subsampling_y;//CHKN  xd->plane[pli].subsampling_y;
    1747           0 :         const int32_t frame_width = (cm->mi_cols * MI_SIZE) >> subsampling_x;//CHKN  xd->plane[pli].subsampling_x;
    1748             : 
    1749           0 :         for (r = 0; r < frame_height; ++r) {
    1750           0 :             for (c = 0; c < frame_width; ++c) {
    1751             :                 //if (cm->use_highbitdepth) {
    1752             :                 //    src[pli][r * stride[pli] + c] = CONVERT_TO_SHORTPTR(
    1753             :                 //        xd->plane[pli].dst.buf)[r * xd->plane[pli].dst.stride + c];
    1754             :                 //    ref_coeff[pli][r * stride[pli] + c] =
    1755             :                 //        CONVERT_TO_SHORTPTR(ref_buffer)[r * ref_stride + c];
    1756             :                 //}
    1757             :                 //else
    1758             :                 {
    1759           0 :                     src[pli][r * stride[pli] + c] = in_buffer[r * in_stride + c];//CHKN xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
    1760           0 :                     ref_coeff[pli][r * stride[pli] + c] = ref_buffer[r * ref_stride + c];
    1761             :                 }
    1762             :             }
    1763             :         }
    1764             :     }
    1765             : 
    1766           0 :     in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
    1767           0 :     sb_count = 0;
    1768           0 :     for (fbr = 0; fbr < nvfb; ++fbr) {
    1769           0 :         for (fbc = 0; fbc < nhfb; ++fbc) {
    1770             :             int32_t nvb, nhb;
    1771             :             int32_t gi;
    1772           0 :             int32_t dirinit = 0;
    1773           0 :             nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
    1774           0 :             nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
    1775           0 :             int32_t hb_step = 1; //CHKN these should be all time with 64x64 LCUs
    1776           0 :             int32_t vb_step = 1;
    1777           0 :             BlockSize bs = BLOCK_64X64;
    1778           0 :             ModeInfo **mi = picture_control_set_ptr->mi_grid_base + MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc;
    1779           0 :             const MbModeInfo *mbmi = &mi[0]->mbmi;
    1780             : 
    1781             :             //MbModeInfo *const mbmi =
    1782             :             //    cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
    1783             :             //    MI_SIZE_64X64 * fbc];
    1784             : 
    1785           0 :             if (((fbc & 1) &&
    1786           0 :                 (mbmi->block_mi.sb_type == BLOCK_128X128 || mbmi->block_mi.sb_type == BLOCK_128X64)) ||
    1787           0 :                 ((fbr & 1) &&
    1788           0 :                 (mbmi->block_mi.sb_type == BLOCK_128X128 || mbmi->block_mi.sb_type == BLOCK_64X128)))
    1789           0 :                 continue;
    1790           0 :             if (mbmi->block_mi.sb_type == BLOCK_128X128 || mbmi->block_mi.sb_type == BLOCK_128X64 ||
    1791           0 :                 mbmi->block_mi.sb_type == BLOCK_64X128)
    1792           0 :                 bs = mbmi->block_mi.sb_type;
    1793           0 :             if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
    1794           0 :                 nhb = AOMMIN(MI_SIZE_128X128, cm->mi_cols - MI_SIZE_64X64 * fbc);
    1795           0 :                 hb_step = 2;
    1796             :             }
    1797           0 :             if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
    1798           0 :                 nvb = AOMMIN(MI_SIZE_128X128, cm->mi_rows - MI_SIZE_64X64 * fbr);
    1799           0 :                 vb_step = 2;
    1800             :             }
    1801             : 
    1802             :             // No filtering if the entire filter block is skipped
    1803           0 :             if (eb_sb_all_skip(picture_control_set_ptr, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64))
    1804           0 :                 continue;
    1805             : 
    1806           0 :             cdef_count = eb_sb_compute_cdef_list(picture_control_set_ptr, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);
    1807             : 
    1808           0 :             for (pli = 0; pli < num_planes; pli++) {
    1809           0 :                 for (i = 0; i < CDEF_INBUF_SIZE; i++)
    1810           0 :                     inbuf[i] = CDEF_VERY_LARGE;
    1811           0 :                 int32_t yoff = CDEF_VBORDER * (fbr != 0);
    1812           0 :                 int32_t xoff = CDEF_HBORDER * (fbc != 0);
    1813           0 :                 int32_t ysize = (nvb << mi_high_l2[pli]) + CDEF_VBORDER * (fbr + vb_step < nvfb) + yoff;
    1814           0 :                 int32_t xsize = (nhb << mi_wide_l2[pli]) + CDEF_HBORDER * (fbc + hb_step < nhfb) + xoff;
    1815             : 
    1816           0 :                 copy_sb16_16(
    1817           0 :                     &in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
    1818           0 :                     src[pli],
    1819           0 :                     (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
    1820           0 :                     (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
    1821             :                     stride[pli], ysize, xsize);
    1822             : 
    1823           0 :                 for (gi = start_gi; gi < end_gi; gi++) {
    1824             :                     int32_t threshold;
    1825             :                     uint64_t curr_mse;
    1826             :                     int32_t sec_strength;
    1827           0 :                     threshold = gi / CDEF_SEC_STRENGTHS;
    1828           0 :                     if (fast) threshold = priconv[threshold];
    1829             :                     /* We avoid filtering the pixels for which some of the pixels to
    1830             :                     average are outside the frame. We could change the filter instead, but it would add special cases for any future vectorization. */
    1831           0 :                     sec_strength = gi % CDEF_SEC_STRENGTHS;
    1832           0 :                     eb_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in, xdec[pli], ydec[pli],
    1833             :                         dir, &dirinit, var, pli, dlist, cdef_count, threshold,
    1834           0 :                         sec_strength + (sec_strength == 3), pri_damping,
    1835             :                         sec_damping, coeff_shift);
    1836             : 
    1837           0 :                     curr_mse = eb_compute_cdef_dist(
    1838           0 :                         ref_coeff[pli] +
    1839           0 :                         (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +
    1840           0 :                         (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]),
    1841           0 :                         stride[pli], tmp_dst, dlist, cdef_count, (BlockSize)bsize[pli], coeff_shift,
    1842             :                         pli);
    1843             : 
    1844           0 :                     if (pli < 2)
    1845           0 :                         mse[pli][sb_count][gi] = curr_mse;
    1846             :                     else
    1847           0 :                         mse[1][sb_count][gi] += curr_mse;
    1848             : 
    1849           0 :                     sb_index[sb_count] = MI_SIZE_64X64 * fbr * picture_control_set_ptr->mi_stride + MI_SIZE_64X64 * fbc;//CHKN
    1850             :                 }
    1851             :             }
    1852           0 :             sb_count++;
    1853             :         }
    1854             :     }
    1855             : 
    1856           0 :     nb_strength_bits = 0;
    1857             :     /* Search for different number of signalling bits. */
    1858           0 :     for (i = 0; i <= 3; i++) {
    1859             :         int32_t j;
    1860             :         int32_t best_lev0[CDEF_MAX_STRENGTHS];
    1861           0 :         int32_t best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
    1862           0 :         nb_strengths = 1 << i;
    1863           0 :         if (num_planes >= 3)
    1864           0 :             tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths, mse, sb_count, fast, start_gi, end_gi);
    1865             :         else
    1866           0 :             tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count, fast, start_gi, end_gi);
    1867             :         /* Count superblock signalling cost. */
    1868           0 :         tot_mse += (uint64_t)(sb_count * lambda * i);
    1869             :         /* Count header signalling cost. */
    1870           0 :         tot_mse += (uint64_t)(nb_strengths * lambda * CDEF_STRENGTH_BITS);
    1871           0 :         if (tot_mse < best_tot_mse) {
    1872           0 :             best_tot_mse = tot_mse;
    1873           0 :             nb_strength_bits = i;
    1874           0 :             for (j = 0; j < 1 << nb_strength_bits; j++) {
    1875           0 :                 frm_hdr->CDEF_params.cdef_y_strength[j] = best_lev0[j];
    1876           0 :                 frm_hdr->CDEF_params.cdef_uv_strength[j] = best_lev1[j];
    1877             :             }
    1878             :         }
    1879             :     }
    1880           0 :     nb_strengths = 1 << nb_strength_bits;
    1881             : 
    1882           0 :     /*cm*/frm_hdr->CDEF_params.cdef_bits = nb_strength_bits;
    1883           0 :     /*cm*/pPcs->nb_cdef_strengths = nb_strengths;
    1884           0 :     for (i = 0; i < sb_count; i++) {
    1885             :         int32_t gi;
    1886             :         int32_t best_gi;
    1887           0 :         uint64_t best_mse = (uint64_t)1 << 63;
    1888           0 :         best_gi = 0;
    1889           0 :         for (gi = 0; gi < /*cm*/pPcs->nb_cdef_strengths; gi++) {
    1890           0 :             uint64_t curr = mse[0][i][/*cm*/frm_hdr->CDEF_params.cdef_y_strength[gi]];
    1891           0 :             if (num_planes >= 3) curr += mse[1][i][/*cm*/frm_hdr->CDEF_params.cdef_uv_strength[gi]];
    1892           0 :             if (curr < best_mse) {
    1893           0 :                 best_gi = gi;
    1894           0 :                 best_mse = curr;
    1895             :             }
    1896             :         }
    1897           0 :         selected_strength[i] = best_gi;
    1898           0 :         selected_strength_cnt[best_gi]++;
    1899             : 
    1900             :         //CHKN cm->mi_grid_visible[sb_index[i]]->cdef_strength = best_gi;
    1901           0 :         picture_control_set_ptr->mi_grid_base[sb_index[i]]->mbmi.cdef_strength = (int8_t)best_gi;
    1902             :         //in case the fb is within a block=128x128 or 128x64, or 64x128, then we generate param only for the first 64x64.
    1903             :         //since our mi map does not have the multi pointer single data assignment, we need to duplicate data.
    1904           0 :         BlockSize sb_type = picture_control_set_ptr->mi_grid_base[sb_index[i]]->mbmi.block_mi.sb_type;
    1905             : 
    1906           0 :         if (sb_type == BLOCK_128X128)
    1907             :         {
    1908           0 :             picture_control_set_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->mbmi.cdef_strength = (int8_t)best_gi;
    1909           0 :             picture_control_set_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * picture_control_set_ptr->mi_stride]->mbmi.cdef_strength = (int8_t)best_gi;
    1910           0 :             picture_control_set_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * picture_control_set_ptr->mi_stride + MI_SIZE_64X64]->mbmi.cdef_strength = (int8_t)best_gi;
    1911             :         }
    1912           0 :         else if (sb_type == BLOCK_128X64)
    1913           0 :             picture_control_set_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->mbmi.cdef_strength = (int8_t)best_gi;
    1914           0 :         else if (sb_type == BLOCK_64X128)
    1915           0 :             picture_control_set_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * picture_control_set_ptr->mi_stride]->mbmi.cdef_strength = (int8_t)best_gi;
    1916             :     }
    1917             : 
    1918           0 :     if (fast) {
    1919           0 :         for (int32_t j = 0; j < nb_strengths; j++) {
    1920           0 :             frm_hdr->CDEF_params.cdef_y_strength[j] = priconv[frm_hdr->CDEF_params.cdef_y_strength[j] / CDEF_SEC_STRENGTHS] * CDEF_SEC_STRENGTHS + (frm_hdr->CDEF_params.cdef_y_strength[j] % CDEF_SEC_STRENGTHS);
    1921           0 :             frm_hdr->CDEF_params.cdef_uv_strength[j] = priconv[frm_hdr->CDEF_params.cdef_uv_strength[j] / CDEF_SEC_STRENGTHS] * CDEF_SEC_STRENGTHS + (frm_hdr->CDEF_params.cdef_uv_strength[j] % CDEF_SEC_STRENGTHS);
    1922             :         }
    1923             :     }
    1924             : 
    1925           0 :     for (int i = 0; i < total_strengths; i++)
    1926           0 :         best_frame_gi_cnt += selected_strength_cnt[i] > best_frame_gi_cnt ? 1 : 0;
    1927           0 :     pPcs->cdef_frame_strength = ((best_frame_gi_cnt + 4) / 4) * 4;
    1928             : 
    1929           0 :     frm_hdr->CDEF_params.cdef_damping = pri_damping;
    1930             :     //pPcs->cdef_pri_damping = pri_damping;
    1931             :     //pPcs->cdef_sec_damping = sec_damping;
    1932             : 
    1933           0 :     eb_aom_free(mse[0]);
    1934           0 :     eb_aom_free(mse[1]);
    1935           0 :     for (pli = 0; pli < num_planes; pli++) {
    1936           0 :         eb_aom_free(src[pli]);
    1937           0 :         eb_aom_free(ref_coeff[pli]);
    1938             :     }
    1939           0 :     eb_aom_free(sb_index);
    1940           0 :     eb_aom_free(selected_strength);
    1941           0 : }
    1942             : 
    1943           0 : void av1_cdef_search16bit( // CDEF strength search on the 16-bit pictures: measures distortion per filter block and candidate strength, then writes the frame-level strength set and the per-block strength index
    1944             :     EncDecContext                *context_ptr, // unused in this function (see the (void) cast below)
    1945             :     SequenceControlSet           *sequence_control_set_ptr, // only static_config.encoder_bit_depth is read here
    1946             :     PictureControlSet            *picture_control_set_ptr // supplies parent_pcs_ptr, the 16-bit recon/input pictures and mi_grid_base; mbmi.cdef_strength entries are written
    1947             :     //Yv12BufferConfig *frame,
    1948             :     //const Yv12BufferConfig *ref,
    1949             :     //Av1Common *cm,
    1950             :     //MacroBlockD *xd,
    1951             :     //int32_t fast
    1952             : )
    1953             : {
    1954             :     (void)context_ptr;
    1955           0 :     int32_t fast = 0; // fast search is hard-wired off, so every "if (fast)" branch below is never taken
    1956           0 :     struct PictureParentControlSet     *pPcs = picture_control_set_ptr->parent_pcs_ptr;
    1957           0 :     FrameHeader *frm_hdr = &pPcs->frm_hdr;
    1958           0 :     Av1Common*   cm = pPcs->av1_cm;
    1959           0 :     int32_t mi_rows = pPcs->av1_cm->mi_rows;
    1960           0 :     int32_t mi_cols = pPcs->av1_cm->mi_cols;
    1961             : 
    1962             :     EbPictureBufferDesc  * recon_picture_ptr; // 16-bit reconstruction; copied into src[] below and used as the filter input
    1963           0 :     if (pPcs->is_used_as_reference_flag == EB_TRUE) // reference pictures take their 16-bit recon from the reference object
    1964           0 :         recon_picture_ptr = ((EbReferenceObject*)picture_control_set_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture16bit;
    1965             :     else
    1966           0 :         recon_picture_ptr = picture_control_set_ptr->recon_picture16bit_ptr;
    1967             : 
    1968           0 :     uint16_t*  reconBufferY = (uint16_t*)recon_picture_ptr->buffer_y + (recon_picture_ptr->origin_x + recon_picture_ptr->origin_y     * recon_picture_ptr->stride_y); // advance to sample (origin_x, origin_y); chroma origins below are halved (presumably 4:2:0 - confirm)
    1969           0 :     uint16_t*  reconBufferCb = (uint16_t*)recon_picture_ptr->buffer_cb + (recon_picture_ptr->origin_x / 2 + recon_picture_ptr->origin_y / 2 * recon_picture_ptr->stride_cb);
    1970           0 :     uint16_t*  reconBufferCr = (uint16_t*)recon_picture_ptr->buffer_cr + (recon_picture_ptr->origin_x / 2 + recon_picture_ptr->origin_y / 2 * recon_picture_ptr->stride_cr);
    1971             : 
    1972           0 :     EbPictureBufferDesc *input_picture_ptr = picture_control_set_ptr->input_frame16bit; // 16-bit input frame; copied into ref_coeff[] below and passed to eb_compute_cdef_dist together with the filtered output
    1973           0 :     uint16_t*  inputBufferY = (uint16_t*)input_picture_ptr->buffer_y + (input_picture_ptr->origin_x + input_picture_ptr->origin_y * input_picture_ptr->stride_y);
    1974           0 :     uint16_t*  inputBufferCb = (uint16_t*)input_picture_ptr->buffer_cb + (input_picture_ptr->origin_x / 2 + input_picture_ptr->origin_y / 2 * input_picture_ptr->stride_cb);
    1975           0 :     uint16_t*  inputBufferCr = (uint16_t*)input_picture_ptr->buffer_cr + (input_picture_ptr->origin_x / 2 + input_picture_ptr->origin_y / 2 * input_picture_ptr->stride_cr);
    1976             : 
    1977             :     int32_t r, c;
    1978             :     int32_t fbr, fbc;
    1979             :     uint16_t *src[3]; // per-plane work copies of the recon picture
    1980             :     uint16_t *ref_coeff[3]; // per-plane work copies of the input picture
    1981             :     /*static*/ cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128]; // filled by eb_sb_compute_cdef_list for the current filter block
    1982           0 :     int32_t dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
    1983           0 :     int32_t var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
    1984             :     int32_t stride[3];
    1985             :     int32_t bsize[3];
    1986             :     int32_t mi_wide_l2[3];
    1987             :     int32_t mi_high_l2[3];
    1988             :     int32_t xdec[3];
    1989             :     int32_t ydec[3];
    1990             :     int32_t pli;
    1991             :     int32_t cdef_count;
    1992             : 
    1993             :     //CHKN int32_t coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
    1994           0 :     int32_t coeff_shift = AOMMAX(sequence_control_set_ptr->static_config.encoder_bit_depth - 8, 0); // bit depth above 8, clamped at 0
    1995             : 
    1996           0 :     uint64_t best_tot_mse = (uint64_t)1 << 63; // initial "worst" cost for the signalling-bits search below
    1997             :     uint64_t tot_mse;
    1998             :     int32_t sb_count;
    1999             : 
    2000           0 :     int32_t nvfb = (mi_rows /*cm->mi_rows*/ + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; // number of 64x64 filter-block rows, rounded up
    2001           0 :     int32_t nhfb = (mi_cols/*cm->mi_cols*/ + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; // number of 64x64 filter-block columns, rounded up
    2002             : 
    2003           0 :     int32_t *sb_index = (int32_t *)eb_aom_malloc(nvfb * nhfb * sizeof(*sb_index));       //CHKN add cast
    2004           0 :     int32_t *selected_strength = (int32_t *)eb_aom_malloc(nvfb * nhfb * sizeof(*sb_index)); // sized via sizeof(*sb_index); both arrays hold int32_t, so the size matches
    2005             : 
    2006           0 :     assert(sb_index); // NOTE(review): allocation failure is only caught by assert, i.e. not at all when NDEBUG is defined
    2007           0 :     assert(selected_strength);
    2008             : 
    2009             :     uint64_t(*mse[2])[TOTAL_STRENGTHS]; // mse[0]: plane 0, mse[1]: planes 1 and 2 summed; indexed [filter block][strength index]
    2010             : 
    2011           0 :     int32_t pri_damping = 3 + (frm_hdr->quantization_params.base_q_idx /*cm->quant_param.base_q_idx*/ >> 6);
    2012           0 :     int32_t sec_damping = 3 + (frm_hdr->quantization_params.base_q_idx /*cm->quant_param.base_q_idx*/ >> 6); // same formula as pri_damping; only pri_damping is stored in the frame header below
    2013             :     int32_t i;
    2014             :     int32_t nb_strengths;
    2015             :     int32_t nb_strength_bits;
    2016             :     int32_t quantizer;
    2017             :     double lambda;
    2018           0 :     const int32_t num_planes = 3;// av1_num_planes(cm); hard-coded, so the "num_planes >= 3" tests below are always true
    2019           0 :     const int32_t total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS; // always TOTAL_STRENGTHS because fast is 0
    2020             :     DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
    2021             :     uint16_t *in;
    2022             :     DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
    2023             : 
    2024           0 :     int32_t selected_strength_cnt[TOTAL_STRENGTHS] = { 0 }; // histogram of the strength index chosen per filter block
    2025           0 :     int32_t best_frame_gi_cnt = 0;
    2026           0 :     int32_t gi_step = get_cdef_gi_step(pPcs->cdef_filter_mode);
    2027           0 :     int32_t mid_gi = pPcs->cdf_ref_frame_strenght;
    2028           0 :     int32_t start_gi = pPcs->use_ref_frame_cdef_strength && pPcs->cdef_filter_mode == 1 ? (AOMMAX(0, mid_gi - gi_step)) : 0; // first candidate strength index evaluated
    2029           0 :     int32_t end_gi = pPcs->use_ref_frame_cdef_strength ? AOMMIN(total_strengths, mid_gi + gi_step) : pPcs->cdef_filter_mode == 1 ? 8 : total_strengths; // one past the last candidate strength index evaluated
    2030             : 
    2031           0 :     quantizer =
    2032             :         //CHKN av1_ac_quant_Q3(cm->quant_param.base_q_idx, 0, cm->bit_depth) >> (cm->bit_depth - 8);
    2033           0 :         eb_av1_ac_quant_Q3(frm_hdr->quantization_params.base_q_idx, 0, (AomBitDepth)sequence_control_set_ptr->static_config.encoder_bit_depth) >> (sequence_control_set_ptr->static_config.encoder_bit_depth - 8);
    2034           0 :     lambda = .12 * quantizer * quantizer / 256.; // weight applied to the signalling costs added to tot_mse below
    2035             : 
    2036             :     //eb_av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,    num_planes);
    2037             : 
    2038           0 :     mse[0] = (uint64_t(*)[64])eb_aom_malloc(sizeof(**mse) * nvfb * nhfb); // NOTE(review): cast hard-codes 64 while mse is declared with TOTAL_STRENGTHS - assumes they are equal; confirm
    2039           0 :     mse[1] = (uint64_t(*)[64])eb_aom_malloc(sizeof(**mse) * nvfb * nhfb); // NOTE(review): neither mse allocation is checked for NULL
    2040             : 
    2041           0 :     for (pli = 0; pli < num_planes; pli++) { // stage 1: copy every plane of the recon and input pictures into the src[] / ref_coeff[] work buffers
    2042           0 :         uint16_t *in_buffer = 0;
    2043           0 :         int32_t in_stride = 0;
    2044             : 
    2045           0 :         uint16_t *ref_buffer = 0;
    2046           0 :         int32_t ref_stride = 0;
    2047           0 :         switch (pli) {
    2048           0 :         case 0:
    2049           0 :             ref_buffer = inputBufferY;
    2050           0 :             ref_stride = input_picture_ptr->stride_y;
    2051           0 :             in_buffer = reconBufferY;
    2052           0 :             in_stride = recon_picture_ptr->stride_y;
    2053           0 :             break;
    2054           0 :         case 1:
    2055           0 :             ref_buffer = inputBufferCb;
    2056           0 :             ref_stride = input_picture_ptr->stride_cb;
    2057           0 :             in_buffer = reconBufferCb;
    2058           0 :             in_stride = recon_picture_ptr->stride_cb;
    2059           0 :             break;
    2060           0 :         case 2:
    2061           0 :             ref_buffer = inputBufferCr;
    2062           0 :             ref_stride = input_picture_ptr->stride_cr;
    2063           0 :             in_buffer = reconBufferCr;
    2064           0 :             in_stride = recon_picture_ptr->stride_cr;
    2065           0 :             break;
    2066             :         }
    2067             : 
    2068             :         ///CHKN: allocate one frame 16bit for src and recon!!
    2069           0 :         src[pli] = (uint16_t*)eb_aom_memalign(32, sizeof(*src)       * mi_rows * mi_cols * MI_SIZE * MI_SIZE); // NOTE(review): sizeof(*src) is sizeof(uint16_t *), not sizeof(uint16_t), so more bytes are requested than the sample count needs - confirm intent; result is not checked for NULL
    2070           0 :         ref_coeff[pli] = (uint16_t*)eb_aom_memalign(32, sizeof(*ref_coeff) * mi_rows * mi_cols * MI_SIZE * MI_SIZE); // NOTE(review): same pointer-sized element and missing NULL check as src[pli]
    2071             : 
    2072           0 :         int32_t subsampling_x = (pli == 0) ? 0 : 1; // planes 1 and 2 are treated as subsampled by 2 horizontally and vertically
    2073           0 :         int32_t subsampling_y = (pli == 0) ? 0 : 1;
    2074             : 
    2075           0 :         xdec[pli] = subsampling_x; //CHKN  xd->plane[pli].subsampling_x;
    2076           0 :         ydec[pli] = subsampling_y; //CHKN  xd->plane[pli].subsampling_y;
    2077           0 :         bsize[pli] = ydec[pli] ? (xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
    2078           0 :             : (xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
    2079             : 
    2080           0 :         stride[pli] = cm->mi_cols << MI_SIZE_LOG2; // same work-buffer stride for every plane, independent of subsampling
    2081           0 :         mi_wide_l2[pli] = MI_SIZE_LOG2 - subsampling_x;  //CHKN MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
    2082           0 :         mi_high_l2[pli] = MI_SIZE_LOG2 - subsampling_y;  //CHKN MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
    2083             : 
    2084           0 :         const int32_t frame_height = (cm->mi_rows * MI_SIZE) >> subsampling_y;//CHKN  xd->plane[pli].subsampling_y;
    2085           0 :         const int32_t frame_width = (cm->mi_cols * MI_SIZE) >> subsampling_x;//CHKN  xd->plane[pli].subsampling_x;
    2086             : 
    2087           0 :         for (r = 0; r < frame_height; ++r) {
    2088           0 :             for (c = 0; c < frame_width; ++c) {
    2089             :                 //if (cm->use_highbitdepth) {
    2090             :                 //    src[pli][r * stride[pli] + c] = CONVERT_TO_SHORTPTR(
    2091             :                 //        xd->plane[pli].dst.buf)[r * xd->plane[pli].dst.stride + c];
    2092             :                 //    ref_coeff[pli][r * stride[pli] + c] =
    2093             :                 //        CONVERT_TO_SHORTPTR(ref_buffer)[r * ref_stride + c];
    2094             :                 //}
    2095             :                 //else
    2096             :                 {
    2097           0 :                     src[pli][r * stride[pli] + c] = in_buffer[r * in_stride + c];//CHKN xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
    2098           0 :                     ref_coeff[pli][r * stride[pli] + c] = ref_buffer[r * ref_stride + c];
    2099             :                 }
    2100             :             }
    2101             :         }
    2102             :     }
    2103             : 
    2104           0 :     in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER; // in points CDEF_VBORDER rows and CDEF_HBORDER columns into inbuf
    2105           0 :     sb_count = 0;
    2106           0 :     for (fbr = 0; fbr < nvfb; ++fbr) { // stage 2: evaluate every candidate strength on every non-skipped filter block
    2107           0 :         for (fbc = 0; fbc < nhfb; ++fbc) {
    2108             :             int32_t nvb, nhb;
    2109             :             int32_t gi;
    2110           0 :             int32_t dirinit = 0;
    2111           0 :             nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
    2112           0 :             nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
    2113           0 :             int32_t hb_step = 1; //CHKN these should be all time with 64x64 LCUs
    2114           0 :             int32_t vb_step = 1;
    2115           0 :             BlockSize bs = BLOCK_64X64;
    2116           0 :             ModeInfo **mi = picture_control_set_ptr->mi_grid_base + MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc;
    2117           0 :             const MbModeInfo *mbmi = &mi[0]->mbmi;
    2118             : 
    2119             :             //MbModeInfo *const mbmi =
    2120             :             //    cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
    2121             :             //    MI_SIZE_64X64 * fbc];
    2122             : 
    2123           0 :             if (((fbc & 1) && // skip odd-column / odd-row 64x64 units whose block type is 128 wide / 128 tall; the enlarged nhb/nvb below cover them from the even-indexed unit
    2124           0 :                 (mbmi->block_mi.sb_type == BLOCK_128X128 || mbmi->block_mi.sb_type == BLOCK_128X64)) ||
    2125           0 :                 ((fbr & 1) &&
    2126           0 :                 (mbmi->block_mi.sb_type == BLOCK_128X128 || mbmi->block_mi.sb_type == BLOCK_64X128)))
    2127           0 :                 continue;
    2128           0 :             if (mbmi->block_mi.sb_type == BLOCK_128X128 || mbmi->block_mi.sb_type == BLOCK_128X64 ||
    2129           0 :                 mbmi->block_mi.sb_type == BLOCK_64X128)
    2130           0 :                 bs = mbmi->block_mi.sb_type;
    2131           0 :             if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
    2132           0 :                 nhb = AOMMIN(MI_SIZE_128X128, cm->mi_cols - MI_SIZE_64X64 * fbc);
    2133           0 :                 hb_step = 2;
    2134             :             }
    2135           0 :             if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
    2136           0 :                 nvb = AOMMIN(MI_SIZE_128X128, cm->mi_rows - MI_SIZE_64X64 * fbr);
    2137           0 :                 vb_step = 2;
    2138             :             }
    2139             : 
    2140             :             // No filtering if the entire filter block is skipped
    2141           0 :             if (eb_sb_all_skip(picture_control_set_ptr, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64))
    2142           0 :                 continue;
    2143             : 
    2144           0 :             cdef_count = eb_sb_compute_cdef_list(picture_control_set_ptr, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);
    2145             : 
    2146           0 :             for (pli = 0; pli < num_planes; pli++) {
    2147           0 :                 for (i = 0; i < CDEF_INBUF_SIZE; i++)
    2148           0 :                     inbuf[i] = CDEF_VERY_LARGE;
    2149           0 :                 for (gi = start_gi; gi < end_gi; gi++) {
    2150             :                     int32_t threshold;
    2151             :                     uint64_t curr_mse;
    2152             :                     int32_t sec_strength;
    2153           0 :                     threshold = gi / CDEF_SEC_STRENGTHS; // gi is split by CDEF_SEC_STRENGTHS: quotient -> threshold, remainder -> sec_strength
    2154           0 :                     if (fast) threshold = priconv[threshold];
    2155             :                     /* We avoid filtering the pixels for which some of the pixels to
    2156             :                     average are outside the frame. We could change the filter instead, but it would add special cases for any future vectorization. */
    2157           0 :                     int32_t yoff = CDEF_VBORDER * (fbr != 0);
    2158           0 :                     int32_t xoff = CDEF_HBORDER * (fbc != 0);
    2159           0 :                     int32_t ysize = (nvb << mi_high_l2[pli]) + CDEF_VBORDER * (fbr + vb_step < nvfb) + yoff; // a border is added only on sides that have a neighbouring filter block inside the frame
    2160           0 :                     int32_t xsize = (nhb << mi_wide_l2[pli]) + CDEF_HBORDER * (fbc + hb_step < nhfb) + xoff;
    2161           0 :                     sec_strength = gi % CDEF_SEC_STRENGTHS;
    2162             : 
    2163           0 :                     copy_sb16_16(
    2164           0 :                         &in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
    2165           0 :                         src[pli],
    2166           0 :                         (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
    2167           0 :                         (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
    2168             :                         stride[pli], ysize, xsize);
    2169             : 
    2170           0 :                     eb_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in, xdec[pli], ydec[pli],
    2171             :                         dir, &dirinit, var, pli, dlist, cdef_count, threshold,
    2172           0 :                         sec_strength + (sec_strength == 3), pri_damping, // remainder 3 is passed as 4; 0..2 are passed unchanged
    2173             :                         sec_damping, coeff_shift);
    2174             : 
    2175           0 :                     curr_mse = eb_compute_cdef_dist(
    2176           0 :                         ref_coeff[pli] +
    2177           0 :                         (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +
    2178           0 :                         (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]),
    2179           0 :                         stride[pli], tmp_dst, dlist, cdef_count, (BlockSize)bsize[pli], coeff_shift,
    2180             :                         pli);
    2181             : 
    2182           0 :                     if (pli < 2) // planes 0 and 1 assign their entry; plane 2 adds onto the plane-1 entry, so mse[1] ends up as the sum of planes 1 and 2
    2183           0 :                         mse[pli][sb_count][gi] = curr_mse;
    2184             :                     else
    2185           0 :                         mse[1][sb_count][gi] += curr_mse;
    2186             : 
    2187           0 :                     sb_index[sb_count] = MI_SIZE_64X64 * fbr * picture_control_set_ptr->mi_stride + MI_SIZE_64X64 * fbc;//CHKN mi-grid offset of this filter block; NOTE(review): does not depend on pli or gi yet is assigned inside both loops, and is never set if start_gi >= end_gi
    2188             :                 }
    2189             :             }
    2190           0 :             sb_count++; // only filter blocks that were not skipped above receive an index
    2191             :         }
    2192             :     }
    2193             : 
    2194           0 :     nb_strength_bits = 0;
    2195             :     /* Search for different number of signalling bits. */
    2196           0 :     for (i = 0; i <= 3; i++) {
    2197             :         int32_t j;
    2198             :         int32_t best_lev0[CDEF_MAX_STRENGTHS];
    2199           0 :         int32_t best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
    2200           0 :         nb_strengths = 1 << i;
    2201           0 :         if (num_planes >= 3)
    2202           0 :             tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths, mse, sb_count, fast, start_gi, end_gi);
    2203             :         else
    2204           0 :             tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count, fast, start_gi, end_gi);
    2205             :         /* Count superblock signalling cost. */
    2206           0 :         tot_mse += (uint64_t)(sb_count * lambda * i);
    2207             :         /* Count header signalling cost. */
    2208           0 :         tot_mse += (uint64_t)(nb_strengths * lambda * CDEF_STRENGTH_BITS);
    2209           0 :         if (tot_mse < best_tot_mse) { // keep the cheapest configuration so far and store its strengths in the frame header
    2210           0 :             best_tot_mse = tot_mse;
    2211           0 :             nb_strength_bits = i;
    2212           0 :             for (j = 0; j < 1 << nb_strength_bits; j++) {
    2213           0 :                 frm_hdr->CDEF_params.cdef_y_strength[j] = best_lev0[j];
    2214           0 :                 frm_hdr->CDEF_params.cdef_uv_strength[j] = best_lev1[j];
    2215             :             }
    2216             :         }
    2217             :     }
    2218           0 :     nb_strengths = 1 << nb_strength_bits;
    2219             : 
    2220           0 :     /*cm*/frm_hdr->CDEF_params.cdef_bits = nb_strength_bits;
    2221           0 :     /*cm*/pPcs->nb_cdef_strengths = nb_strengths;
    2222           0 :     for (i = 0; i < sb_count; i++) { // for each indexed filter block, pick the entry of the stored strength set with the lowest combined mse
    2223             :         int32_t gi;
    2224             :         int32_t best_gi;
    2225           0 :         uint64_t best_mse = (uint64_t)1 << 63;
    2226           0 :         best_gi = 0;
    2227           0 :         for (gi = 0; gi < /*cm*/pPcs->nb_cdef_strengths; gi++) {
    2228           0 :             uint64_t curr = mse[0][i][/*cm*/frm_hdr->CDEF_params.cdef_y_strength[gi]];
    2229           0 :             if (num_planes >= 3) curr += mse[1][i][/*cm*/frm_hdr->CDEF_params.cdef_uv_strength[gi]];
    2230           0 :             if (curr < best_mse) {
    2231           0 :                 best_gi = gi;
    2232           0 :                 best_mse = curr;
    2233             :             }
    2234             :         }
    2235           0 :         selected_strength[i] = best_gi; // NOTE(review): selected_strength[] is written here but never read in this function
    2236           0 :         selected_strength_cnt[best_gi]++;
    2237             :         //CHKN cm->mi_grid_visible[sb_index[i]]->cdef_strength = best_gi;
    2238           0 :         picture_control_set_ptr->mi_grid_base[sb_index[i]]->mbmi.cdef_strength = (int8_t)best_gi;
    2239             :         //in case the fb is within a block=128x128 or 128x64, or 64x128, then we generate param only for the first 64x64.
    2240             :         //since our mi map does not have the multi pointer single data assignment, we need to duplicate data.
    2241           0 :         BlockSize sb_type = picture_control_set_ptr->mi_grid_base[sb_index[i]]->mbmi.block_mi.sb_type;
    2242             : 
    2243           0 :         if (sb_type == BLOCK_128X128)
    2244             :         {
    2245           0 :             picture_control_set_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->mbmi.cdef_strength = (int8_t)best_gi;
    2246           0 :             picture_control_set_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * picture_control_set_ptr->mi_stride]->mbmi.cdef_strength = (int8_t)best_gi;
    2247           0 :             picture_control_set_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * picture_control_set_ptr->mi_stride + MI_SIZE_64X64]->mbmi.cdef_strength = (int8_t)best_gi;
    2248             :         }
    2249           0 :         else if (sb_type == BLOCK_128X64)
    2250           0 :             picture_control_set_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->mbmi.cdef_strength = (int8_t)best_gi;
    2251           0 :         else if (sb_type == BLOCK_64X128)
    2252           0 :             picture_control_set_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * picture_control_set_ptr->mi_stride]->mbmi.cdef_strength = (int8_t)best_gi;
    2253             :         //ModeInfo *miPtr = *(picture_control_set_ptr->mi_grid_base + sb_index[i]);
    2254             :         //uint8_t  miX, miY;
    2255             :         //for (miY = 0; miY < (block_size_high[sb_type] >> MI_SIZE_LOG2); miY++) {
    2256             :         //    for (miX = 0; miX < (block_size_wide[sb_type] >> MI_SIZE_LOG2); miX++) {
    2257             :         //        miPtr[miX + miY * picture_control_set_ptr->mi_stride].mbmi.cdef_strength = (int8_t)best_gi;
    2258             :         //    }
    2259             :         //}
    2260             :     }
    2261             : 
    2262           0 :     if (fast) { // never taken: fast is 0 above
    2263           0 :         for (int32_t j = 0; j < nb_strengths; j++) {
    2264           0 :             frm_hdr->CDEF_params.cdef_y_strength[j] = priconv[frm_hdr->CDEF_params.cdef_y_strength[j] / CDEF_SEC_STRENGTHS] * CDEF_SEC_STRENGTHS + (frm_hdr->CDEF_params.cdef_y_strength[j] % CDEF_SEC_STRENGTHS);
    2265           0 :             frm_hdr->CDEF_params.cdef_uv_strength[j] = priconv[frm_hdr->CDEF_params.cdef_uv_strength[j] / CDEF_SEC_STRENGTHS] * CDEF_SEC_STRENGTHS + (frm_hdr->CDEF_params.cdef_uv_strength[j] % CDEF_SEC_STRENGTHS);
    2266             :         }
    2267             :     }
    2268           0 :     frm_hdr->CDEF_params.cdef_damping = pri_damping;
    2269             :     //pPcs->cdef_pri_damping = pri_damping;
    2270             :     //pPcs->cdef_sec_damping = sec_damping;
    2271             : 
    2272           0 :     for (int i = 0; i < total_strengths; i++) // NOTE(review): the counter grows by at most 1 per bucket whose count exceeds its running value; it is neither the index nor the count of the most-used strength - confirm intent
    2273           0 :         best_frame_gi_cnt += selected_strength_cnt[i] > best_frame_gi_cnt ? 1 : 0;
    2274           0 :     pPcs->cdef_frame_strength = ((best_frame_gi_cnt + 4) / 4) * 4; // (best_frame_gi_cnt + 4) rounded down to a multiple of 4
    2275             : 
    2276           0 :     eb_aom_free(mse[0]); // release every buffer allocated in this function
    2277           0 :     eb_aom_free(mse[1]);
    2278           0 :     for (pli = 0; pli < num_planes; pli++) {
    2279           0 :         eb_aom_free(src[pli]);
    2280           0 :         eb_aom_free(ref_coeff[pli]);
    2281             :     }
    2282           0 :     eb_aom_free(sb_index);
    2283           0 :     eb_aom_free(selected_strength);
    2284           0 : }

Generated by: LCOV version 1.14