LCOV - coverage.info - Codec/convolve.c

LCOV - code coverage report

Current view:	top level - Codec - convolve.c (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	0	156	0.0 %
Date:	2019-11-25 17:38:06	Functions:	0	16	0.0 %

          Line data    Source code

       1             : /*
       2             :  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #include <assert.h>
      13             : #include "EbDefinitions.h"
      14             : #include "aom_dsp_rtcd.h"
      15             : #include "convolve.h"
      16             : 
      17             :  // Note: Fixed-size intermediate buffers place limits on the parameters
      18             :  // of some functions. 2D filtering proceeds in 2 steps:
      19             :  //   (1) Interpolate horizontally into an intermediate buffer, temp.
      20             :  //   (2) Interpolate temp vertically to derive the sub-pixel result.
      21             :  // Deriving the maximum number of rows in the temp buffer (263):
      22             :  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
      23             :  // --Largest block size is 128x128 pixels.
      24             :  // --128 rows in the downscaled frame span a distance of (128 - 1) * 32 in the
      25             :  //   original frame (in 1/16th pixel units).
      26             :  // --Must round-up because block may be located at sub-pixel position.
      27             :  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
      28             :  // --((128 - 1) * 32 + 15) >> 4 + 8 = 263.
      29             : #define WIENER_MAX_EXT_SIZE 263
      30             : 
      31           0 : static INLINE int32_t horz_scalar_product(const uint8_t *a, const int16_t *b) {
      32           0 :     int32_t sum = 0;
      33           0 :     for (int32_t k = 0; k < SUBPEL_TAPS; ++k) sum += a[k] * b[k];
      34           0 :     return sum;
      35             : }
      36             : 
      37           0 : static INLINE int32_t highbd_horz_scalar_product(const uint16_t *a,
      38             :     const int16_t *b) {
      39           0 :     int32_t sum = 0;
      40           0 :     for (int32_t k = 0; k < SUBPEL_TAPS; ++k) sum += a[k] * b[k];
      41           0 :     return sum;
      42             : }
      43             : 
      44           0 : static INLINE int32_t highbd_vert_scalar_product(const uint16_t *a,
      45             :     ptrdiff_t a_stride,
      46             :     const int16_t *b) {
      47           0 :     int32_t sum = 0;
      48           0 :     for (int32_t k = 0; k < SUBPEL_TAPS; ++k) sum += a[k * a_stride] * b[k];
      49           0 :     return sum;
      50             : }
      51             : 
      52           0 : static const InterpKernel *get_filter_base(const int16_t *filter) {
      53             :     // NOTE: This assumes that the filter table is 256-byte aligned.
      54             :     // TODO(agrange) Modify to make independent of table alignment.
      55           0 :     return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
      56             : }
      57             : 
      58           0 : static int32_t get_filter_offset(const int16_t *f, const InterpKernel *base) {
      59           0 :     return (int32_t)((const InterpKernel *)(intptr_t)f - base);
      60             : }
      61             : 
      62           0 : static void convolve_add_src_horiz_hip(const uint8_t *src, ptrdiff_t src_stride,
      63             :     uint16_t *dst, ptrdiff_t dst_stride,
      64             :     const InterpKernel *x_filters, int32_t x0_q4,
      65             :     int32_t x_step_q4, int32_t w, int32_t h,
      66             :     int32_t round0_bits) {
      67           0 :     const int32_t bd = 8;
      68           0 :     src -= SUBPEL_TAPS / 2 - 1;
      69           0 :     for (int32_t y = 0; y < h; ++y) {
      70           0 :         int32_t x_q4 = x0_q4;
      71           0 :         for (int32_t x = 0; x < w; ++x) {
      72           0 :             const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      73           0 :             const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      74           0 :             const int32_t rounding = ((int32_t)src_x[SUBPEL_TAPS / 2 - 1] << FILTER_BITS) +
      75           0 :                 (1 << (bd + FILTER_BITS - 1));
      76           0 :             const int32_t sum = horz_scalar_product(src_x, x_filter) + rounding;
      77           0 :             dst[x] = (uint16_t)clamp(ROUND_POWER_OF_TWO(sum, round0_bits), 0,
      78           0 :                 WIENER_CLAMP_LIMIT(round0_bits, bd) - 1);
      79           0 :             x_q4 += x_step_q4;
      80             :         }
      81           0 :         src += src_stride;
      82           0 :         dst += dst_stride;
      83             :     }
      84           0 : }
      85             : 
      86           0 : static void convolve_add_src_vert_hip(const uint16_t *src, ptrdiff_t src_stride,
      87             :     uint8_t *dst, ptrdiff_t dst_stride,
      88             :     const InterpKernel *y_filters, int32_t y0_q4,
      89             :     int32_t y_step_q4, int32_t w, int32_t h,
      90             :     int32_t round1_bits) {
      91           0 :     const int32_t bd = 8;
      92           0 :     src -= src_stride * (SUBPEL_TAPS / 2 - 1);
      93             : 
      94           0 :     for (int32_t x = 0; x < w; ++x) {
      95           0 :         int32_t y_q4 = y0_q4;
      96           0 :         for (int32_t y = 0; y < h; ++y) {
      97           0 :             const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      98           0 :             const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      99           0 :             const int32_t rounding =
     100           0 :                 ((int32_t)src_y[(SUBPEL_TAPS / 2 - 1) * src_stride] << FILTER_BITS) -
     101           0 :                 (1 << (bd + round1_bits - 1));
     102           0 :             const int32_t sum =
     103           0 :                 highbd_vert_scalar_product(src_y, src_stride, y_filter) + rounding;
     104           0 :             dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, round1_bits));
     105           0 :             y_q4 += y_step_q4;
     106             :         }
     107           0 :         ++src;
     108           0 :         ++dst;
     109             :     }
     110           0 : }
     111             : 
     112           0 : void eb_av1_wiener_convolve_add_src_c(const uint8_t *src, ptrdiff_t src_stride,
     113             :     uint8_t *dst, ptrdiff_t dst_stride,
     114             :     const int16_t *filter_x, int32_t x_step_q4,
     115             :     const int16_t *filter_y, int32_t y_step_q4,
     116             :     int32_t w, int32_t h,
     117             :     const ConvolveParams *conv_params) {
     118           0 :     const InterpKernel *const filters_x = get_filter_base(filter_x);
     119           0 :     const int32_t x0_q4 = get_filter_offset(filter_x, filters_x);
     120             : 
     121           0 :     const InterpKernel *const filters_y = get_filter_base(filter_y);
     122           0 :     const int32_t y0_q4 = get_filter_offset(filter_y, filters_y);
     123             : 
     124             :     uint16_t temp[WIENER_MAX_EXT_SIZE * MAX_SB_SIZE];
     125           0 :     const int32_t intermediate_height =
     126           0 :         (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
     127             : 
     128           0 :     assert(w <= MAX_SB_SIZE);
     129           0 :     assert(h <= MAX_SB_SIZE);
     130           0 :     assert(y_step_q4 <= 32);
     131           0 :     assert(x_step_q4 <= 32);
     132             : 
     133           0 :     convolve_add_src_horiz_hip(src - src_stride * (SUBPEL_TAPS / 2 - 1),
     134             :         src_stride, temp, MAX_SB_SIZE, filters_x, x0_q4,
     135             :         x_step_q4, w, intermediate_height,
     136             :         conv_params->round_0);
     137           0 :     convolve_add_src_vert_hip(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),
     138             :         MAX_SB_SIZE, dst, dst_stride, filters_y, y0_q4,
     139             :         y_step_q4, w, h, conv_params->round_1);
     140           0 : }
     141             : 
     142           0 : static void highbd_convolve_add_src_horiz_hip(
     143             :     const uint8_t *src8, ptrdiff_t src_stride, uint16_t *dst,
     144             :     ptrdiff_t dst_stride, const InterpKernel *x_filters, int32_t x0_q4,
     145             :     int32_t x_step_q4, int32_t w, int32_t h, int32_t round0_bits, int32_t bd) {
     146           0 :     const int32_t extraprec_clamp_limit = WIENER_CLAMP_LIMIT(round0_bits, bd);
     147           0 :     uint16_t *src = CONVERT_TO_SHORTPTR(src8);
     148           0 :     src -= SUBPEL_TAPS / 2 - 1;
     149           0 :     for (int32_t y = 0; y < h; ++y) {
     150           0 :         int32_t x_q4 = x0_q4;
     151           0 :         for (int32_t x = 0; x < w; ++x) {
     152           0 :             const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
     153           0 :             const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
     154           0 :             const int32_t rounding = ((int32_t)src_x[SUBPEL_TAPS / 2 - 1] << FILTER_BITS) +
     155           0 :                 (1 << (bd + FILTER_BITS - 1));
     156           0 :             const int32_t sum = highbd_horz_scalar_product(src_x, x_filter) + rounding;
     157           0 :             dst[x] = (uint16_t)clamp(ROUND_POWER_OF_TWO(sum, round0_bits), 0,
     158             :                 extraprec_clamp_limit - 1);
     159           0 :             x_q4 += x_step_q4;
     160             :         }
     161           0 :         src += src_stride;
     162           0 :         dst += dst_stride;
     163             :     }
     164           0 : }
     165             : 
     166           0 : static void highbd_convolve_add_src_vert_hip(
     167             :     const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst8,
     168             :     ptrdiff_t dst_stride, const InterpKernel *y_filters, int32_t y0_q4,
     169             :     int32_t y_step_q4, int32_t w, int32_t h, int32_t round1_bits, int32_t bd) {
     170           0 :     uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
     171           0 :     src -= src_stride * (SUBPEL_TAPS / 2 - 1);
     172           0 :     for (int32_t x = 0; x < w; ++x) {
     173           0 :         int32_t y_q4 = y0_q4;
     174           0 :         for (int32_t y = 0; y < h; ++y) {
     175           0 :             const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
     176           0 :             const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
     177           0 :             const int32_t rounding =
     178           0 :                 ((int32_t)src_y[(SUBPEL_TAPS / 2 - 1) * src_stride] << FILTER_BITS) -
     179           0 :                 (1 << (bd + round1_bits - 1));
     180           0 :             const int32_t sum =
     181           0 :                 highbd_vert_scalar_product(src_y, src_stride, y_filter) + rounding;
     182           0 :             dst[y * dst_stride] =
     183           0 :                 clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, round1_bits), bd);
     184           0 :             y_q4 += y_step_q4;
     185             :         }
     186           0 :         ++src;
     187           0 :         ++dst;
     188             :     }
     189           0 : }
     190             : 
     191           0 : void eb_av1_highbd_wiener_convolve_add_src_c(
     192             :     const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
     193             :     ptrdiff_t dst_stride, const int16_t *filter_x, int32_t x_step_q4,
     194             :     const int16_t *filter_y, int32_t y_step_q4, int32_t w, int32_t h,
     195             :     const ConvolveParams *conv_params, int32_t bd) {
     196           0 :     const InterpKernel *const filters_x = get_filter_base(filter_x);
     197           0 :     const int32_t x0_q4 = get_filter_offset(filter_x, filters_x);
     198             : 
     199           0 :     const InterpKernel *const filters_y = get_filter_base(filter_y);
     200           0 :     const int32_t y0_q4 = get_filter_offset(filter_y, filters_y);
     201             : 
     202             :     uint16_t temp[WIENER_MAX_EXT_SIZE * MAX_SB_SIZE];
     203           0 :     const int32_t intermediate_height =
     204           0 :         (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
     205             : 
     206           0 :     assert(w <= MAX_SB_SIZE);
     207           0 :     assert(h <= MAX_SB_SIZE);
     208           0 :     assert(y_step_q4 <= 32);
     209           0 :     assert(x_step_q4 <= 32);
     210           0 :     assert(bd + FILTER_BITS - conv_params->round_0 + 2 <= 16);
     211             : 
     212           0 :     highbd_convolve_add_src_horiz_hip(src - src_stride * (SUBPEL_TAPS / 2 - 1),
     213             :         src_stride, temp, MAX_SB_SIZE, filters_x,
     214             :         x0_q4, x_step_q4, w, intermediate_height,
     215             :         conv_params->round_0, bd);
     216           0 :     highbd_convolve_add_src_vert_hip(
     217             :         temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE, dst, dst_stride,
     218             :         filters_y, y0_q4, y_step_q4, w, h, conv_params->round_1, bd);
     219           0 : }
     220             : #if OBMC_FLAG
     221             : 
     222           0 : static INLINE int vert_scalar_product(const uint8_t *a, ptrdiff_t a_stride,
     223             :                                       const int16_t *b) {
     224           0 :   int sum = 0;
     225           0 :   for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k * a_stride] * b[k];
     226           0 :   return sum;
     227             : }
     228             : 
     229           0 : static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
     230             :                            uint8_t *dst, ptrdiff_t dst_stride,
     231             :                            const InterpKernel *x_filters, int x0_q4,
     232             :                            int x_step_q4, int w, int h) {
     233           0 :   src -= SUBPEL_TAPS / 2 - 1;
     234           0 :   for (int y = 0; y < h; ++y) {
     235           0 :     int x_q4 = x0_q4;
     236           0 :     for (int x = 0; x < w; ++x) {
     237           0 :       const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
     238           0 :       const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
     239           0 :       const int sum = horz_scalar_product(src_x, x_filter);
     240           0 :       dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
     241           0 :       x_q4 += x_step_q4;
     242             :     }
     243           0 :     src += src_stride;
     244           0 :     dst += dst_stride;
     245             :   }
     246           0 : }
     247             : 
     248           0 : static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
     249             :                           uint8_t *dst, ptrdiff_t dst_stride,
     250             :                           const InterpKernel *y_filters, int y0_q4,
     251             :                           int y_step_q4, int w, int h) {
     252           0 :   src -= src_stride * (SUBPEL_TAPS / 2 - 1);
     253             : 
     254           0 :   for (int x = 0; x < w; ++x) {
     255           0 :     int y_q4 = y0_q4;
     256           0 :     for (int y = 0; y < h; ++y) {
     257           0 :       const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
     258           0 :       const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
     259           0 :       const int sum = vert_scalar_product(src_y, src_stride, y_filter);
     260           0 :       dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
     261           0 :       y_q4 += y_step_q4;
     262             :     }
     263           0 :     ++src;
     264           0 :     ++dst;
     265             :   }
     266           0 : }
     267             : 
     268           0 : void aom_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
     269             :                            uint8_t *dst, ptrdiff_t dst_stride,
     270             :                            const int16_t *filter_x, int x_step_q4,
     271             :                            const int16_t *filter_y, int y_step_q4, int w,
     272             :                            int h) {
     273           0 :   const InterpKernel *const filters_x = get_filter_base(filter_x);
     274           0 :   const int x0_q4 = get_filter_offset(filter_x, filters_x);
     275             : 
     276             :   (void)filter_y;
     277             :   (void)y_step_q4;
     278             : 
     279           0 :   convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
     280             :                  w, h);
     281           0 : }
     282             : 
     283           0 : void aom_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
     284             :                           uint8_t *dst, ptrdiff_t dst_stride,
     285             :                           const int16_t *filter_x, int x_step_q4,
     286             :                           const int16_t *filter_y, int y_step_q4, int w,
     287             :                           int h) {
     288           0 :   const InterpKernel *const filters_y = get_filter_base(filter_y);
     289           0 :   const int y0_q4 = get_filter_offset(filter_y, filters_y);
     290             : 
     291             :   (void)filter_x;
     292             :   (void)x_step_q4;
     293             : 
     294           0 :   convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4,
     295             :                 w, h);
     296           0 : }
     297             :     static INLINE const int16_t *av1_get_interp_filter_subpel_kernel(
     298             :         const InterpFilterParams filter_params, const int32_t subpel) ;
     299             : 
     300             : 
     301             : #endif

Generated by: LCOV version 1.14