LCOV - coverage.info - Codec/fft.c

LCOV - code coverage report

Current view:	top level - Codec - fft.c (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	0	117	0.0 %
Date:	2019-11-25 17:38:06	Functions:	0	28	0.0 %

          Line data    Source code

       1             : /*
       2             :  * Copyright (c) 2018, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #include "EbDefinitions.h"
      13             : #include "fft_common.h"
      14             : 
      15           0 : static INLINE void simple_transpose(const float *A, float *B, int32_t n) {
      16           0 :     for (int32_t y = 0; y < n; y++) {
      17           0 :         for (int32_t x = 0; x < n; x++)
      18           0 :             B[y * n + x] = A[x * n + y];
      19             :     }
      20           0 : }
      21             : 
      22             : // The 1d transform is real to complex and packs the complex results in
      23             : // a way to take advantage of conjugate symmetry (e.g., the n/2 + 1 real
      24             : // components, followed by the n/2 - 1 imaginary components). After the
      25             : // transform is done on the rows, the first n/2 + 1 columns are real, and
      26             : // the remaining are the imaginary components. After the transform on the
      27             : // columns, the region of [0, n/2]x[0, n/2] contains the real part of
      28             : // fft of the real columns. The real part of the 2d fft also includes the
      29             : // imaginary part of transformed imaginary columns. This function assembles
      30             : // the correct outputs while putting the real and imaginary components
      31             : // next to each other.
      32           0 : static INLINE void unpack_2d_output(const float *col_fft, float *output,
      33             :     int32_t n) {
      34           0 :     for (int32_t y = 0; y <= n / 2; ++y) {
      35           0 :         const int32_t y2 = y + n / 2;
      36           0 :         const int32_t y_extra = y2 > n / 2 && y2 < n;
      37             : 
      38           0 :         for (int32_t x = 0; x <= n / 2; ++x) {
      39           0 :             const int32_t x2 = x + n / 2;
      40           0 :             const int32_t x_extra = x2 > n / 2 && x2 < n;
      41           0 :             output[2 * (y * n + x)] =
      42           0 :                 col_fft[y * n + x] - (x_extra && y_extra ? col_fft[y2 * n + x2] : 0);
      43           0 :             output[2 * (y * n + x) + 1] = (y_extra ? col_fft[y2 * n + x] : 0) +
      44           0 :                 (x_extra ? col_fft[y * n + x2] : 0);
      45           0 :             if (y_extra) {
      46           0 :                 output[2 * ((n - y) * n + x)] =
      47           0 :                     col_fft[y * n + x] +
      48           0 :                     (x_extra && y_extra ? col_fft[y2 * n + x2] : 0);
      49           0 :                 output[2 * ((n - y) * n + x) + 1] =
      50           0 :                     -(y_extra ? col_fft[y2 * n + x] : 0) +
      51           0 :                     (x_extra ? col_fft[y * n + x2] : 0);
      52             :             }
      53             :         }
      54             :     }
      55           0 : }
      56             : 
      57           0 : void eb_aom_fft_2d_gen(const float *input, float *temp, float *output, int32_t n,
      58             :     aom_fft_1d_func_t tform, aom_fft_transpose_func_t transpose,
      59             :     aom_fft_unpack_func_t unpack, int32_t vec_size) {
      60           0 :     for (int32_t x = 0; x < n; x += vec_size)
      61           0 :         tform(input + x, output + x, n);
      62           0 :     transpose(output, temp, n);
      63             : 
      64           0 :     for (int32_t x = 0; x < n; x += vec_size)
      65           0 :         tform(temp + x, output + x, n);
      66           0 :     transpose(output, temp, n);
      67             : 
      68           0 :     unpack(temp, output, n);
      69           0 : }
      70             : 
      71           0 : static INLINE void store_float(float *output, float input) { *output = input; }
      72           0 : static INLINE float add_float(float a, float b) { return a + b; }
      73           0 : static INLINE float sub_float(float a, float b) { return a - b; }
      74           0 : static INLINE float mul_float(float a, float b) { return a * b; }
      75             : 
      76           0 : GEN_FFT_2(void, float, float, float, *, store_float);
      77           0 : GEN_FFT_4(void, float, float, float, *, store_float, (float), add_float,
      78             :     sub_float);
      79           0 : GEN_FFT_8(void, float, float, float, *, store_float, (float), add_float,
      80             :     sub_float, mul_float);
      81           0 : GEN_FFT_16(void, float, float, float, *, store_float, (float), add_float,
      82             :     sub_float, mul_float);
      83           0 : GEN_FFT_32(void, float, float, float, *, store_float, (float), add_float,
      84             :     sub_float, mul_float);
      85             : 
      86           0 : void eb_aom_fft2x2_float_c(const float *input, float *temp, float *output) {
      87           0 :     eb_aom_fft_2d_gen(input, temp, output, 2, eb_aom_fft1d_2_float, simple_transpose,
      88             :         unpack_2d_output, 1);
      89           0 : }
      90             : 
      91           0 : void eb_aom_fft4x4_float_c(const float *input, float *temp, float *output) {
      92           0 :     eb_aom_fft_2d_gen(input, temp, output, 4, eb_aom_fft1d_4_float, simple_transpose,
      93             :         unpack_2d_output, 1);
      94           0 : }
      95             : 
      96           0 : void eb_aom_fft8x8_float_c(const float *input, float *temp, float *output) {
      97           0 :     eb_aom_fft_2d_gen(input, temp, output, 8, eb_aom_fft1d_8_float, simple_transpose,
      98             :         unpack_2d_output, 1);
      99           0 : }
     100             : 
     101           0 : void eb_aom_fft16x16_float_c(const float *input, float *temp, float *output) {
     102           0 :     eb_aom_fft_2d_gen(input, temp, output, 16, eb_aom_fft1d_16_float, simple_transpose,
     103             :         unpack_2d_output, 1);
     104           0 : }
     105             : 
     106           0 : void eb_aom_fft32x32_float_c(const float *input, float *temp, float *output) {
     107           0 :     eb_aom_fft_2d_gen(input, temp, output, 32, eb_aom_fft1d_32_float, simple_transpose,
     108             :         unpack_2d_output, 1);
     109           0 : }
     110             : 
     111           0 : void eb_aom_ifft_2d_gen(const float *input, float *temp, float *output, int32_t n,
     112             :     aom_fft_1d_func_t fft_single, aom_fft_1d_func_t fft_multi,
     113             :     aom_fft_1d_func_t ifft_multi,
     114             :     aom_fft_transpose_func_t transpose, int32_t vec_size) {
     115             :     // Column 0 and n/2 have conjugate symmetry, so we can directly do the ifft
     116             :     // and get real outputs.
     117           0 :     for (int32_t y = 0; y <= n / 2; ++y) {
     118           0 :         output[y * n] = input[2 * y * n];
     119           0 :         output[y * n + 1] = input[2 * (y * n + n / 2)];
     120             :     }
     121           0 :     for (int32_t y = n / 2 + 1; y < n; ++y) {
     122           0 :         output[y * n] = input[2 * (y - n / 2) * n + 1];
     123           0 :         output[y * n + 1] = input[2 * ((y - n / 2) * n + n / 2) + 1];
     124             :     }
     125             : 
     126           0 :     for (int32_t i = 0; i < 2; i += vec_size)
     127           0 :         ifft_multi(output + i, temp + i, n);
     128             :     // For the other columns, since we don't have a full ifft for complex inputs
     129             :     // we have to split them into the real and imaginary counterparts.
     130             :     // Pack the real component, then the imaginary components.
     131           0 :     for (int32_t y = 0; y < n; ++y) {
     132           0 :         for (int32_t x = 1; x < n / 2; ++x)
     133           0 :             output[y * n + (x + 1)] = input[2 * (y * n + x)];
     134           0 :         for (int32_t x = 1; x < n / 2; ++x)
     135           0 :             output[y * n + (x + n / 2)] = input[2 * (y * n + x) + 1];
     136             :     }
     137           0 :     for (int32_t y = 2; y < vec_size; y++)
     138           0 :         fft_single(output + y, temp + y, n);
     139             :     // This is the part that can be sped up with SIMD
     140           0 :     for (int32_t y = AOMMAX(2, vec_size); y < n; y += vec_size)
     141           0 :         fft_multi(output + y, temp + y, n);
     142             :     // Put the 0 and n/2 th results in the correct place.
     143           0 :     for (int32_t x = 0; x < n; ++x) {
     144           0 :         output[x] = temp[x * n];
     145           0 :         output[(n / 2) * n + x] = temp[x * n + 1];
     146             :     }
     147             :     // This rearranges and transposes.
     148           0 :     for (int32_t y = 1; y < n / 2; ++y) {
     149             :         // Fill in the real columns
     150           0 :         for (int32_t x = 0; x <= n / 2; ++x) {
     151           0 :             output[x + y * n] =
     152           0 :                 temp[(y + 1) + x * n] +
     153           0 :                 ((x > 0 && x < n / 2) ? temp[(y + n / 2) + (x + n / 2) * n] : 0);
     154             :         }
     155           0 :         for (int32_t x = n / 2 + 1; x < n; ++x) {
     156           0 :             output[x + y * n] = temp[(y + 1) + (n - x) * n] -
     157           0 :                 temp[(y + n / 2) + ((n - x) + n / 2) * n];
     158             :         }
     159             :         // Fill in the imag columns
     160           0 :         for (int32_t x = 0; x <= n / 2; ++x) {
     161           0 :             output[x + (y + n / 2) * n] =
     162           0 :                 temp[(y + n / 2) + x * n] -
     163           0 :                 ((x > 0 && x < n / 2) ? temp[(y + 1) + (x + n / 2) * n] : 0);
     164             :         }
     165           0 :         for (int32_t x = n / 2 + 1; x < n; ++x) {
     166           0 :             output[x + (y + n / 2) * n] = temp[(y + 1) + ((n - x) + n / 2) * n] +
     167           0 :                 temp[(y + n / 2) + (n - x) * n];
     168             :         }
     169             :     }
     170           0 :     for (int32_t y = 0; y < n; y += vec_size)
     171           0 :         ifft_multi(output + y, temp + y, n);
     172           0 :     transpose(temp, output, n);
     173           0 : }
     174             : 
     175           0 : GEN_IFFT_2(void, float, float, float, *, store_float);
     176           0 : GEN_IFFT_4(void, float, float, float, *, store_float, (float), add_float,
     177             :     sub_float);
     178           0 : GEN_IFFT_8(void, float, float, float, *, store_float, (float), add_float,
     179             :     sub_float, mul_float);
     180           0 : GEN_IFFT_16(void, float, float, float, *, store_float, (float), add_float,
     181             :     sub_float, mul_float);
     182           0 : GEN_IFFT_32(void, float, float, float, *, store_float, (float), add_float,
     183             :     sub_float, mul_float);
     184             : 
     185           0 : void eb_aom_ifft2x2_float_c(const float *input, float *temp, float *output) {
     186           0 :     eb_aom_ifft_2d_gen(input, temp, output, 2, eb_aom_fft1d_2_float, eb_aom_fft1d_2_float,
     187             :         eb_aom_ifft1d_2_float, simple_transpose, 1);
     188           0 : }
     189             : 
     190           0 : void eb_aom_ifft4x4_float_c(const float* input, float* temp, float* output) {
     191           0 :     eb_aom_ifft_2d_gen(input, temp, output, 4, eb_aom_fft1d_4_float, eb_aom_fft1d_4_float,
     192             :         eb_aom_ifft1d_4_float, simple_transpose, 1);
     193           0 : }
     194             : 
     195           0 : void eb_aom_ifft8x8_float_c(const float* input, float* temp, float* output) {
     196           0 :     eb_aom_ifft_2d_gen(input, temp, output, 8, eb_aom_fft1d_8_float, eb_aom_fft1d_8_float,
     197             :         eb_aom_ifft1d_8_float, simple_transpose, 1);
     198           0 : }
     199             : 
     200           0 : void eb_aom_ifft16x16_float_c(const float* input, float* temp, float* output) {
     201           0 :     eb_aom_ifft_2d_gen(input, temp, output, 16, eb_aom_fft1d_16_float,
     202             :         eb_aom_fft1d_16_float, eb_aom_ifft1d_16_float, simple_transpose, 1);
     203           0 : }
     204             : 
     205           0 : void eb_aom_ifft32x32_float_c(const float* input, float* temp, float* output) {
     206           0 :     eb_aom_ifft_2d_gen(input, temp, output, 32, eb_aom_fft1d_32_float,
     207             :         eb_aom_fft1d_32_float, eb_aom_ifft1d_32_float, simple_transpose, 1);
     208           0 : }

Generated by: LCOV version 1.14