Line data Source code
1 : /*
2 : * Copyright (c) 2018, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include <immintrin.h>
13 :
14 : #include "EbDefinitions.h"
15 : #include "fft_common.h"
16 :
// SSE2 helpers defined outside this file. The 2-D wrappers below hand them to
// the generic drivers as callbacks (presumably the transpose and the output
// unpack steps, going by their names -- confirm against fft_common.h).
extern void eb_aom_transpose_float_sse2(const float *A, float *B, int32_t n);
extern void eb_aom_fft_unpack_2d_output_sse2(const float *col_fft,
                                             float *output, int32_t n);
20 :
21 : // Generate the 1d forward transforms for float using _mm256
22 0 : GEN_FFT_8(static INLINE void, avx2, float, __m256, _mm256_load_ps,
23 : _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
24 : _mm256_mul_ps);
25 0 : GEN_FFT_16(static INLINE void, avx2, float, __m256, _mm256_load_ps,
26 : _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
27 : _mm256_mul_ps);
28 0 : GEN_FFT_32(static INLINE void, avx2, float, __m256, _mm256_load_ps,
29 : _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
30 : _mm256_mul_ps);
31 :
32 0 : void eb_aom_fft8x8_float_avx2(const float *input, float *temp, float *output) {
33 0 : eb_aom_fft_2d_gen(input, temp, output, 8, eb_aom_fft1d_8_avx2,
34 : eb_aom_transpose_float_sse2, eb_aom_fft_unpack_2d_output_sse2, 8);
35 0 : }
36 :
37 0 : void eb_aom_fft16x16_float_avx2(const float *input, float *temp, float *output) {
38 0 : eb_aom_fft_2d_gen(input, temp, output, 16, eb_aom_fft1d_16_avx2,
39 : eb_aom_transpose_float_sse2, eb_aom_fft_unpack_2d_output_sse2, 8);
40 0 : }
41 :
42 0 : void eb_aom_fft32x32_float_avx2(const float *input, float *temp, float *output) {
43 0 : eb_aom_fft_2d_gen(input, temp, output, 32, eb_aom_fft1d_32_avx2,
44 : eb_aom_transpose_float_sse2, eb_aom_fft_unpack_2d_output_sse2, 8);
45 0 : }
46 :
47 : // Generate the 1d inverse transforms for float using _mm256
48 0 : GEN_IFFT_8(static INLINE void, avx2, float, __m256, _mm256_load_ps,
49 : _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
50 : _mm256_mul_ps);
51 0 : GEN_IFFT_16(static INLINE void, avx2, float, __m256, _mm256_load_ps,
52 : _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
53 : _mm256_mul_ps);
54 0 : GEN_IFFT_32(static INLINE void, avx2, float, __m256, _mm256_load_ps,
55 : _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
56 : _mm256_mul_ps);
57 :
58 0 : void eb_aom_ifft8x8_float_avx2(const float *input, float *temp, float *output) {
59 0 : eb_aom_ifft_2d_gen(input, temp, output, 8, eb_aom_fft1d_8_float, eb_aom_fft1d_8_avx2,
60 : eb_aom_ifft1d_8_avx2, eb_aom_transpose_float_sse2, 8);
61 0 : }
62 :
63 0 : void eb_aom_ifft16x16_float_avx2(const float *input, float *temp, float *output) {
64 0 : eb_aom_ifft_2d_gen(input, temp, output, 16, eb_aom_fft1d_16_float,
65 : eb_aom_fft1d_16_avx2, eb_aom_ifft1d_16_avx2,
66 : eb_aom_transpose_float_sse2, 8);
67 0 : }
68 :
69 0 : void eb_aom_ifft32x32_float_avx2(const float *input, float *temp, float *output) {
70 0 : eb_aom_ifft_2d_gen(input, temp, output, 32, eb_aom_fft1d_32_float,
71 : eb_aom_fft1d_32_avx2, eb_aom_ifft1d_32_avx2,
72 : eb_aom_transpose_float_sse2, 8);
73 0 : }
|