Line data Source code
1 : /* 2 : * Copyright (c) 2018, Alliance for Open Media. All rights reserved 3 : * 4 : * This source code is subject to the terms of the BSD 2 Clause License and 5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 : * was not distributed with this source code in the LICENSE file, you can 7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 : * Media Patent License 1.0 was not distributed with this source code in the 9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 : */ 11 : #ifndef AV1_COMMON_X86_AV1_INV_TXFM_AVX2_H_ 12 : #define AV1_COMMON_X86_AV1_INV_TXFM_AVX2_H_ 13 : 14 : #include <immintrin.h> 15 : 16 : #include "EbDefinitions.h" 17 : #include "aom_dsp_rtcd.h" 18 : #include "txfm_common_avx2.h" 19 : 20 : #ifdef __cplusplus 21 : extern "C" { 22 : #endif 23 : 24 : // half input is zero 25 : #define btf_16_w16_0_avx2(w0, w1, in, out0, out1) \ 26 : { \ 27 : const __m256i _w0 = _mm256_set1_epi16(w0 * 8); \ 28 : const __m256i _w1 = _mm256_set1_epi16(w1 * 8); \ 29 : const __m256i _in = in; \ 30 : out0 = _mm256_mulhrs_epi16(_in, _w0); \ 31 : out1 = _mm256_mulhrs_epi16(_in, _w1); \ 32 : } 33 : 34 2600500 : static INLINE void round_shift_avx2(const __m256i *input, __m256i *output, 35 : int32_t size) { 36 2600500 : const __m256i scale = _mm256_set1_epi16(NewInvSqrt2 * 8); 37 65862500 : for (int32_t i = 0; i < size; ++i) 38 126524000 : output[i] = _mm256_mulhrs_epi16(input[i], scale); 39 : 40 2600500 : } 41 : 42 244261000 : static INLINE void write_recon_w16_avx2(__m256i res, uint8_t *output_r, uint8_t *output_w) { 43 244261000 : __m128i pred = _mm_loadu_si128((__m128i const *)(output_r)); 44 488521000 : __m256i u = _mm256_adds_epi16(_mm256_cvtepu8_epi16(pred), res); 45 244261000 : __m128i y = _mm256_castsi256_si128( 46 244261000 : _mm256_permute4x64_epi64(_mm256_packus_epi16(u, u), 168)); 47 : _mm_storeu_si128((__m128i *)(output_w), y); 48 244261000 : } 49 : 50 8611450 : static INLINE void lowbd_write_buffer_16xn_avx2(__m256i *in, 51 : uint8_t *output_r, int32_t stride_r, 52 : uint8_t *output_w, int32_t stride_w, 53 : int32_t flipud, int32_t height) { 54 8611450 : int32_t j = flipud ? (height - 1) : 0; 55 8611450 : const int32_t step = flipud ? -1 : 1; 56 245201000 : for (int32_t i = 0; i < height; ++i, j += step) 57 236593000 : write_recon_w16_avx2(in[j], output_r + i * stride_r, output_w + i * stride_w); 58 8608050 : } 59 : 60 : #ifdef __cplusplus 61 : } 62 : #endif 63 : 64 : #endif // AV1_COMMON_X86_AV1_INV_TXFM_AVX2_H_