LCOV - code coverage report
Current view: top level - ASM_AVX2 - av1_inv_txfm_avx2.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 17 17 100.0 %
Date: 2019-11-25 17:38:06 Functions: 3 3 100.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2018, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : #ifndef AV1_COMMON_X86_AV1_INV_TXFM_AVX2_H_
      12             : #define AV1_COMMON_X86_AV1_INV_TXFM_AVX2_H_
      13             : 
      14             : #include <immintrin.h>
      15             : 
      16             : #include "EbDefinitions.h"
      17             : #include "aom_dsp_rtcd.h"
      18             : #include "txfm_common_avx2.h"
      19             : 
      20             : #ifdef __cplusplus
      21             : extern "C" {
      22             : #endif
      23             : 
      24             :     // half input is zero
      25             : #define btf_16_w16_0_avx2(w0, w1, in, out0, out1)  \
      26             :   {                                                \
      27             :     const __m256i _w0 = _mm256_set1_epi16(w0 * 8); \
      28             :     const __m256i _w1 = _mm256_set1_epi16(w1 * 8); \
      29             :     const __m256i _in = in;                        \
      30             :     out0 = _mm256_mulhrs_epi16(_in, _w0);          \
      31             :     out1 = _mm256_mulhrs_epi16(_in, _w1);          \
      32             :   }
      33             : 
      34     2600500 :     static INLINE void round_shift_avx2(const __m256i *input, __m256i *output,
      35             :         int32_t size) {
      36     2600500 :         const __m256i scale = _mm256_set1_epi16(NewInvSqrt2 * 8);
      37    65862500 :         for (int32_t i = 0; i < size; ++i)
      38   126524000 :             output[i] = _mm256_mulhrs_epi16(input[i], scale);
      39             : 
      40     2600500 :     }
      41             : 
      42   244261000 :     static INLINE void write_recon_w16_avx2(__m256i res, uint8_t *output_r, uint8_t *output_w) {
      43   244261000 :         __m128i pred = _mm_loadu_si128((__m128i const *)(output_r));
      44   488521000 :         __m256i u = _mm256_adds_epi16(_mm256_cvtepu8_epi16(pred), res);
      45   244261000 :         __m128i y = _mm256_castsi256_si128(
      46   244261000 :             _mm256_permute4x64_epi64(_mm256_packus_epi16(u, u), 168));
      47             :         _mm_storeu_si128((__m128i *)(output_w), y);
      48   244261000 :     }
      49             : 
      50     8611450 :     static INLINE void lowbd_write_buffer_16xn_avx2(__m256i *in,
      51             :         uint8_t *output_r, int32_t stride_r,
      52             :         uint8_t *output_w, int32_t stride_w,
      53             :         int32_t flipud, int32_t height) {
      54     8611450 :         int32_t j = flipud ? (height - 1) : 0;
      55     8611450 :         const int32_t step = flipud ? -1 : 1;
      56   245201000 :         for (int32_t i = 0; i < height; ++i, j += step)
      57   236593000 :             write_recon_w16_avx2(in[j], output_r + i * stride_r, output_w + i * stride_w);
      58     8608050 :     }
      59             : 
      60             : #ifdef __cplusplus
      61             : }
      62             : #endif
      63             : 
      64             : #endif  // AV1_COMMON_X86_AV1_INV_TXFM_AVX2_H_

Generated by: LCOV version 1.14