LCOV - code coverage report
Current view: top level - ASM_AVX2 - synonyms_avx2.h (source / functions)
Test: coverage.info | Lines: 11 hit, 15 total, 73.3 % coverage
Date: 2019-11-25 17:38:06 | Functions: 5 hit, 6 total, 83.3 % coverage

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2018, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #ifndef AOM_DSP_X86_SYNONYMS_AVX2_H_
      13             : #define AOM_DSP_X86_SYNONYMS_AVX2_H_
      14             : 
      15             : #include <immintrin.h>
      16             : 
      17             :  /**
      18             :   * Various reusable shorthands for x86 SIMD intrinsics.
      19             :   *
      20             :  * Intrinsics prefixed with xx_ operate on or return 128-bit XMM registers.
      21             :  * Intrinsics prefixed with yy_ operate on or return 256-bit YMM registers.
      22             :   */
      23             : 
      24             :   // Loads and stores to do away with the tedium of casting the address
      25             :   // to the right type.
      26             : static INLINE __m256i yy_load_256(const void *a) {
      27             :     return _mm256_load_si256((const __m256i *)a);
      28             : }
      29             : 
      30  7030172000 : static INLINE __m256i yy_loadu_256(const void *a) {
      31  7030172000 :     return _mm256_loadu_si256((const __m256i *)a);
      32             : }
      33             : 
      34             : static INLINE void yy_store_256(void *const a, const __m256i v) {
      35             :     _mm256_store_si256((__m256i *)a, v);
      36             : }
      37             : 
      38  1414362900 : static INLINE void yy_storeu_256(void *const a, const __m256i v) {
      39             :     _mm256_storeu_si256((__m256i *)a, v);
      40  1414362900 : }
      41             : 
      42             : // The _mm256_set1_epi64x() intrinsic is undefined for some Visual Studio
      43             : // compilers. The following function is equivalent to _mm256_set1_epi64x()
      44             : // applied to a 32-bit integer zero-extended (not sign-extended) to 64 bits.
      45   293263000 : static INLINE __m256i yy_set1_64_from_32i(int32_t a) {
      46             : #if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
      47             :     return _mm256_set_epi32(0, a, 0, a, 0, a, 0, a);
      48             : #else
      49   586526000 :     return _mm256_set1_epi64x((uint32_t)a);
      50             : #endif
      51             : }
      52             : 
      53             : // Some compilers don't have _mm256_set_m128i defined in immintrin.h. We
      54             : // therefore define an equivalent function using a different intrinsic.
      55             : // ([ hi ], [ lo ]) -> [ hi ][ lo ]
      56   167612000 : static INLINE __m256i yy_set_m128i(__m128i hi, __m128i lo) {
      57   335224000 :     return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
      58             : }
      59             : 
      60   172063000 : static INLINE __m256i yy_roundn_epu16(__m256i v_val_w, int bits) {
      61   344126000 :     const __m256i v_s_w = _mm256_srli_epi16(v_val_w, bits - 1);
      62   344126000 :     return _mm256_avg_epu16(v_s_w, _mm256_setzero_si256());
      63             : }
      64           0 : static INLINE void yy_storeu2_128(void *hi, void *lo, const __m256i a) {
      65           0 :     _mm_storeu_si128((__m128i *)hi, _mm256_extracti128_si256(a, 1));
      66           0 :     _mm_storeu_si128((__m128i *)lo, _mm256_castsi256_si128(a));
      67           0 : }
      68             : #endif  // AOM_DSP_X86_SYNONYMS_AVX2_H_

Generated by: LCOV version 1.14