LCOV - code coverage report
Current view: top level - ASM_AVX2 - EbMemory_AVX2.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 40 40 100.0 %
Date: 2019-11-25 17:38:06 Functions: 10 10 100.0 %

          Line data    Source code
       1             : /*
       2             : * Copyright(c) 2019 Intel Corporation
       3             : * SPDX-License-Identifier: BSD-2-Clause-Patent
       4             : */
       5             : 
       6             : #ifndef EbMemory_AVX2_h
       7             : #define EbMemory_AVX2_h
       8             : 
       9             : #include "synonyms.h"
      10             : 
      11             : #ifdef __cplusplus
      12             : extern "C" {
      13             : #endif
      14             : /*
                     :  * Fallback definition of _mm256_set_m128i(hi, lo): builds a 256-bit
                     :  * integer vector with lo in the lower 128-bit lane and hi in the upper
                     :  * 128-bit lane (lo is widened with a cast, then hi is inserted at lane
                     :  * index 1).
                     :  *
                     :  * NOTE(review): #ifndef only detects a preprocessor macro of this name;
                     :  * if the toolchain provides the intrinsic as a function, this macro is
                     :  * still defined and is what callers in this header expand to.
                     :  */
      15             : #ifndef _mm256_set_m128i
      16             : #define _mm256_set_m128i(/* __m128i */ hi, /* __m128i */ lo) \
      17             :     _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 0x1)
      18             : #endif
      19             : /*
                     :  * Fallback definition of _mm256_setr_m128i(lo, hi): the same result as
                     :  * _mm256_set_m128i but with the arguments given in memory order, i.e.
                     :  * the lower 128-bit lane first. It simply forwards to
                     :  * _mm256_set_m128i(hi, lo).
                     :  */
      20             : #ifndef _mm256_setr_m128i
      21             : #define _mm256_setr_m128i(/* __m128i */ lo, /* __m128i */ hi) \
      22             :     _mm256_set_m128i((hi), (lo))
      23             : #endif
      24             : /*
                     :  * Fallback definition of _mm256_cvtsi256_si32(a): yields the lowest
                     :  * 32-bit element of the 256-bit vector a, by taking its lower 128-bit
                     :  * lane and extracting that lane's lowest 32 bits.
                     :  */
      25             : #ifndef _mm256_cvtsi256_si32
      26             : #define _mm256_cvtsi256_si32(a) \
      27             :     _mm_cvtsi128_si32(_mm256_castsi256_si128(a))
      28             : #endif
      29             : /*
                     :  * Load a 4x4 block of bytes (4 rows of 4 bytes each) into one __m256i.
                     :  *
                     :  * src    - address of the first row.
                     :  * stride - distance between consecutive rows, in bytes.
                     :  *
                     :  * Returns a vector whose lower 128-bit lane holds rows 0 and 1 in its
                     :  * 32-bit elements 0 and 1, and whose upper lane holds rows 2 and 3 the
                     :  * same way. Elements 2 and 3 of each lane are zero.
                     :  *
                     :  * NOTE(review): each row is read by dereferencing src cast to
                     :  * int32_t *, which drops const and assumes the compiler tolerates a
                     :  * possibly unaligned, type-punned 4-byte read -- confirm, or consider a
                     :  * memcpy-based load.
                     :  */
      30   114883600 : static INLINE __m256i load_u8_4x4_avx2(const uint8_t *const src,
      31             :     const uint32_t stride)
      32             : {
      33             :     __m128i src01, src23;
      34   114883600 :     src01 = _mm_cvtsi32_si128(*(int32_t*)(src + 0 * stride)); /* row 0 -> element 0, rest zeroed */
      35   114883600 :     src01 = _mm_insert_epi32(src01, *(int32_t *)(src + 1 * stride), 1); /* row 1 -> element 1 */
      36   114883600 :     src23 = _mm_cvtsi32_si128(*(int32_t*)(src + 2 * stride)); /* row 2 -> element 0, rest zeroed */
      37   114883600 :     src23 = _mm_insert_epi32(src23, *(int32_t *)(src + 3 * stride), 1); /* row 3 -> element 1 */
      38   229767200 :     return _mm256_setr_m128i(src01, src23); /* rows 0-1 in the low lane, rows 2-3 in the high lane */
      39             : }
      40             : /*
                     :  * Load an 8x2 block of bytes (2 rows of 8 bytes each) into one __m256i.
                     :  *
                     :  * src    - address of the first row.
                     :  * stride - signed distance from row 0 to row 1, in bytes.
                     :  *
                     :  * Returns a vector with row 0 in the low 64 bits of the lower 128-bit
                     :  * lane and row 1 in the low 64 bits of the upper lane. The high 64 bits
                     :  * of each lane are zero (_mm_loadl_epi64 clears them).
                     :  */
      41   337398000 : static INLINE __m256i load_u8_8x2_avx2(const uint8_t *const src,
      42             :     const ptrdiff_t stride) {
      43   337398000 :     const __m128i s0 = _mm_loadl_epi64((__m128i *)src);
      44   337398000 :     const __m128i s1 = _mm_loadl_epi64((__m128i *)(src + stride));
      45   674796000 :     return _mm256_setr_m128i(s0, s1);
      46             : }
      47             : /*
                     :  * Load an 8x4 block of bytes (4 rows of 8 bytes each) into one __m256i.
                     :  *
                     :  * src    - address of the first row.
                     :  * stride - distance between consecutive rows, in bytes.
                     :  *
                     :  * Returns a vector whose lower 128-bit lane holds row 0 (low 64 bits)
                     :  * and row 1 (high 64 bits), and whose upper lane holds rows 2 and 3 in
                     :  * the same arrangement.
                     :  *
                     :  * The high half of each lane is filled with _mm_loadh_pd. The casts
                     :  * between the integer and double vector types only reinterpret bits,
                     :  * so the bytes are carried through unchanged.
                     :  */
      48   611215900 : static INLINE __m256i load_u8_8x4_avx2(const uint8_t *const src,
      49             :     const uint32_t stride)
      50             : {
      51             :     __m128i src01, src23;
      52   611215900 :     src01 = _mm_loadl_epi64((__m128i *)(src + 0 * stride)); /* row 0 -> low 64 bits */
      53  1222519000 :     src01 = _mm_castpd_si128(_mm_loadh_pd(_mm_castsi128_pd(src01),
      54   611215900 :         (double *)(src + 1 * stride))); /* row 1 -> high 64 bits */
      55   611302500 :     src23 = _mm_loadl_epi64((__m128i *)(src + 2 * stride)); /* row 2 -> low 64 bits */
      56  1222762000 :     src23 = _mm_castpd_si128(_mm_loadh_pd(_mm_castsi128_pd(src23),
      57   611302500 :         (double *)(src + 3 * stride))); /* row 3 -> high 64 bits */
      58  1222915000 :     return _mm256_setr_m128i(src01, src23);
      59             : }
      60             : /*
                     :  * Load a 16x2 block of bytes (2 rows of 16 bytes each) into one __m256i
                     :  * using aligned 128-bit loads.
                     :  *
                     :  * src    - address of the first row.
                     :  * stride - distance from row 0 to row 1, in bytes.
                     :  *
                     :  * Returns a vector with row 0 in the lower 128-bit lane and row 1 in
                     :  * the upper lane.
                     :  *
                     :  * Both rows are read with _mm_load_si128, so src and src + stride must
                     :  * each be 16-byte aligned. Use loadu_u8_16x2_avx2 when that cannot be
                     :  * guaranteed.
                     :  */
      61             : static INLINE __m256i load_u8_16x2_avx2(const uint8_t *const src,
      62             :     const uint32_t stride)
      63             : {
      64             :     const __m128i src0 = _mm_load_si128((__m128i *)(src + 0 * stride));
      65             :     const __m128i src1 = _mm_load_si128((__m128i *)(src + 1 * stride));
      66             :     return _mm256_setr_m128i(src0, src1);
      67             : }
      68             : /*
                     :  * Load two 16-byte rows from arbitrary (unaligned) addresses into one
                     :  * __m256i. This is the type-agnostic worker behind the typed loadu_*
                     :  * wrappers in this header.
                     :  *
                     :  * src          - address of the first row, any element type.
                     :  * strideInByte - distance from row 0 to row 1, in bytes. It is
                     :  *                unsigned, so row 1 is always at or above src.
                     :  *
                     :  * Returns a vector with row 0 in the lower 128-bit lane and row 1 in
                     :  * the upper lane.
                     :  */
      69  2528440000 : static INLINE __m256i loadu_8bit_16x2_avx2(const void *const src,
      70             :     const uint32_t strideInByte)
      71             : {
      72  2528440000 :     const __m128i src0 = _mm_loadu_si128((__m128i *)src);
      73  2528440000 :     const __m128i src1 = _mm_loadu_si128((__m128i *)((uint8_t *)src + strideInByte)); /* byte-wise pointer arithmetic */
      74  5056875000 :     return _mm256_setr_m128i(src0, src1);
      75             : }
      76             : /*
                     :  * Unaligned load of a 16x2 block of uint8_t (2 rows of 16 elements).
                     :  *
                     :  * src    - address of the first row.
                     :  * stride - distance from row 0 to row 1, in uint8_t elements.
                     :  *
                     :  * Returns the result of loadu_8bit_16x2_avx2: row 0 in the lower
                     :  * 128-bit lane, row 1 in the upper lane.
                     :  */
      77  1159622400 : static INLINE __m256i loadu_u8_16x2_avx2(const uint8_t *const src,
      78             :     const uint32_t stride)
      79             : {
      80  1159622400 :     return loadu_8bit_16x2_avx2(src, sizeof(*src) * stride); /* element stride -> byte stride */
      81             : }
      82             : /*
                     :  * Unaligned load of an 8x2 block of uint16_t (2 rows of 8 elements,
                     :  * 16 bytes per row).
                     :  *
                     :  * src    - address of the first row.
                     :  * stride - distance from row 0 to row 1, in uint16_t elements.
                     :  *
                     :  * Returns the result of loadu_8bit_16x2_avx2: row 0 in the lower
                     :  * 128-bit lane, row 1 in the upper lane.
                     :  */
      83  1366486000 : static INLINE __m256i loadu_u16_8x2_avx2(const uint16_t *const src,
      84             :     const uint32_t stride)
      85             : {
      86  1366486000 :     return loadu_8bit_16x2_avx2(src, sizeof(*src) * stride); /* element stride -> byte stride */
      87             : }
      88             : /*
                     :  * Store a __m256i as two 16-byte rows at arbitrary (unaligned)
                     :  * addresses. This is the type-agnostic worker behind the typed storeu_*
                     :  * wrappers in this header.
                     :  *
                     :  * src          - vector to store.
                     :  * dst          - address of the first row, any element type.
                     :  * strideInByte - signed distance from row 0 to row 1, in bytes.
                     :  *
                     :  * The lower 128-bit lane of src is written to dst and the upper lane
                     :  * to dst + strideInByte.
                     :  */
      89   969758000 : static INLINE void storeu_8bit_16x2_avx2(const __m256i src,
      90             :     void *const dst, const int32_t strideInByte) {
      91   969758000 :     const __m128i d0 = _mm256_castsi256_si128(src); /* lower lane */
      92   969758000 :     const __m128i d1 = _mm256_extracti128_si256(src, 1); /* upper lane */
      93             :     _mm_storeu_si128((__m128i *)dst, d0);
      94   969758000 :     _mm_storeu_si128((__m128i *)((uint8_t *)dst + strideInByte), d1); /* byte-wise pointer arithmetic */
      95   969758000 : }
      96             : /*
                     :  * Unaligned store of a 16x2 block of uint8_t (2 rows of 16 elements).
                     :  *
                     :  * src    - vector to store; lower lane is row 0, upper lane is row 1.
                     :  * dst    - address of the first row.
                     :  * stride - signed distance from row 0 to row 1, in uint8_t elements.
                     :  *
                     :  * NOTE(review): sizeof(*dst) * stride is presumably evaluated in
                     :  * size_t and then converted back to the int32_t strideInByte
                     :  * parameter; a negative stride relies on that round trip restoring
                     :  * the negative value -- confirm if negative strides are used.
                     :  */
      97   385235400 : static INLINE void storeu_u8_16x2_avx2(const __m256i src,
      98             :     uint8_t *const dst,
      99             :     const int32_t stride) {
     100   385235400 :     storeu_8bit_16x2_avx2(src, dst, sizeof(*dst) * stride); /* element stride -> byte stride */
     101   385019100 : }
     102             : /*
                     :  * Unaligned store of an 8x2 block of int16_t (2 rows of 8 elements,
                     :  * 16 bytes per row).
                     :  *
                     :  * src    - vector to store; lower lane is row 0, upper lane is row 1.
                     :  * dst    - address of the first row.
                     :  * stride - signed distance from row 0 to row 1, in int16_t elements.
                     :  *
                     :  * NOTE(review): sizeof(*dst) * stride is presumably evaluated in
                     :  * size_t and then converted back to the int32_t strideInByte
                     :  * parameter; a negative stride relies on that round trip restoring
                     :  * the negative value -- confirm if negative strides are used.
                     :  */
     103   137868000 : static INLINE void storeu_s16_8x2_avx2(const __m256i src,
     104             :     int16_t *const dst,
     105             :     const int32_t stride) {
     106   137868000 :     storeu_8bit_16x2_avx2(src, dst, sizeof(*dst) * stride); /* element stride -> byte stride */
     107   137868000 : }
     108             : /*
                     :  * Unaligned store of an 8x2 block of uint16_t (2 rows of 8 elements,
                     :  * 16 bytes per row).
                     :  *
                     :  * src    - vector to store; lower lane is row 0, upper lane is row 1.
                     :  * dst    - address of the first row.
                     :  * stride - signed distance from row 0 to row 1, in uint16_t elements.
                     :  *
                     :  * NOTE(review): sizeof(*dst) * stride is presumably evaluated in
                     :  * size_t and then converted back to the int32_t strideInByte
                     :  * parameter; a negative stride relies on that round trip restoring
                     :  * the negative value -- confirm if negative strides are used.
                     :  */
     109   449856000 : static INLINE void storeu_u16_8x2_avx2(const __m256i src,
     110             :     uint16_t *const dst,
     111             :     const int32_t stride) {
     112   449856000 :     storeu_8bit_16x2_avx2(src, dst,  sizeof(*dst) * stride); /* element stride -> byte stride */
     113   450015000 : }
     114             : 
     115             : #ifdef __cplusplus
     116             : }
     117             : #endif
     118             : #endif // EbMemory_AVX2_h

Generated by: LCOV version 1.14