LCOV - code coverage report
Current view: top level - ASM_AVX2 - EbCombinedAveragingSAD_Inline_AVX2.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 29 29 100.0 %
Date: 2019-11-25 17:38:06 Functions: 2 2 100.0 %

          Line data    Source code
       1             : /*
       2             : * Copyright(c) 2019 Intel Corporation
       3             : * SPDX-License-Identifier: BSD-2-Clause-Patent
       4             : */
       5             : 
       6             : #ifndef EbCombinedAveragingSAD_Inline_AVX2_h
       7             : #define EbCombinedAveragingSAD_Inline_AVX2_h
       8             : 
       9             : #include "immintrin.h"
      10             : #include "EbDefinitions.h"
      11             : #include "EbMemory_AVX2.h"
      12             : 
      13             : #ifdef __cplusplus
      14             : extern "C" {
      15             : #endif
      16             : 
      17   112570000 :     static INLINE void ssd8x2_avx2(const uint8_t *const src, // Accumulates into *sum the squared differences between src and the rounded average of ref1 and ref2. src: source bytes.
      18             :         const ptrdiff_t src_stride, // stride passed to load_u8_8x2_avx2 together with src
      19             :         const uint8_t *const ref1, // first reference operand of the average
      20             :         const ptrdiff_t ref1_stride, // stride passed to load_u8_8x2_avx2 together with ref1
      21             :         const uint8_t *const ref2, // second reference operand of the average
      22             :         const ptrdiff_t ref2_stride, __m256i *const sum) { // sum: in/out accumulator of 32-bit partial sums; no horizontal reduction is done here
      23   112570000 :         const __m256i zero = _mm256_setzero_si256(); // all-zero vector, interleaved below to widen bytes to 16 bits
      24   112570000 :         const __m256i s = load_u8_8x2_avx2(src, src_stride); // NOTE(review): helper is defined elsewhere; presumably loads two 8-byte rows, one per 128-bit lane - confirm
      25   112540000 :         const __m256i r1 = load_u8_8x2_avx2(ref1, ref1_stride); // same load for the first reference
      26   112511000 :         const __m256i r2 = load_u8_8x2_avx2(ref2, ref2_stride); // same load for the second reference
      27   112522000 :         const __m256i avg = _mm256_avg_epu8(r1, r2); // per-byte unsigned average with rounding: (r1 + r2 + 1) >> 1
      28   112522000 :         const __m256i s_256 = _mm256_unpacklo_epi8(s, zero); // zero-extend the low 8 bytes of each 128-bit lane to 16 bits; the high 8 bytes of each lane are not used
      29   112522000 :         const __m256i avg_256 = _mm256_unpacklo_epi8(avg, zero); // same widening for the averaged reference
      30   112522000 :         const __m256i dif = _mm256_sub_epi16(s_256, avg_256); // signed 16-bit difference; both operands are in 0..255 so it cannot overflow
      31   112522000 :         const __m256i sqr = _mm256_madd_epi16(dif, dif); // square each difference and add adjacent pairs into eight 32-bit values
      32   112522000 :         *sum = _mm256_add_epi32(*sum, sqr); // add to the caller's running 32-bit totals
      33   112522000 :     }
      34             : 
      35   131381000 :     static INLINE void ssd32_avx2(const uint8_t *const src, // Accumulates into *sum the squared differences between 32 bytes at src and the rounded average of 32 bytes at ref1 and ref2. src: source bytes.
      36             :         const uint8_t *const ref1, // first reference operand of the average (32 bytes are read)
      37             :         const uint8_t *const ref2, __m256i *const sum) { // ref2: second reference operand (32 bytes are read); sum: in/out accumulator of 32-bit partial sums, not reduced horizontally here
      38   131381000 :         const __m256i zero = _mm256_setzero_si256(); // all-zero vector, interleaved below to widen bytes to 16 bits
      39   131381000 :         const __m256i s = _mm256_loadu_si256((__m256i *)src); // unaligned 32-byte load; the cast drops const but the intrinsic only reads
      40   131381000 :         const __m256i r1 = _mm256_loadu_si256((__m256i *)ref1); // unaligned 32-byte load of the first reference
      41   131381000 :         const __m256i r2 = _mm256_loadu_si256((__m256i *)ref2); // unaligned 32-byte load of the second reference
      42   131381000 :         const __m256i avg = _mm256_avg_epu8(r1, r2); // per-byte unsigned average with rounding: (r1 + r2 + 1) >> 1
      43   131381000 :         const __m256i s0 = _mm256_unpacklo_epi8(s, zero); // zero-extend the low 8 bytes of each 128-bit lane of src to 16 bits
      44   131381000 :         const __m256i s1 = _mm256_unpackhi_epi8(s, zero); // zero-extend the high 8 bytes of each 128-bit lane of src to 16 bits
      45   131381000 :         const __m256i avg0 = _mm256_unpacklo_epi8(avg, zero); // matching low-half widening of the average, so s0/avg0 pair up element by element
      46   131381000 :         const __m256i avg1 = _mm256_unpackhi_epi8(avg, zero); // matching high-half widening of the average, pairing with s1
      47   131381000 :         const __m256i dif0 = _mm256_sub_epi16(s0, avg0); // signed 16-bit differences for the low halves; operands are in 0..255 so no overflow
      48   131381000 :         const __m256i dif1 = _mm256_sub_epi16(s1, avg1); // signed 16-bit differences for the high halves
      49   131381000 :         const __m256i sqr0 = _mm256_madd_epi16(dif0, dif0); // square and add adjacent pairs into eight 32-bit values
      50   131381000 :         const __m256i sqr1 = _mm256_madd_epi16(dif1, dif1); // same for the high halves
      51   131381000 :         *sum = _mm256_add_epi32(*sum, sqr0); // add the low-half squares to the caller's running totals
      52   131381000 :         *sum = _mm256_add_epi32(*sum, sqr1); // add the high-half squares; all 32 bytes are now accounted for
      53   131381000 :     }
      54             : 
      55             : #ifdef __cplusplus
      56             : }
      57             : #endif
      58             : #endif // EbCombinedAveragingSAD_Inline_AVX2_h

Generated by: LCOV version 1.14