Line data Source code
1 : /* 2 : * Copyright(c) 2019 Intel Corporation 3 : * SPDX - License - Identifier: BSD - 2 - Clause - Patent 4 : */ 5 : 6 : #ifndef EbCombinedAveragingSAD_Inline_AVX2_h 7 : #define EbCombinedAveragingSAD_Inline_AVX2_h 8 : 9 : #include "immintrin.h" 10 : #include "EbDefinitions.h" 11 : #include "EbMemory_AVX2.h" 12 : 13 : #ifdef __cplusplus 14 : extern "C" { 15 : #endif 16 : 17 112570000 : static INLINE void ssd8x2_avx2(const uint8_t *const src, 18 : const ptrdiff_t src_stride, 19 : const uint8_t *const ref1, 20 : const ptrdiff_t ref1_stride, 21 : const uint8_t *const ref2, 22 : const ptrdiff_t ref2_stride, __m256i *const sum) { 23 112570000 : const __m256i zero = _mm256_setzero_si256(); 24 112570000 : const __m256i s = load_u8_8x2_avx2(src, src_stride); 25 112540000 : const __m256i r1 = load_u8_8x2_avx2(ref1, ref1_stride); 26 112511000 : const __m256i r2 = load_u8_8x2_avx2(ref2, ref2_stride); 27 112522000 : const __m256i avg = _mm256_avg_epu8(r1, r2); 28 112522000 : const __m256i s_256 = _mm256_unpacklo_epi8(s, zero); 29 112522000 : const __m256i avg_256 = _mm256_unpacklo_epi8(avg, zero); 30 112522000 : const __m256i dif = _mm256_sub_epi16(s_256, avg_256); 31 112522000 : const __m256i sqr = _mm256_madd_epi16(dif, dif); 32 112522000 : *sum = _mm256_add_epi32(*sum, sqr); 33 112522000 : } 34 : 35 131381000 : static INLINE void ssd32_avx2(const uint8_t *const src, 36 : const uint8_t *const ref1, 37 : const uint8_t *const ref2, __m256i *const sum) { 38 131381000 : const __m256i zero = _mm256_setzero_si256(); 39 131381000 : const __m256i s = _mm256_loadu_si256((__m256i *)src); 40 131381000 : const __m256i r1 = _mm256_loadu_si256((__m256i *)ref1); 41 131381000 : const __m256i r2 = _mm256_loadu_si256((__m256i *)ref2); 42 131381000 : const __m256i avg = _mm256_avg_epu8(r1, r2); 43 131381000 : const __m256i s0 = _mm256_unpacklo_epi8(s, zero); 44 131381000 : const __m256i s1 = _mm256_unpackhi_epi8(s, zero); 45 131381000 : const __m256i avg0 = _mm256_unpacklo_epi8(avg, zero); 46 131381000 : const __m256i avg1 = _mm256_unpackhi_epi8(avg, zero); 47 131381000 : const __m256i dif0 = _mm256_sub_epi16(s0, avg0); 48 131381000 : const __m256i dif1 = _mm256_sub_epi16(s1, avg1); 49 131381000 : const __m256i sqr0 = _mm256_madd_epi16(dif0, dif0); 50 131381000 : const __m256i sqr1 = _mm256_madd_epi16(dif1, dif1); 51 131381000 : *sum = _mm256_add_epi32(*sum, sqr0); 52 131381000 : *sum = _mm256_add_epi32(*sum, sqr1); 53 131381000 : } 54 : 55 : #ifdef __cplusplus 56 : } 57 : #endif 58 : #endif // EbCombinedAveragingSAD_Inline_AVX2_h