Line data Source code
1 : /* 2 : * Copyright(c) 2019 Intel Corporation 3 : * SPDX - License - Identifier: BSD - 2 - Clause - Patent 4 : */ 5 : 6 : #ifndef EbHighbdIntraPrediction_SSE2_h 7 : #define EbHighbdIntraPrediction_SSE2_h 8 : 9 : #include <emmintrin.h> 10 : #include "EbDefinitions.h" 11 : #include "aom_dsp_rtcd.h" 12 : 13 0 : static INLINE __m128i dc_sum_4x32bit(const __m128i src) { 14 : __m128i sum, sum_hi; 15 0 : sum_hi = _mm_srli_si128(src, 8); 16 0 : sum = _mm_add_epi32(src, sum_hi); 17 0 : sum_hi = _mm_srli_si128(sum, 4); 18 0 : return _mm_add_epi32(sum, sum_hi); 19 : } 20 : 21 0 : static INLINE __m128i dc_sum_4x16bit(const __m128i src) { 22 : __m128i sum, sum_hi; 23 0 : const __m128i src_hi = _mm_srli_si128(src, 4); 24 0 : sum = _mm_add_epi16(src, src_hi); 25 0 : sum_hi = _mm_srli_si128(sum, 2); 26 0 : sum = _mm_add_epi16(sum, sum_hi); 27 : 28 0 : return sum; 29 : } 30 : 31 0 : static INLINE __m128i dc_sum_4x16bit_large(const __m128i src) { 32 : // Unpack to avoid 12-bit overflow. 33 0 : const __m128i src_32 = _mm_unpacklo_epi16(src, _mm_setzero_si128()); 34 0 : return dc_sum_4x32bit(src_32); 35 : } 36 : 37 0 : static INLINE __m128i dc_sum_8x16bit(const __m128i src) { 38 0 : const __m128i src_hi = _mm_srli_si128(src, 8); 39 0 : const __m128i sum = _mm_add_epi16(src, src_hi); 40 0 : return dc_sum_4x16bit(sum); 41 : } 42 : 43 0 : static INLINE __m128i dc_sum_8x16bit_large(const __m128i src) { 44 0 : const __m128i src_hi = _mm_srli_si128(src, 8); 45 0 : const __m128i sum = _mm_add_epi16(src, src_hi); 46 0 : return dc_sum_4x16bit_large(sum); 47 : } 48 : 49 0 : static INLINE __m128i dc_sum_4(const uint16_t *const src) { 50 0 : const __m128i s = _mm_loadl_epi64((const __m128i *)src); 51 0 : return dc_sum_4x16bit(s); 52 : } 53 : 54 0 : static INLINE __m128i dc_sum_8(const uint16_t *const src) { 55 0 : const __m128i s = _mm_loadu_si128((const __m128i *)src); 56 0 : return dc_sum_8x16bit(s); 57 : } 58 : 59 : #endif // EbHighbdIntraPrediction_SSE2_h