Line data Source code
/*
 * Copyright(c) 2019 Intel Corporation
 * SPDX-License-Identifier: BSD-2-Clause-Patent
 */

#include <stdint.h>
#include <string.h>
#include <emmintrin.h>

/*
 * Own header, so the definition below is checked against its prototype.
 * The include is skipped only when the header cannot be found, which lets
 * this translation unit also be compiled on its own.
 */
#if defined(__has_include)
#if __has_include("EbComputeSAD_SSE2.h")
#include "EbComputeSAD_SSE2.h"
#endif
#else
#include "EbComputeSAD_SSE2.h"
#endif

/*
 * Loads 4 consecutive bytes starting at `p` into the low 32 bits of an XMM
 * register; the remaining 96 bits are zero.
 *
 * memcpy is used instead of dereferencing a cast `uint32_t *` because `p`
 * points into a byte array that carries no 4-byte alignment guarantee, and
 * reading it through an incompatible pointer type is undefined behaviour.
 */
static __m128i load_u8x4_sse2(const uint8_t *p)
{
    int32_t bits;

    memcpy(&bits, p, sizeof bits);
    return _mm_cvtsi32_si128(bits);
}

/*
 * SAD of one 4-pixel row of `src` against the rounded average of the
 * matching rows of `ref1` and `ref2`.
 *
 * Bytes 4..15 of every operand are zero, so they contribute nothing and the
 * row total ends up in the low 64-bit lane of the result.
 */
static __m128i avg_sad_row_u8x4_sse2(const uint8_t *src,
                                     const uint8_t *ref1,
                                     const uint8_t *ref2)
{
    const __m128i avg = _mm_avg_epu8(load_u8x4_sse2(ref1), load_u8x4_sse2(ref2));

    return _mm_sad_epu8(load_u8x4_sse2(src), avg);
}

/**
 * Sum of absolute differences between a 4-pixel-wide source block and the
 * per-pixel rounded average, (a + b + 1) >> 1, of two reference blocks.
 *
 * @param src          first row of the source block
 * @param src_stride   distance in bytes between consecutive source rows
 * @param ref1         first row of the first reference block
 * @param ref1_stride  distance in bytes between consecutive ref1 rows
 * @param ref2         first row of the second reference block
 * @param ref2_stride  distance in bytes between consecutive ref2 rows
 * @param height       number of rows to accumulate
 * @param width        ignored; exactly 4 bytes are read from every row
 *
 * @return the accumulated SAD, taken from the low 32 bits of the accumulator;
 *         0 when `height` is 0.
 *
 * Rows are consumed two at a time into independent accumulators. When
 * `height` is odd the final row is handled on its own, so no row beyond
 * `height` is ever read.
 */
uint32_t combined_averaging_4xm_sad_sse2_intrin(
    uint8_t  *src,
    uint32_t  src_stride,
    uint8_t  *ref1,
    uint32_t  ref1_stride,
    uint8_t  *ref2,
    uint32_t  ref2_stride,
    uint32_t  height,
    uint32_t  width)
{
    __m128i  sad_even = _mm_setzero_si128();
    __m128i  sad_odd  = _mm_setzero_si128();
    uint32_t y;

    (void)width;

    /* Full row pairs: rows y and y + 1. */
    for (y = 0; y + 1 < height; y += 2) {
        sad_even = _mm_add_epi32(sad_even,
                                 avg_sad_row_u8x4_sse2(src, ref1, ref2));
        sad_odd  = _mm_add_epi32(sad_odd,
                                 avg_sad_row_u8x4_sse2(src + src_stride,
                                                       ref1 + ref1_stride,
                                                       ref2 + ref2_stride));

        src  += (size_t)src_stride * 2;
        ref1 += (size_t)ref1_stride * 2;
        ref2 += (size_t)ref2_stride * 2;
    }

    /* Trailing row when height is odd. */
    if (y < height) {
        sad_even = _mm_add_epi32(sad_even,
                                 avg_sad_row_u8x4_sse2(src, ref1, ref2));
    }

    return (uint32_t)_mm_cvtsi128_si32(_mm_add_epi32(sad_even, sad_odd));
}