Line data Source code
1 : /*
2 : * Copyright(c) 2019 Intel Corporation
3 : * SPDX-License-Identifier: BSD-2-Clause-Patent
4 : */
5 :
6 : #ifndef EbPictureOperators_SSE2_h
7 : #define EbPictureOperators_SSE2_h
8 :
9 : #include <emmintrin.h>
10 : #include "EbDefinitions.h"
11 :
12 : #ifdef __cplusplus
13 : extern "C" {
14 : #endif
15 :
16 : //-----
/*
 * zero_out_coeff{4x4,8x8,16x16,32x32}: prototypes only; the definitions are
 * not part of this header.
 *
 * All four variants take the same argument list:
 *   coeff_buffer       - int16_t buffer the kernel operates on
 *   coeff_stride       - stride of coeff_buffer
 *   coeff_origin_index - index into coeff_buffer where the block starts
 *   area_width/height  - dimensions of the area
 *
 * NOTE(review): by name these presumably clear an NxN block of transform
 * coefficients, with stride/index expressed in int16_t elements - confirm
 * against the implementation.
 * NOTE(review): the 4x4 variant is suffixed `_sse` while the others are
 * `_sse2`; check whether that naming difference is intentional.
 */
17 : extern void zero_out_coeff4x4_sse(
18 : int16_t *coeff_buffer,
19 : uint32_t coeff_stride,
20 : uint32_t coeff_origin_index,
21 : uint32_t area_width,
22 : uint32_t area_height);
23 :
24 : extern void zero_out_coeff8x8_sse2(
25 : int16_t *coeff_buffer,
26 : uint32_t coeff_stride,
27 : uint32_t coeff_origin_index,
28 : uint32_t area_width,
29 : uint32_t area_height);
30 :
31 : extern void zero_out_coeff16x16_sse2(
32 : int16_t *coeff_buffer,
33 : uint32_t coeff_stride,
34 : uint32_t coeff_origin_index,
35 : uint32_t area_width,
36 : uint32_t area_height);
37 :
38 : extern void zero_out_coeff32x32_sse2(
39 : int16_t *coeff_buffer,
40 : uint32_t coeff_stride,
41 : uint32_t coeff_origin_index,
42 : uint32_t area_width,
43 : uint32_t area_height);
44 :
/*
 * residual_kernel16bit_sse2_intrin: prototype only; defined elsewhere.
 *
 * Takes two uint16_t sample planes (input, pred) and an int16_t output plane
 * (residual), each with its own stride, plus the area width and height.
 *
 * NOTE(review): presumably writes residual = input - pred per sample for
 * high-bit-depth content; units of the strides (elements vs. bytes) are not
 * visible here - confirm against the definition.
 */
45 : extern void residual_kernel16bit_sse2_intrin(
46 : uint16_t *input,
47 : uint32_t input_stride,
48 : uint16_t *pred,
49 : uint32_t pred_stride,
50 : int16_t *residual,
51 : uint32_t residual_stride,
52 : uint32_t area_width,
53 : uint32_t area_height);
54 :
/*
 * picture_addition_kernel{4x4,8x8,16x16,32x32,64x64}: prototypes only; the
 * definitions live outside this header.
 *
 * Every variant shares one signature:
 *   pred_ptr / pred_stride         - uint8_t prediction plane and its stride
 *   residual_ptr / residual_stride - int16_t residual plane and its stride
 *   recon_ptr / recon_stride       - uint8_t reconstruction plane and its stride
 *   width, height                  - dimensions of the block
 *
 * NOTE(review): by name these presumably compute recon = pred + residual
 * clamped to 8 bits for a fixed block size; whether width/height must match
 * the size in the function name is not visible here - confirm.
 * NOTE(review): the 4x4 variant is suffixed `_sse_intrin`, the rest
 * `_sse2_intrin`.
 */
55 : void picture_addition_kernel4x4_sse_intrin(
56 : uint8_t *pred_ptr,
57 : uint32_t pred_stride,
58 : int16_t *residual_ptr,
59 : uint32_t residual_stride,
60 : uint8_t *recon_ptr,
61 : uint32_t recon_stride,
62 : uint32_t width,
63 : uint32_t height);
64 :
65 : void picture_addition_kernel8x8_sse2_intrin(
66 : uint8_t *pred_ptr,
67 : uint32_t pred_stride,
68 : int16_t *residual_ptr,
69 : uint32_t residual_stride,
70 : uint8_t *recon_ptr,
71 : uint32_t recon_stride,
72 : uint32_t width,
73 : uint32_t height);
74 :
75 : void picture_addition_kernel16x16_sse2_intrin(
76 : uint8_t *pred_ptr,
77 : uint32_t pred_stride,
78 : int16_t *residual_ptr,
79 : uint32_t residual_stride,
80 : uint8_t *recon_ptr,
81 : uint32_t recon_stride,
82 : uint32_t width,
83 : uint32_t height);
84 :
85 : void picture_addition_kernel32x32_sse2_intrin(
86 : uint8_t *pred_ptr,
87 : uint32_t pred_stride,
88 : int16_t *residual_ptr,
89 : uint32_t residual_stride,
90 : uint8_t *recon_ptr,
91 : uint32_t recon_stride,
92 : uint32_t width,
93 : uint32_t height);
94 :
95 : void picture_addition_kernel64x64_sse2_intrin(
96 : uint8_t *pred_ptr,
97 : uint32_t pred_stride,
98 : int16_t *residual_ptr,
99 : uint32_t residual_stride,
100 : uint8_t *recon_ptr,
101 : uint32_t recon_stride,
102 : uint32_t width,
103 : uint32_t height);
104 :
/*
 * residual_kernel_sub_sampled{4x4,8x8,16x16,32x32,64x64}: prototypes only;
 * the definitions are not part of this header.
 *
 * Shared signature:
 *   input / input_stride       - uint8_t source plane and its stride
 *   pred / pred_stride         - uint8_t prediction plane and its stride
 *   residual / residual_stride - int16_t output plane and its stride
 *   area_width, area_height    - dimensions of the area
 *   last_line                  - uint8_t flag
 *
 * NOTE(review): presumably computes input - pred on a row-subsampled block,
 * with last_line selecting special handling of the final row - the exact
 * subsampling pattern and the flag's meaning must be confirmed against the
 * implementation.
 * NOTE(review): the 4x4 variant is suffixed `_sse_intrin`, the rest
 * `_sse2_intrin`.
 */
105 : void residual_kernel_sub_sampled4x4_sse_intrin(
106 : uint8_t *input,
107 : uint32_t input_stride,
108 : uint8_t *pred,
109 : uint32_t pred_stride,
110 : int16_t *residual,
111 : uint32_t residual_stride,
112 : uint32_t area_width,
113 : uint32_t area_height,
114 : uint8_t last_line);
115 :
116 : void residual_kernel_sub_sampled8x8_sse2_intrin(
117 : uint8_t *input,
118 : uint32_t input_stride,
119 : uint8_t *pred,
120 : uint32_t pred_stride,
121 : int16_t *residual,
122 : uint32_t residual_stride,
123 : uint32_t area_width,
124 : uint32_t area_height,
125 : uint8_t last_line);
126 :
127 : void residual_kernel_sub_sampled16x16_sse2_intrin(
128 : uint8_t *input,
129 : uint32_t input_stride,
130 : uint8_t *pred,
131 : uint32_t pred_stride,
132 : int16_t *residual,
133 : uint32_t residual_stride,
134 : uint32_t area_width,
135 : uint32_t area_height,
136 : uint8_t last_line);
137 :
138 : void residual_kernel_sub_sampled32x32_sse2_intrin(
139 : uint8_t *input,
140 : uint32_t input_stride,
141 : uint8_t *pred,
142 : uint32_t pred_stride,
143 : int16_t *residual,
144 : uint32_t residual_stride,
145 : uint32_t area_width,
146 : uint32_t area_height,
147 : uint8_t last_line);
148 :
149 : void residual_kernel_sub_sampled64x64_sse2_intrin(
150 : uint8_t *input,
151 : uint32_t input_stride,
152 : uint8_t *pred,
153 : uint32_t pred_stride,
154 : int16_t *residual,
155 : uint32_t residual_stride,
156 : uint32_t area_width,
157 : uint32_t area_height,
158 : uint8_t last_line);
159 :
/*
 * picture_addition_kernel16bit_sse2_intrin: prototype only; defined elsewhere.
 *
 * Same parameter layout as the 8-bit picture_addition_kernel* prototypes, but
 * the prediction and reconstruction planes are uint16_t; the residual plane
 * stays int16_t.
 *
 * NOTE(review): presumably recon = pred + residual for high-bit-depth
 * samples; the clamping range is not visible here - confirm.
 */
160 : void picture_addition_kernel16bit_sse2_intrin(
161 : uint16_t *pred_ptr,
162 : uint32_t pred_stride,
163 : int16_t *residual_ptr,
164 : uint32_t residual_stride,
165 : uint16_t *recon_ptr,
166 : uint32_t recon_stride,
167 : uint32_t width,
168 : uint32_t height);
169 :
/*
 * Hadd32_SSE2_INTRIN: horizontal add of the four 32-bit lanes of `src`.
 *
 * With src = {s0, s1, s2, s3} (lane 0 first), returns s0 + s1 + s2 + s3 as an
 * int32_t. Lane additions are done with _mm_add_epi32, so the sum wraps
 * modulo 2^32 rather than saturating.
 */
170 1195020300 : static INLINE int32_t Hadd32_SSE2_INTRIN(const __m128i src) {
/* Byte-shift right by 8 brings lanes 2,3 down onto lanes 0,1:
 * dst0 lane 0 = s0 + s2, lane 1 = s1 + s3 (upper lanes are not used). */
171 1195020300 : const __m128i dst0 = _mm_add_epi32(src, _mm_srli_si128(src, 8));
/* Byte-shift right by 4 brings lane 1 onto lane 0:
 * dst1 lane 0 = (s0 + s2) + (s1 + s3). */
172 2390050600 : const __m128i dst1 = _mm_add_epi32(dst0, _mm_srli_si128(dst0, 4));
173 :
/* Extract lane 0, which now holds the full sum. */
174 1195020300 : return _mm_cvtsi128_si32(dst1);
175 : }
176 :
/*
 * spatial_full_distortion_kernel{4,8,16,32,64,128}x_n: prototypes only; the
 * definitions are not part of this header.
 *
 * Shared signature, returning a uint64_t:
 *   input / input_offset / input_stride - uint8_t source plane, start offset
 *                                         into it, and its stride
 *   recon / recon_offset / recon_stride - uint8_t reconstruction plane, start
 *                                         offset into it, and its stride
 *   area_width, area_height             - dimensions of the compared area
 *
 * NOTE(review): by name the return value is presumably a distortion metric
 * (e.g. sum of squared differences) between input and recon over a block of
 * fixed width (4..128) and `area_height` rows - confirm the exact metric and
 * whether area_width must equal the width in the function name.
 */
177 : uint64_t spatial_full_distortion_kernel4x_n_sse2_intrin(
178 : uint8_t *input,
179 : uint32_t input_offset,
180 : uint32_t input_stride,
181 : uint8_t *recon,
182 : uint32_t recon_offset,
183 : uint32_t recon_stride,
184 : uint32_t area_width,
185 : uint32_t area_height);
186 :
187 : uint64_t spatial_full_distortion_kernel8x_n_sse2_intrin(
188 : uint8_t *input,
189 : uint32_t input_offset,
190 : uint32_t input_stride,
191 : uint8_t *recon,
192 : uint32_t recon_offset,
193 : uint32_t recon_stride,
194 : uint32_t area_width,
195 : uint32_t area_height);
196 :
197 : uint64_t spatial_full_distortion_kernel16x_n_sse2_intrin(
198 : uint8_t *input,
199 : uint32_t input_offset,
200 : uint32_t input_stride,
201 : uint8_t *recon,
202 : uint32_t recon_offset,
203 : uint32_t recon_stride,
204 : uint32_t area_width,
205 : uint32_t area_height);
206 :
207 : uint64_t spatial_full_distortion_kernel32x_n_sse2_intrin(
208 : uint8_t *input,
209 : uint32_t input_offset,
210 : uint32_t input_stride,
211 : uint8_t *recon,
212 : uint32_t recon_offset,
213 : uint32_t recon_stride,
214 : uint32_t area_width,
215 : uint32_t area_height);
216 :
217 : uint64_t spatial_full_distortion_kernel64x_n_sse2_intrin(
218 : uint8_t *input,
219 : uint32_t input_offset,
220 : uint32_t input_stride,
221 : uint8_t *recon,
222 : uint32_t recon_offset,
223 : uint32_t recon_stride,
224 : uint32_t area_width,
225 : uint32_t area_height);
226 :
227 : uint64_t spatial_full_distortion_kernel128x_n_sse2_intrin(
228 : uint8_t *input,
229 : uint32_t input_offset,
230 : uint32_t input_stride,
231 : uint8_t *recon,
232 : uint32_t recon_offset,
233 : uint32_t recon_stride,
234 : uint32_t area_width,
235 : uint32_t area_height);
236 :
/*
 * picture_addition_sse2: prototype only; defined elsewhere.
 *
 * Has the same parameter list as the picture_addition_kernelNxN_* prototypes
 * (uint8_t pred, int16_t residual, uint8_t recon, each with a stride, plus
 * width and height) but carries no block size in its name.
 *
 * NOTE(review): presumably the size-generic form that handles arbitrary
 * width/height or dispatches to the fixed-size kernels - confirm against the
 * implementation.
 */
237 : void picture_addition_sse2(
238 : uint8_t *pred_ptr,
239 : uint32_t pred_stride,
240 : int16_t *residual_ptr,
241 : uint32_t residual_stride,
242 : uint8_t *recon_ptr,
243 : uint32_t recon_stride,
244 : uint32_t width,
245 : uint32_t height);
246 :
247 : #ifdef __cplusplus
248 : }
249 : #endif
250 : #endif // EbPictureOperators_SSE2_h
|