Line data Source code
1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include <assert.h>
13 : #include <stdlib.h>
14 : #include <string.h>
15 :
16 :
17 : #include "EbPictureControlSet.h"
18 : #include "convolve.h"
19 : #include "aom_dsp_rtcd.h"
20 :
21 :
22 : // 2-tap bilinear filters: BIL_SUBPEL_SHIFTS expands to 1 << BIL_SUBPEL_BITS (= 8).
23 : #define BIL_SUBPEL_BITS 3
24 : #define BIL_SUBPEL_SHIFTS (1 << BIL_SUBPEL_BITS)
25 :
26 : // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
27 : // or vertical direction to produce the filtered output block. Used to implement
28 : // the first-pass of 2-D separable filter.
29 : //
30 : // Produces int16_t output to retain precision for the next pass. Two filter
31 : // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
32 : // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
33 : // It defines the offset required to move from one input to the next.
34 0 : void aom_var_filter_block2d_bil_first_pass_c(const uint8_t *a, uint16_t *b,
35 : unsigned int src_pixels_per_line,
36 : unsigned int pixel_step,
37 : unsigned int output_height,
38 : unsigned int output_width,
39 : const uint8_t *filter) {
40 : unsigned int i, j;
41 :
42 0 : for (i = 0; i < output_height; ++i) {
43 0 : for (j = 0; j < output_width; ++j) {
44 0 : b[j] = ROUND_POWER_OF_TWO(
45 : (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
46 :
47 0 : ++a;
48 : }
49 :
50 0 : a += src_pixels_per_line - output_width;
51 0 : b += output_width;
52 : }
53 0 : }
54 :
55 : // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
56 : // or vertical direction to produce the filtered output block. Used to implement
57 : // the second-pass of 2-D separable filter.
58 : //
59 : // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
60 : // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
61 : // filter is applied horizontally (pixel_step = 1) or vertically
62 : // (pixel_step = stride). It defines the offset required to move from one input
63 : // to the next. Output is 8-bit.
64 0 : void aom_var_filter_block2d_bil_second_pass_c(const uint16_t *a, uint8_t *b,
65 : unsigned int src_pixels_per_line,
66 : unsigned int pixel_step,
67 : unsigned int output_height,
68 : unsigned int output_width,
69 : const uint8_t *filter) {
70 : unsigned int i, j;
71 :
72 0 : for (i = 0; i < output_height; ++i) {
73 0 : for (j = 0; j < output_width; ++j) {
74 0 : b[j] = ROUND_POWER_OF_TWO(
75 : (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
76 0 : ++a;
77 : }
78 :
79 0 : a += src_pixels_per_line - output_width;
80 0 : b += output_width;
81 : }
82 0 : }
83 :
// Forward declaration only; the definition is not part of this section.
// aom_upsampled_pred_c() below calls it with a filter parameter set and a
// sub-pixel index and passes the returned int16_t kernel to the convolve
// routines.
84 : static INLINE const int16_t *av1_get_interp_filter_subpel_kernel(
85 : const InterpFilterParams filter_params, const int32_t subpel) ;
86 :
// Bilinear kernels: 16 rows of 8 coefficients. Only the two centre taps
// (indices 3 and 4) are non-zero and every row sums to 128.
87 : DECLARE_ALIGNED(256, static const InterpKernel,
88 : av1_bilinear_filters[SUBPEL_SHIFTS]) = {
89 : { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 },
90 : { 0, 0, 0, 112, 16, 0, 0, 0 }, { 0, 0, 0, 104, 24, 0, 0, 0 },
91 : { 0, 0, 0, 96, 32, 0, 0, 0 }, { 0, 0, 0, 88, 40, 0, 0, 0 },
92 : { 0, 0, 0, 80, 48, 0, 0, 0 }, { 0, 0, 0, 72, 56, 0, 0, 0 },
93 : { 0, 0, 0, 64, 64, 0, 0, 0 }, { 0, 0, 0, 56, 72, 0, 0, 0 },
94 : { 0, 0, 0, 48, 80, 0, 0, 0 }, { 0, 0, 0, 40, 88, 0, 0, 0 },
95 : { 0, 0, 0, 32, 96, 0, 0, 0 }, { 0, 0, 0, 24, 104, 0, 0, 0 },
96 : { 0, 0, 0, 16, 112, 0, 0, 0 }, { 0, 0, 0, 8, 120, 0, 0, 0 }
97 : };
98 :
// 4-tap kernels stored in the 8-coefficient InterpKernel layout: only
// indices 2..5 are non-zero and every row sums to 128.
99 : DECLARE_ALIGNED(256, static const InterpKernel,
100 : av1_sub_pel_filters_4[SUBPEL_SHIFTS]) = {
101 : { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -4, 126, 8, -2, 0, 0 },
102 : { 0, 0, -8, 122, 18, -4, 0, 0 }, { 0, 0, -10, 116, 28, -6, 0, 0 },
103 : { 0, 0, -12, 110, 38, -8, 0, 0 }, { 0, 0, -12, 102, 48, -10, 0, 0 },
104 : { 0, 0, -14, 94, 58, -10, 0, 0 }, { 0, 0, -12, 84, 66, -10, 0, 0 },
105 : { 0, 0, -12, 76, 76, -12, 0, 0 }, { 0, 0, -10, 66, 84, -12, 0, 0 },
106 : { 0, 0, -10, 58, 94, -14, 0, 0 }, { 0, 0, -10, 48, 102, -12, 0, 0 },
107 : { 0, 0, -8, 38, 110, -12, 0, 0 }, { 0, 0, -6, 28, 116, -10, 0, 0 },
108 : { 0, 0, -4, 18, 122, -8, 0, 0 }, { 0, 0, -2, 8, 126, -4, 0, 0 }
109 : };
// Smooth 4-tap kernels stored in the 8-coefficient InterpKernel layout: only
// indices 2..5 are non-zero, all coefficients are non-negative and every row
// sums to 128.
110 : DECLARE_ALIGNED(256, static const InterpKernel,
111 : av1_sub_pel_filters_4smooth[SUBPEL_SHIFTS]) = {
112 : { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 30, 62, 34, 2, 0, 0 },
113 : { 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
114 : { 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
115 : { 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, 0, 14, 54, 48, 12, 0, 0 },
116 : { 0, 0, 12, 52, 52, 12, 0, 0 }, { 0, 0, 12, 48, 54, 14, 0, 0 },
117 : { 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
118 : { 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
119 : { 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 30, 0, 0 }
120 : };
// Regular kernels: 16 rows of 8 coefficients. The outermost taps (indices 0
// and 7) are zero in every row and every row sums to 128.
121 : DECLARE_ALIGNED(256, static const InterpKernel,
122 : av1_sub_pel_filters_8[SUBPEL_SHIFTS]) = {
123 : { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, -6, 126, 8, -2, 0, 0 },
124 : { 0, 2, -10, 122, 18, -4, 0, 0 }, { 0, 2, -12, 116, 28, -8, 2, 0 },
125 : { 0, 2, -14, 110, 38, -10, 2, 0 }, { 0, 2, -14, 102, 48, -12, 2, 0 },
126 : { 0, 2, -16, 94, 58, -12, 2, 0 }, { 0, 2, -14, 84, 66, -12, 2, 0 },
127 : { 0, 2, -14, 76, 76, -14, 2, 0 }, { 0, 2, -12, 66, 84, -14, 2, 0 },
128 : { 0, 2, -12, 58, 94, -16, 2, 0 }, { 0, 2, -12, 48, 102, -14, 2, 0 },
129 : { 0, 2, -10, 38, 110, -14, 2, 0 }, { 0, 2, -8, 28, 116, -12, 2, 0 },
130 : { 0, 0, -4, 18, 122, -10, 2, 0 }, { 0, 0, -2, 8, 126, -6, 2, 0 }
131 : };
132 :
// Sharp kernels: 16 rows of 8 coefficients that use all eight tap positions.
// Every row sums to 128.
133 : DECLARE_ALIGNED(256, static const InterpKernel,
134 : av1_sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
135 : { 0, 0, 0, 128, 0, 0, 0, 0 }, { -2, 2, -6, 126, 8, -2, 2, 0 },
136 : { -2, 6, -12, 124, 16, -6, 4, -2 }, { -2, 8, -18, 120, 26, -10, 6, -2 },
137 : { -4, 10, -22, 116, 38, -14, 6, -2 }, { -4, 10, -22, 108, 48, -18, 8, -2 },
138 : { -4, 10, -24, 100, 60, -20, 8, -2 }, { -4, 10, -24, 90, 70, -22, 10, -2 },
139 : { -4, 12, -24, 80, 80, -24, 12, -4 }, { -2, 10, -22, 70, 90, -24, 10, -4 },
140 : { -2, 8, -20, 60, 100, -24, 10, -4 }, { -2, 8, -18, 48, 108, -22, 10, -4 },
141 : { -2, 6, -14, 38, 116, -22, 10, -4 }, { -2, 6, -10, 26, 120, -18, 8, -2 },
142 : { -2, 4, -6, 16, 124, -12, 6, -2 }, { 0, 2, -2, 8, 126, -6, 2, -2 }
143 : };
144 :
// Smooth kernels: 16 rows of 8 coefficients. The outermost taps (indices 0
// and 7) are zero in every row and every row sums to 128.
145 : DECLARE_ALIGNED(256, static const InterpKernel,
146 : av1_sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
147 : { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, 28, 62, 34, 2, 0, 0 },
148 : { 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
149 : { 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
150 : { 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, -2, 16, 54, 48, 12, 0, 0 },
151 : { 0, -2, 14, 52, 52, 14, -2, 0 }, { 0, 0, 12, 48, 54, 16, -2, 0 },
152 : { 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
153 : { 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
154 : { 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 28, 2, 0 }
155 : };
// Filter parameter sets built on the 4-tap kernel tables; indexed directly by
// the interp_filter argument of get_4tap_interp_filter_params().
// NOTE(review): slot order presumably follows the InterpFilter enum - confirm.
156 : // For w<=4, MULTITAP_SHARP is the same as EIGHTTAP_REGULAR, so the third
// entry reuses av1_sub_pel_filters_4 and is tagged EIGHTTAP_REGULAR.
157 : static const InterpFilterParams av1_interp_4tap[SWITCHABLE_FILTERS + 1] = {
158 : { (const int16_t *)av1_sub_pel_filters_4, SUBPEL_TAPS, SUBPEL_SHIFTS,
159 : EIGHTTAP_REGULAR },
160 : { (const int16_t *)av1_sub_pel_filters_4smooth, SUBPEL_TAPS, SUBPEL_SHIFTS,
161 : EIGHTTAP_SMOOTH },
162 : { (const int16_t *)av1_sub_pel_filters_4, SUBPEL_TAPS, SUBPEL_SHIFTS,
163 : EIGHTTAP_REGULAR },
164 : { (const int16_t *)av1_bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS,
165 : BILINEAR },
166 : };
// Filter parameter sets built on the 8-coefficient regular / smooth / sharp
// kernel tables plus the bilinear table. av1_get_filter() reads the
// EIGHTTAP_REGULAR entry for USE_8_TAPS.
167 : static const InterpFilterParams
168 : av1_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
169 : { (const int16_t *)av1_sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS,
170 : EIGHTTAP_REGULAR },
171 : { (const int16_t *)av1_sub_pel_filters_8smooth, SUBPEL_TAPS,
172 : SUBPEL_SHIFTS, EIGHTTAP_SMOOTH },
173 : { (const int16_t *)av1_sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS,
174 : MULTITAP_SHARP },
175 : { (const int16_t *)av1_bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS,
176 : BILINEAR }
177 : };
178 0 : static INLINE const InterpFilterParams *get_4tap_interp_filter_params(
179 : const InterpFilter interp_filter) {
180 0 : return &av1_interp_4tap[interp_filter];
181 : }
182 0 : static INLINE const InterpFilterParams *av1_get_filter(int subpel_search) {
183 0 : assert(subpel_search >= USE_2_TAPS);
184 :
185 0 : switch (subpel_search) {
186 0 : case USE_2_TAPS: return get_4tap_interp_filter_params(BILINEAR);
187 0 : case USE_4_TAPS: return get_4tap_interp_filter_params(EIGHTTAP_REGULAR);
188 0 : case USE_8_TAPS: return &av1_interp_filter_params_list[EIGHTTAP_REGULAR];
189 0 : default: assert(0); return NULL;
190 : }
191 : }
192 :
193 : // Get pred block from up-sampled reference.
194 0 : void aom_upsampled_pred_c(MacroBlockD *xd, const struct AV1Common *const cm ,//const AV1_COMMON *const cm,
195 : int mi_row, int mi_col, const MV *const mv,
196 : uint8_t *comp_pred, int width, int height,
197 : int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
198 : int ref_stride, int subpel_search) {
199 : (void)xd;
200 : (void)cm;
201 : (void)mi_row;
202 : (void)mi_col;
203 : (void)mv;
204 0 : const InterpFilterParams *filter = av1_get_filter(subpel_search);
205 :
206 0 : if (!subpel_x_q3 && !subpel_y_q3) {
207 0 : for (int i = 0; i < height; i++) {
208 0 : memcpy(comp_pred, ref, width * sizeof(*comp_pred));
209 0 : comp_pred += width;
210 0 : ref += ref_stride;
211 : }
212 0 : } else if (!subpel_y_q3) {
213 : const int16_t *const kernel =
214 0 : av1_get_interp_filter_subpel_kernel(*filter, subpel_x_q3 << 1);
215 0 : aom_convolve8_horiz_c(ref, ref_stride, comp_pred, width, kernel, 16, NULL,
216 : -1, width, height);
217 0 : } else if (!subpel_x_q3) {
218 : const int16_t *const kernel =
219 0 : av1_get_interp_filter_subpel_kernel(*filter, subpel_y_q3 << 1);
220 0 : aom_convolve8_vert_c(ref, ref_stride, comp_pred, width, NULL, -1, kernel,
221 : 16, width, height);
222 : } else {
223 : DECLARE_ALIGNED(16, uint8_t,
224 : temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
225 : const int16_t *const kernel_x =
226 0 : av1_get_interp_filter_subpel_kernel(*filter, subpel_x_q3 << 1);
227 : const int16_t *const kernel_y =
228 0 : av1_get_interp_filter_subpel_kernel(*filter, subpel_y_q3 << 1);
229 0 : const int intermediate_height =
230 0 : (((height - 1) * 8 + subpel_y_q3) >> 3) + filter->taps;
231 0 : assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
232 0 : aom_convolve8_horiz_c(ref - ref_stride * ((filter->taps >> 1) - 1),
233 : ref_stride, temp, MAX_SB_SIZE, kernel_x, 16, NULL, -1,
234 : width, intermediate_height);
235 0 : aom_convolve8_vert_c(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1),
236 : MAX_SB_SIZE, comp_pred, width, NULL, -1, kernel_y, 16,
237 : width, height);
238 : }
239 0 : }
240 :
241 0 : void aom_comp_avg_upsampled_pred_c(MacroBlockD *xd, const AV1_COMMON *const cm,
242 : int mi_row, int mi_col, const MV *const mv,
243 : uint8_t *comp_pred, const uint8_t *pred,
244 : int width, int height, int subpel_x_q3,
245 : int subpel_y_q3, const uint8_t *ref,
246 : int ref_stride, int subpel_search) {
247 : int i, j;
248 :
249 0 : aom_upsampled_pred_c(xd, (const struct AV1Common *const)cm, mi_row, mi_col, mv, comp_pred, width, height,
250 : subpel_x_q3, subpel_y_q3, ref, ref_stride, subpel_search);
251 0 : for (i = 0; i < height; i++) {
252 0 : for (j = 0; j < width; j++) {
253 0 : comp_pred[j] = ROUND_POWER_OF_TWO(comp_pred[j] + pred[j], 1);
254 : }
255 0 : comp_pred += width;
256 0 : pred += width;
257 : }
258 0 : }
259 :
// Masked compound prediction: comp_pred[j] = AOM_BLEND_A64(mask[j], s0, s1)
// for every sample of a width x height block.
//
// With invert_mask == 0 the first blend source is `ref` (pitch ref_stride) and
// the second is `pred` (pitch width); a non-zero invert_mask swaps the two.
// comp_pred is written with a row pitch of `width`, mask is read with
// mask_stride.
void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                          int height, const uint8_t *ref, int ref_stride,
                          const uint8_t *mask, int mask_stride,
                          int invert_mask) {
    const uint8_t *first_src;
    const uint8_t *second_src;
    int first_pitch;
    int second_pitch;

    if (invert_mask) {
        first_src = pred;
        first_pitch = width;
        second_src = ref;
        second_pitch = ref_stride;
    } else {
        first_src = ref;
        first_pitch = ref_stride;
        second_src = pred;
        second_pitch = width;
    }

    for (int row = 0; row < height; ++row) {
        for (int col = 0; col < width; ++col) {
            comp_pred[col] =
                AOM_BLEND_A64(mask[col], first_src[col], second_src[col]);
        }
        comp_pred += width;
        first_src += first_pitch;
        second_src += second_pitch;
        mask += mask_stride;
    }
}
279 :
280 0 : void aom_comp_mask_upsampled_pred_c(MacroBlockD *xd, const AV1_COMMON *const cm,
281 : int mi_row, int mi_col, const MV *const mv,
282 : uint8_t *comp_pred, const uint8_t *pred,
283 : int width, int height, int subpel_x_q3,
284 : int subpel_y_q3, const uint8_t *ref,
285 : int ref_stride, const uint8_t *mask,
286 : int mask_stride, int invert_mask,
287 : int subpel_search) {
288 0 : if (subpel_x_q3 | subpel_y_q3) {
289 0 : aom_upsampled_pred_c(xd, (const struct AV1Common *const)cm, mi_row, mi_col, mv, comp_pred, width, height,
290 : subpel_x_q3, subpel_y_q3, ref, ref_stride,
291 : subpel_search);
292 0 : ref = comp_pred;
293 0 : ref_stride = width;
294 : }
295 0 : aom_comp_mask_pred_c(comp_pred, pred, width, height, ref, ref_stride, mask,
296 : mask_stride, invert_mask);
297 0 : }
298 :
299 :
300 0 : static INLINE void obmc_variance(const uint8_t *pre, int pre_stride,
301 : const int32_t *wsrc, const int32_t *mask,
302 : int w, int h, unsigned int *sse, int *sum) {
303 : int i, j;
304 :
305 0 : *sse = 0;
306 0 : *sum = 0;
307 :
308 0 : for (i = 0; i < h; i++) {
309 0 : for (j = 0; j < w; j++) {
310 0 : int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
311 0 : *sum += diff;
312 0 : *sse += diff * diff;
313 : }
314 :
315 0 : pre += pre_stride;
316 0 : wsrc += w;
317 0 : mask += w;
318 : }
319 0 : }
320 :
// Defines aom_obmc_variance<W>x<H>_c(): runs obmc_variance() over a W x H
// block, stores the sum of squared residuals in *sse and returns
// *sse - sum * sum / (W * H), with the squared sum evaluated in 64 bits.
321 : #define OBMC_VAR(W, H) \
322 : unsigned int aom_obmc_variance##W##x##H##_c( \
323 : const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
324 : const int32_t *mask, unsigned int *sse) { \
325 : int sum; \
326 : obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
327 : return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
328 : }
329 :
// Defines aom_obmc_sub_pixel_variance<W>x<H>_c(): bilinearly interpolates
// `pre` at (xoffset, yoffset) and returns the OBMC variance of the result.
// The first pass filters horizontally (pixel_step 1) and produces H + 1 rows
// of W 16-bit samples in fdata3; the second pass filters vertically
// (pixel_step W) and produces the H x W 8-bit block temp2, which is passed to
// aom_obmc_variance<W>x<H>_c() with stride W. Both buffers are locals; the
// tap pairs come from bilinear_filters_2t, which is defined outside this
// section.
330 : #define OBMC_SUBPIX_VAR(W, H) \
331 : unsigned int aom_obmc_sub_pixel_variance##W##x##H##_c( \
332 : const uint8_t *pre, int pre_stride, int xoffset, int yoffset, \
333 : const int32_t *wsrc, const int32_t *mask, unsigned int *sse) { \
334 : uint16_t fdata3[(H + 1) * W]; \
335 : uint8_t temp2[H * W]; \
336 : \
337 : aom_var_filter_block2d_bil_first_pass_c(pre, fdata3, pre_stride, 1, H + 1, \
338 : W, bilinear_filters_2t[xoffset]); \
339 : aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W, \
340 : bilinear_filters_2t[yoffset]); \
341 : \
342 : return aom_obmc_variance##W##x##H##_c(temp2, W, wsrc, mask, sse); \
343 : }
344 :
// Instantiate aom_obmc_variance<W>x<H>_c and
// aom_obmc_sub_pixel_variance<W>x<H>_c for each W x H block size below.
345 0 : OBMC_VAR(4, 4)
346 0 : OBMC_SUBPIX_VAR(4, 4)
347 :
348 0 : OBMC_VAR(4, 8)
349 0 : OBMC_SUBPIX_VAR(4, 8)
350 :
351 0 : OBMC_VAR(8, 4)
352 0 : OBMC_SUBPIX_VAR(8, 4)
353 :
354 0 : OBMC_VAR(8, 8)
355 0 : OBMC_SUBPIX_VAR(8, 8)
356 :
357 0 : OBMC_VAR(8, 16)
358 0 : OBMC_SUBPIX_VAR(8, 16)
359 :
360 0 : OBMC_VAR(16, 8)
361 0 : OBMC_SUBPIX_VAR(16, 8)
362 :
363 0 : OBMC_VAR(16, 16)
364 0 : OBMC_SUBPIX_VAR(16, 16)
365 :
366 0 : OBMC_VAR(16, 32)
367 0 : OBMC_SUBPIX_VAR(16, 32)
368 :
369 0 : OBMC_VAR(32, 16)
370 0 : OBMC_SUBPIX_VAR(32, 16)
371 :
372 0 : OBMC_VAR(32, 32)
373 0 : OBMC_SUBPIX_VAR(32, 32)
374 :
375 0 : OBMC_VAR(32, 64)
376 0 : OBMC_SUBPIX_VAR(32, 64)
377 :
378 0 : OBMC_VAR(64, 32)
379 0 : OBMC_SUBPIX_VAR(64, 32)
380 :
381 0 : OBMC_VAR(64, 64)
382 0 : OBMC_SUBPIX_VAR(64, 64)
383 :
384 0 : OBMC_VAR(64, 128)
385 0 : OBMC_SUBPIX_VAR(64, 128)
386 :
387 0 : OBMC_VAR(128, 64)
388 0 : OBMC_SUBPIX_VAR(128, 64)
389 :
390 0 : OBMC_VAR(128, 128)
391 0 : OBMC_SUBPIX_VAR(128, 128)
392 :
// Rectangular sizes with a 4:1 / 1:4 aspect ratio.
393 0 : OBMC_VAR(4, 16)
394 0 : OBMC_SUBPIX_VAR(4, 16)
395 0 : OBMC_VAR(16, 4)
396 0 : OBMC_SUBPIX_VAR(16, 4)
397 0 : OBMC_VAR(8, 32)
398 0 : OBMC_SUBPIX_VAR(8, 32)
399 0 : OBMC_VAR(32, 8)
400 0 : OBMC_SUBPIX_VAR(32, 8)
401 0 : OBMC_VAR(16, 64)
402 0 : OBMC_SUBPIX_VAR(16, 64)
403 0 : OBMC_VAR(64, 16)
404 0 : OBMC_SUBPIX_VAR(64, 16)
405 :
// Sum of squared differences between two 16x16 blocks of 16-bit samples.
//
//   src_ptr / ref_ptr  block addresses; converted with CONVERT_TO_SHORTPTR to
//                      the uint16_t sample pointers that are actually read
//   source_stride /
//   recon_stride       row pitch of each block, in samples
//   sse                receives the low 32 bits of the 64-bit total
void eb_aom_highbd_8_mse16x16_c(const uint8_t *src_ptr, int32_t source_stride,
                                const uint8_t *ref_ptr, int32_t recon_stride,
                                uint32_t *sse) {
    const uint16_t *a = CONVERT_TO_SHORTPTR(src_ptr);
    const uint16_t *b = CONVERT_TO_SHORTPTR(ref_ptr);
    uint64_t tsse = 0;

    for (int i = 0; i < 16; ++i) {
        for (int j = 0; j < 16; ++j) {
            // Square in 64 bits: with full-range 16-bit samples the
            // difference can reach +/-65535, and squaring that in `int`
            // overflows (undefined behaviour). The square always fits in
            // 32 bits unsigned, so the accumulated value is unchanged.
            const int64_t diff = (int64_t)a[j] - (int64_t)b[j];
            tsse += (uint64_t)(diff * diff);
        }
        a += source_stride;
        b += recon_stride;
    }
    *sse = (uint32_t)tsse;
}
|