Line data Source code
1 : /*
2 : * Copyright(c) 2019 Intel Corporation
3 : * SPDX-License-Identifier: BSD-2-Clause-Patent
4 : */
5 :
6 : /*
7 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
8 : *
9 : * This source code is subject to the terms of the BSD 2 Clause License and
10 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
11 : * was not distributed with this source code in the LICENSE file, you can
12 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
13 : * Media Patent License 1.0 was not distributed with this source code in the
14 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
15 : */
16 :
17 : #include <assert.h>
18 : #include <smmintrin.h>
19 :
20 : #include "EbDefinitions.h"
21 :
22 : #include "aom_dsp_rtcd.h"
23 :
24 : // Blending with alpha mask. Mask values come from the range [0, 64],
25 : // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
26 : // be the same as dst, or dst can be different from both sources.
27 :
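// A minimal, self-contained sketch of what the AOM_BLEND_A64() macro computes,
// assuming AOM_BLEND_A64_MAX_ALPHA == 64 and AOM_BLEND_A64_ROUND_BITS == 6 as
// documented in aom_dsp/blend.h. The functions below use the real macro; this
// helper only illustrates the arithmetic.
static inline int blend_a64_model(int m, int v0, int v1) {
    // Weighted average of v0 and v1 with 6-bit alpha m in [0, 64], rounded to
    // nearest; equivalent to ROUND_POWER_OF_TWO(m * v0 + (64 - m) * v1, 6).
    return (m * v0 + (64 - m) * v1 + (1 << 5)) >> 6;
}
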
28 : // NOTE(david.barker): The input and output of aom_blend_a64_d16_mask_c() are
29 : // in a higher intermediate precision, and will later be rounded down to pixel
30 : // precision.
31 : // Thus, in order to avoid double-rounding, we want to use normal right shifts
32 : // within this function, not ROUND_POWER_OF_TWO.
33 : // This works because of the identity:
34 : // ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y+z)
35 : //
36 : // In contrast, the output of the non-d16 functions will not be further rounded,
37 : // so we *should* use ROUND_POWER_OF_TWO there.
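
// A minimal numeric check of the identity above, assuming the usual
// ROUND_POWER_OF_TWO(x, n) definition, ((x + (1 << (n - 1))) >> n), and
// non-negative x: the bits dropped by the plain shift contribute less than one
// unit at the combined shift position, so they cannot change the rounded result.
static inline int round_identity_example(void) {
    const int32_t x = 100000, y = 4, z = 7;
    const int32_t lhs = ((x >> y) + (1 << (z - 1))) >> z;     // shift, then round
    const int32_t rhs = (x + (1 << (y + z - 1))) >> (y + z);  // single rounding step
    return lhs == rhs;  // both sides evaluate to 49
}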
38 :
39 0 : void aom_lowbd_blend_a64_d16_mask_c(
40 : uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
41 : uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
42 : const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw,
43 : int subh, ConvolveParams *conv_params)
44 : {
45 : int i, j;
46 0 : const int bd = 8;
47 0 : const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
48 0 : const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
49 0 : (1 << (offset_bits - conv_params->round_1 - 1));
50 0 : const int round_bits =
51 0 : 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
52 :
53 : assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride));
54 : assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride));
55 :
56 : assert(h >= 4);
57 : assert(w >= 4);
58 :
59 0 : if (subw == 0 && subh == 0) {
60 0 : for (i = 0; i < h; ++i) {
61 0 : for (j = 0; j < w; ++j) {
62 : int32_t res;
63 0 : const int m = mask[i * mask_stride + j];
64 0 : res = ((m * (int32_t)src0[i * src0_stride + j] +
65 0 : (AOM_BLEND_A64_MAX_ALPHA - m) *
66 0 : (int32_t)src1[i * src1_stride + j]) >>
67 : AOM_BLEND_A64_ROUND_BITS);
68 0 : res -= round_offset;
69 0 : dst[i * dst_stride + j] =
70 0 : clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
71 : }
72 : }
73 : }
74 0 : else if (subw == 1 && subh == 1) {
75 0 : for (i = 0; i < h; ++i) {
76 0 : for (j = 0; j < w; ++j) {
77 : int32_t res;
78 0 : const int m = ROUND_POWER_OF_TWO(
79 : mask[(2 * i) * mask_stride + (2 * j)] +
80 : mask[(2 * i + 1) * mask_stride + (2 * j)] +
81 : mask[(2 * i) * mask_stride + (2 * j + 1)] +
82 : mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
83 : 2);
84 0 : res = ((m * (int32_t)src0[i * src0_stride + j] +
85 0 : (AOM_BLEND_A64_MAX_ALPHA - m) *
86 0 : (int32_t)src1[i * src1_stride + j]) >>
87 : AOM_BLEND_A64_ROUND_BITS);
88 0 : res -= round_offset;
89 0 : dst[i * dst_stride + j] =
90 0 : clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
91 : }
92 : }
93 : }
94 0 : else if (subw == 1 && subh == 0) {
95 0 : for (i = 0; i < h; ++i) {
96 0 : for (j = 0; j < w; ++j) {
97 : int32_t res;
98 0 : const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
99 : mask[i * mask_stride + (2 * j + 1)]);
100 0 : res = ((m * (int32_t)src0[i * src0_stride + j] +
101 0 : (AOM_BLEND_A64_MAX_ALPHA - m) *
102 0 : (int32_t)src1[i * src1_stride + j]) >>
103 : AOM_BLEND_A64_ROUND_BITS);
104 0 : res -= round_offset;
105 0 : dst[i * dst_stride + j] =
106 0 : clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
107 : }
108 : }
109 : }
110 : else {
111 0 : for (i = 0; i < h; ++i) {
112 0 : for (j = 0; j < w; ++j) {
113 : int32_t res;
114 0 : const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
115 : mask[(2 * i + 1) * mask_stride + j]);
116 0 : res = ((m * (int32_t)src0[i * src0_stride + j] +
117 0 : (AOM_BLEND_A64_MAX_ALPHA - m) *
118 0 : (int32_t)src1[i * src1_stride + j]) >>
119 : AOM_BLEND_A64_ROUND_BITS);
120 0 : res -= round_offset;
121 0 : dst[i * dst_stride + j] =
122 0 : clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
123 : }
124 : }
125 : }
126 0 : }
127 :
128 0 : void aom_highbd_blend_a64_d16_mask_c(
129 : uint8_t *dst_8, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
130 : uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
131 : const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw,
132 : int subh, ConvolveParams *conv_params, const int bd)
133 : {
134 0 : const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
135 0 : const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
136 0 : (1 << (offset_bits - conv_params->round_1 - 1));
137 0 : const int round_bits =
138 0 : 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
139 0 : uint16_t *dst = (uint16_t *)dst_8;
140 :
141 : assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
142 : assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
143 :
144 : assert(h >= 1);
145 : assert(w >= 1);
146 : assert(IS_POWER_OF_TWO(h));
147 : assert(IS_POWER_OF_TWO(w));
148 :
149 : // Excerpt from clip_pixel_highbd():
150 : // set saturation_value to (1 << bd) - 1 for the supported bit depths.
151 : unsigned int saturation_value;
152 0 : switch (bd) {
153 0 : case 8:
154 0 : default: saturation_value = 255; break;
155 0 : case 10: saturation_value = 1023; break;
156 0 : case 12: saturation_value = 4095; break;
157 : }
158 :
159 0 : if (subw == 0 && subh == 0) {
160 0 : for (int i = 0; i < h; ++i) {
161 0 : for (int j = 0; j < w; ++j) {
162 : int32_t res;
163 0 : const int m = mask[j];
164 0 : res = ((m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
165 : AOM_BLEND_A64_ROUND_BITS);
166 0 : res -= round_offset;
167 0 : unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
168 0 : dst[j] = AOMMIN(v, saturation_value);
169 : }
170 0 : mask += mask_stride;
171 0 : src0 += src0_stride;
172 0 : src1 += src1_stride;
173 0 : dst += dst_stride;
174 : }
175 : }
176 0 : else if (subw == 1 && subh == 1) {
177 0 : for (int i = 0; i < h; ++i) {
178 0 : for (int j = 0; j < w; ++j) {
179 : int32_t res;
180 0 : const int m = ROUND_POWER_OF_TWO(
181 : mask[2 * j] + mask[mask_stride + 2 * j] + mask[2 * j + 1] +
182 : mask[mask_stride + 2 * j + 1],
183 : 2);
184 0 : res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
185 : AOM_BLEND_A64_ROUND_BITS;
186 0 : res -= round_offset;
187 0 : unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
188 0 : dst[j] = AOMMIN(v, saturation_value);
189 : }
190 0 : mask += 2 * mask_stride;
191 0 : src0 += src0_stride;
192 0 : src1 += src1_stride;
193 0 : dst += dst_stride;
194 : }
195 : }
196 0 : else if (subw == 1 && subh == 0) {
197 0 : for (int i = 0; i < h; ++i) {
198 0 : for (int j = 0; j < w; ++j) {
199 : int32_t res;
200 0 : const int m = AOM_BLEND_AVG(mask[2 * j], mask[2 * j + 1]);
201 0 : res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
202 : AOM_BLEND_A64_ROUND_BITS;
203 0 : res -= round_offset;
204 0 : unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
205 0 : dst[j] = AOMMIN(v, saturation_value);
206 : }
207 0 : mask += mask_stride;
208 0 : src0 += src0_stride;
209 0 : src1 += src1_stride;
210 0 : dst += dst_stride;
211 : }
212 : }
213 : else {
214 0 : for (int i = 0; i < h; ++i) {
215 0 : for (int j = 0; j < w; ++j) {
216 : int32_t res;
217 0 : const int m = AOM_BLEND_AVG(mask[j], mask[mask_stride + j]);
218 0 : res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
219 : AOM_BLEND_A64_ROUND_BITS;
220 0 : res -= round_offset;
221 0 : unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
222 0 : dst[j] = AOMMIN(v, saturation_value);
223 : }
224 0 : mask += 2 * mask_stride;
225 0 : src0 += src0_stride;
226 0 : src1 += src1_stride;
227 0 : dst += dst_stride;
228 : }
229 : }
230 0 : }
231 :
232 :
233 : // Blending with alpha mask. Mask values come from the range [0, 64],
234 : // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
235 : // be the same as dst, or dst can be different from both sources.
236 :
237 295896 : void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride,
238 : const uint8_t *src0, uint32_t src0_stride,
239 : const uint8_t *src1, uint32_t src1_stride,
240 : const uint8_t *mask, uint32_t mask_stride, int w,
241 : int h, int subw, int subh)
242 : {
243 : int i, j;
244 :
245 : assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
246 : assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
247 :
248 : assert(h >= 1);
249 : assert(w >= 1);
250 : assert(IS_POWER_OF_TWO(h));
251 : assert(IS_POWER_OF_TWO(w));
252 :
253 295896 : if (subw == 0 && subh == 0) {
254 2201290 : for (i = 0; i < h; ++i) {
255 5716180 : for (j = 0; j < w; ++j) {
256 3810780 : const int m = mask[i * mask_stride + j];
257 3810780 : dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
258 : src1[i * src1_stride + j]);
259 : }
260 : }
261 : }
262 0 : else if (subw == 1 && subh == 1) {
263 0 : for (i = 0; i < h; ++i) {
264 0 : for (j = 0; j < w; ++j) {
265 0 : const int m = ROUND_POWER_OF_TWO(
266 : mask[(2 * i) * mask_stride + (2 * j)] +
267 : mask[(2 * i + 1) * mask_stride + (2 * j)] +
268 : mask[(2 * i) * mask_stride + (2 * j + 1)] +
269 : mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
270 : 2);
271 0 : dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
272 : src1[i * src1_stride + j]);
273 : }
274 : }
275 : }
276 0 : else if (subw == 1 && subh == 0) {
277 0 : for (i = 0; i < h; ++i) {
278 0 : for (j = 0; j < w; ++j) {
279 0 : const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
280 : mask[i * mask_stride + (2 * j + 1)]);
281 0 : dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
282 : src1[i * src1_stride + j]);
283 : }
284 : }
285 : }
286 : else {
287 0 : for (i = 0; i < h; ++i) {
288 0 : for (j = 0; j < w; ++j) {
289 0 : const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
290 : mask[(2 * i + 1) * mask_stride + j]);
291 0 : dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
292 : src1[i * src1_stride + j]);
293 : }
294 : }
295 : }
296 295896 : }
297 :
298 0 : void aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride,
299 : const uint8_t *src0_8, uint32_t src0_stride,
300 : const uint8_t *src1_8, uint32_t src1_stride,
301 : const uint8_t *mask, uint32_t mask_stride,
302 : int w, int h, int subw, int subh, int bd)
303 : {
304 : int i, j;
305 0 : uint16_t *dst = (uint16_t *)dst_8;
306 0 : const uint16_t *src0 = (uint16_t *)src0_8;
307 0 : const uint16_t *src1 = (uint16_t *)src1_8;
308 : (void)bd;
309 :
310 : assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
311 : assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
312 :
313 : assert(h >= 1);
314 : assert(w >= 1);
315 : assert(IS_POWER_OF_TWO(h));
316 : assert(IS_POWER_OF_TWO(w));
317 :
318 : assert(bd == 8 || bd == 10 || bd == 12);
319 :
320 0 : if (subw == 0 && subh == 0) {
321 0 : for (i = 0; i < h; ++i) {
322 0 : for (j = 0; j < w; ++j) {
323 0 : const int m = mask[i * mask_stride + j];
324 0 : dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
325 : src1[i * src1_stride + j]);
326 : }
327 : }
328 : }
329 0 : else if (subw == 1 && subh == 1) {
330 0 : for (i = 0; i < h; ++i) {
331 0 : for (j = 0; j < w; ++j) {
332 0 : const int m = ROUND_POWER_OF_TWO(
333 : mask[(2 * i) * mask_stride + (2 * j)] +
334 : mask[(2 * i + 1) * mask_stride + (2 * j)] +
335 : mask[(2 * i) * mask_stride + (2 * j + 1)] +
336 : mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
337 : 2);
338 0 : dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
339 : src1[i * src1_stride + j]);
340 : }
341 : }
342 : }
343 0 : else if (subw == 1 && subh == 0) {
344 0 : for (i = 0; i < h; ++i) {
345 0 : for (j = 0; j < w; ++j) {
346 0 : const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
347 : mask[i * mask_stride + (2 * j + 1)]);
348 0 : dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
349 : src1[i * src1_stride + j]);
350 : }
351 : }
352 : }
353 : else {
354 0 : for (i = 0; i < h; ++i) {
355 0 : for (j = 0; j < w; ++j) {
356 0 : const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
357 : mask[(2 * i + 1) * mask_stride + j]);
358 0 : dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
359 : src1[i * src1_stride + j]);
360 : }
361 : }
362 : }
363 0 : }
364 :
365 : /* Blend functions using a vertical (per-row) mask: mask[i] applies to row i. */
366 0 : void aom_blend_a64_vmask_c(uint8_t *dst, uint32_t dst_stride,
367 : const uint8_t *src0, uint32_t src0_stride,
368 : const uint8_t *src1, uint32_t src1_stride,
369 : const uint8_t *mask, int w, int h)
370 : {
371 : int i, j;
372 :
373 : assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
374 : assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
375 :
376 : assert(h >= 1);
377 : assert(w >= 1);
378 : assert(IS_POWER_OF_TWO(h));
379 : assert(IS_POWER_OF_TWO(w));
380 :
381 0 : for (i = 0; i < h; ++i) {
382 0 : const int m = mask[i];
383 0 : for (j = 0; j < w; ++j) {
384 0 : dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
385 : src1[i * src1_stride + j]);
386 : }
387 : }
388 0 : }
389 :
390 0 : void aom_highbd_blend_a64_vmask_c(uint8_t *dst_8, uint32_t dst_stride,
391 : const uint8_t *src0_8, uint32_t src0_stride,
392 : const uint8_t *src1_8, uint32_t src1_stride,
393 : const uint8_t *mask, int w, int h, int bd)
394 : {
395 : int i, j;
396 0 : uint16_t *dst = (uint16_t *)dst_8; // was CONVERT_TO_SHORTPTR(dst_8)
397 0 : const uint16_t *src0 = (const uint16_t *)src0_8; // was CONVERT_TO_SHORTPTR(src0_8)
398 0 : const uint16_t *src1 = (const uint16_t *)src1_8; // was CONVERT_TO_SHORTPTR(src1_8)
399 : (void)bd;
400 :
401 : assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
402 : assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
403 :
404 : assert(h >= 1);
405 : assert(w >= 1);
406 : assert(IS_POWER_OF_TWO(h));
407 : assert(IS_POWER_OF_TWO(w));
408 :
409 : assert(bd == 8 || bd == 10 || bd == 12);
410 :
411 0 : for (i = 0; i < h; ++i) {
412 0 : const int m = mask[i];
413 0 : for (j = 0; j < w; ++j) {
414 0 : dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
415 : src1[i * src1_stride + j]);
416 : }
417 : }
418 0 : }
419 :
420 : /* Blend functions using a horizontal (per-column) mask: mask[j] applies to column j. */
421 0 : void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride,
422 : const uint8_t *src0, uint32_t src0_stride,
423 : const uint8_t *src1, uint32_t src1_stride,
424 : const uint8_t *mask, int w, int h)
425 : {
426 : int i, j;
427 :
428 : assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
429 : assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
430 :
431 : assert(h >= 1);
432 : assert(w >= 1);
433 : assert(IS_POWER_OF_TWO(h));
434 : assert(IS_POWER_OF_TWO(w));
435 :
436 0 : for (i = 0; i < h; ++i) {
437 0 : for (j = 0; j < w; ++j) {
438 0 : dst[i * dst_stride + j] = AOM_BLEND_A64(
439 : mask[j], src0[i * src0_stride + j], src1[i * src1_stride + j]);
440 : }
441 : }
442 0 : }
443 :
444 0 : void aom_highbd_blend_a64_hmask_c(uint8_t *dst_8, uint32_t dst_stride,
445 : const uint8_t *src0_8, uint32_t src0_stride,
446 : const uint8_t *src1_8, uint32_t src1_stride,
447 : const uint8_t *mask, int w, int h, int bd)
448 : {
449 : int i, j;
450 0 : uint16_t *dst = (uint16_t *)dst_8; // was CONVERT_TO_SHORTPTR(dst_8)
451 0 : const uint16_t *src0 = (const uint16_t *)src0_8; // was CONVERT_TO_SHORTPTR(src0_8)
452 0 : const uint16_t *src1 = (const uint16_t *)src1_8; // was CONVERT_TO_SHORTPTR(src1_8)
453 : (void)bd;
454 :
455 : assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
456 : assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
457 :
458 : assert(h >= 1);
459 : assert(w >= 1);
460 : assert(IS_POWER_OF_TWO(h));
461 : assert(IS_POWER_OF_TWO(w));
462 :
463 : assert(bd == 8 || bd == 10 || bd == 12);
464 :
465 0 : for (i = 0; i < h; ++i) {
466 0 : for (j = 0; j < w; ++j) {
467 0 : dst[i * dst_stride + j] = AOM_BLEND_A64(
468 : mask[j], src0[i * src0_stride + j], src1[i * src1_stride + j]);
469 : }
470 : }
471 0 : }