Line data Source code
1 : /*
2 : * Copyright(c) 2019 Intel Corporation
3 : * SPDX - License - Identifier: BSD - 2 - Clause - Patent
4 : */
5 :
6 : /*
7 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
8 : *
9 : * This source code is subject to the terms of the BSD 2 Clause License and
10 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
11 : * was not distributed with this source code in the LICENSE file, you can
12 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
13 : * Media Patent License 1.0 was not distributed with this source code in the
14 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
15 : */
16 :
17 : /*********************************
18 : * Includes
19 : *********************************/
20 :
21 : #include "EbPictureOperators.h"
22 : #include "EbPackUnPack.h"
23 : #include "aom_dsp_rtcd.h"
24 :
25 : #define VARIANCE_PRECISION 16
26 : #define MEAN_PRECISION (VARIANCE_PRECISION >> 1)
27 :
28 : void *eb_aom_memset16(void *dest, int32_t val, size_t length);
29 :
30 : /*********************************
31 : * Picture Copy
32 : *********************************/
33 :
34 44199000 : void pic_copy_kernel_8bit(
35 : EbByte src,
36 : uint32_t src_stride,
37 : EbByte dst,
38 : uint32_t dst_stride,
39 : uint32_t area_width,
40 : uint32_t area_height)
41 : {
42 599973000 : for (uint32_t j = 0; j < area_height; j++)
43 555774000 : memcpy(dst + j * dst_stride, src + j * src_stride, area_width);
44 44199000 : }
/*
 * Copies a width x height region of 16-bit samples from src to dst.
 * Strides are expressed in uint16_t elements; each row transfers
 * width * sizeof(uint16_t) bytes.
 */
void pic_copy_kernel_16bit(
    uint16_t *src,
    uint32_t  src_stride,
    uint16_t *dst,
    uint32_t  dst_stride,
    uint32_t  width,
    uint32_t  height)
{
    uint32_t row = 0;

    while (row < height) {
        const uint32_t src_offset = row * src_stride;
        const uint32_t dst_offset = row * dst_stride;

        memcpy(&dst[dst_offset], &src[src_offset], sizeof(uint16_t) * width);
        ++row;
    }
}
56 :
57 43998500 : EbErrorType picture_copy(
58 : EbPictureBufferDesc *src,
59 : uint32_t src_luma_origin_index,
60 : uint32_t src_chroma_origin_index,
61 : EbPictureBufferDesc *dst,
62 : uint32_t dst_luma_origin_index,
63 : uint32_t dst_chroma_origin_index,
64 : uint32_t area_width,
65 : uint32_t area_height,
66 : uint32_t chroma_area_width,
67 : uint32_t chroma_area_height,
68 : uint32_t component_mask,
69 : EbBool hbd)
70 : {
71 43998500 : EbErrorType return_error = EB_ErrorNone;
72 :
73 43998500 : if (hbd) {
74 0 : if (component_mask & PICTURE_BUFFER_DESC_Y_FLAG)
75 0 : pic_copy_kernel_16bit(
76 0 : ((uint16_t*) src->buffer_y) + src_luma_origin_index,
77 0 : src->stride_y,
78 0 : ((uint16_t*) dst->buffer_y) + dst_luma_origin_index,
79 0 : dst->stride_y,
80 : area_width,
81 : area_height);
82 :
83 0 : if (component_mask & PICTURE_BUFFER_DESC_Cb_FLAG)
84 0 : pic_copy_kernel_16bit(
85 0 : ((uint16_t *) src->buffer_cb) + src_chroma_origin_index,
86 0 : src->stride_cb,
87 0 : ((uint16_t *) dst->buffer_cb) + dst_chroma_origin_index,
88 0 : dst->stride_cb,
89 : chroma_area_width,
90 : chroma_area_height);
91 :
92 0 : if (component_mask & PICTURE_BUFFER_DESC_Cr_FLAG)
93 0 : pic_copy_kernel_16bit(
94 0 : ((uint16_t *) src->buffer_cr) + src_chroma_origin_index,
95 0 : src->stride_cr,
96 0 : ((uint16_t *) dst->buffer_cr) + dst_chroma_origin_index,
97 0 : dst->stride_cr,
98 : chroma_area_width,
99 : chroma_area_height);
100 : } else {
101 43998500 : if (component_mask & PICTURE_BUFFER_DESC_Y_FLAG)
102 27326200 : pic_copy_kernel_8bit(
103 27326200 : &(src->buffer_y[src_luma_origin_index]),
104 27326200 : src->stride_y,
105 27326200 : &(dst->buffer_y[dst_luma_origin_index]),
106 27326200 : dst->stride_y,
107 : area_width,
108 : area_height);
109 :
110 43981400 : if (component_mask & PICTURE_BUFFER_DESC_Cb_FLAG)
111 7579990 : pic_copy_kernel_8bit(
112 7579990 : &(src->buffer_cb[src_chroma_origin_index]),
113 7579990 : src->stride_cb,
114 7579990 : &(dst->buffer_cb[dst_chroma_origin_index]),
115 7579990 : dst->stride_cb,
116 : chroma_area_width,
117 : chroma_area_height);
118 :
119 43980400 : if (component_mask & PICTURE_BUFFER_DESC_Cr_FLAG)
120 9115840 : pic_copy_kernel_8bit(
121 9115840 : &(src->buffer_cr[src_chroma_origin_index]),
122 9115840 : src->stride_cr,
123 9115840 : &(dst->buffer_cr[dst_chroma_origin_index]),
124 9115840 : dst->stride_cr,
125 : chroma_area_width,
126 : chroma_area_height);
127 : }
128 :
129 43966900 : return return_error;
130 : }
131 :
/*******************************************
 * Residual Kernel 16bit
 *
 * Writes residual = input - pred for every sample of an
 * area_width x area_height block. input and pred hold 16-bit samples;
 * each is cast to int16_t before the subtraction and the difference is
 * stored as int16_t. All strides are in elements of their buffer.
 *******************************************/
void residual_kernel16bit(
    uint16_t *input,
    uint32_t  input_stride,
    uint16_t *pred,
    uint32_t  pred_stride,
    int16_t  *residual,
    uint32_t  residual_stride,
    uint32_t  area_width,
    uint32_t  area_height)
{
    for (uint32_t row = 0; row < area_height; ++row) {
        for (uint32_t col = 0; col < area_width; ++col) {
            const int16_t in_sample   = (int16_t)input[col];
            const int16_t pred_sample = (int16_t)pred[col];

            residual[col] = (int16_t)(in_sample - pred_sample);
        }

        /* Step all three buffers to the start of the next row. */
        input    += input_stride;
        pred     += pred_stride;
        residual += residual_stride;
    }
}
/*******************************************
 * Residual Kernel (8-bit)
 *
 * Writes residual = input - pred for every sample of an
 * area_width x area_height block of 8-bit samples. The difference is
 * stored as int16_t. All strides are in elements of their buffer.
 *******************************************/
void residual_kernel8bit_c(
    uint8_t  *input,
    uint32_t  input_stride,
    uint8_t  *pred,
    uint32_t  pred_stride,
    int16_t  *residual,
    uint32_t  residual_stride,
    uint32_t  area_width,
    uint32_t  area_height)
{
    for (uint32_t row = 0; row < area_height; ++row) {
        for (uint32_t col = 0; col < area_width; ++col) {
            const int16_t in_sample   = (int16_t)input[col];
            const int16_t pred_sample = (int16_t)pred[col];

            residual[col] = (int16_t)(in_sample - pred_sample);
        }

        /* Step all three buffers to the start of the next row. */
        input    += input_stride;
        pred     += pred_stride;
        residual += residual_stride;
    }
}
196 :
/*
 * Vertically subsampled 8-bit residual.
 *
 * Only every second input/pred row is read (rows 0, 2, 4, ...). The
 * difference computed for source row 2k is written to residual rows 2k and
 * 2k + 1, so the residual block keeps its full height and its own stride.
 * area_height >> 1 row pairs are processed.
 *
 * When last_line is non-zero, the final residual row of the processed area
 * (row 2 * (area_height >> 1) - 1) is recomputed from the input/pred row at
 * the same index instead of keeping the duplicated value.
 *
 * If area_height is less than 2 there are no row pairs and nothing is
 * written, even when last_line is set. The previous code computed
 * (0 - 1) on an unsigned height in that case and indexed out of bounds.
 */
void residual_kernel_subsampled(
    uint8_t  *input,
    uint32_t  input_stride,
    uint8_t  *pred,
    uint32_t  pred_stride,
    int16_t  *residual,
    uint32_t  residual_stride,
    uint32_t  area_width,
    uint32_t  area_height,
    uint8_t   last_line)
{
    /* Hard-coded 2:1 vertical subsampling; residual_stride is kept as is. */
    const uint32_t pair_count = area_height >> 1;

    const uint8_t *in_row   = input;
    const uint8_t *pred_row = pred;
    int16_t       *res_row  = residual;

    for (uint32_t pair = 0; pair < pair_count; ++pair) {
        for (uint32_t col = 0; col < area_width; ++col) {
            const int16_t diff =
                (int16_t)((int16_t)in_row[col] - (int16_t)pred_row[col]);

            res_row[col]                           = diff;
            res_row[(size_t)col + residual_stride] = diff;
        }

        /* Skip one source row; advance two residual rows. */
        in_row   += (size_t)input_stride * 2;
        pred_row += (size_t)pred_stride * 2;
        res_row  += (size_t)residual_stride * 2;
    }

    /* Recompute the last line exactly; guarded so the row index cannot wrap. */
    if (last_line && pair_count > 0) {
        const size_t   last_row  = ((size_t)pair_count << 1) - 1;
        const uint8_t *in_last   = input + last_row * input_stride;
        const uint8_t *pred_last = pred + last_row * pred_stride;
        int16_t       *res_last  = residual + last_row * residual_stride;

        for (uint32_t col = 0; col < area_width; ++col)
            res_last[col] =
                (int16_t)((int16_t)in_last[col] - (int16_t)pred_last[col]);
    }
}
250 :
/*
 * Sums compute8x8_satd_u8() over every complete 8x8 tile of a
 * width x height block. Partial tiles on the right/bottom edge are ignored
 * (the tile counts are width >> 3 and height >> 3).
 *
 * src        source samples
 * src_stride source stride, in samples
 * width      block width (N)
 * height     block height (M)
 * dc_value   forwarded unchanged to every compute8x8_satd_u8() call
 *            (presumably a DC accumulator - confirm against that function)
 *
 * Returns the accumulated SATD.
 */
uint64_t ComputeNxMSatd8x8Units_U8(
    uint8_t  *src,
    uint32_t  src_stride,
    uint32_t  width,
    uint32_t  height,
    uint64_t *dc_value)
{
    const uint32_t tiles_down   = height >> 3;
    const uint32_t tiles_across = width >> 3;
    uint64_t       total        = 0;

    for (uint32_t tile_y = 0; tile_y < tiles_down; ++tile_y) {
        const uint32_t row_offset = (tile_y << 3) * src_stride;

        for (uint32_t tile_x = 0; tile_x < tiles_across; ++tile_x) {
            uint8_t *tile = &src[(tile_x << 3) + row_offset];

            total += compute8x8_satd_u8(tile, dc_value, src_stride);
        }
    }

    return total;
}
269 :
/*
 * Sums compute4x4_satd_u8() over every complete 4x4 tile of a
 * width x height block. Partial tiles on the right/bottom edge are ignored
 * (the tile counts are width >> 2 and height >> 2).
 *
 * src        source samples
 * src_stride source stride, in samples
 * width      block width (N)
 * height     block height (M)
 * dc_value   forwarded unchanged to every compute4x4_satd_u8() call
 *            (presumably a DC accumulator - confirm against that function)
 *
 * Returns the accumulated SATD.
 */
uint64_t ComputeNxMSatd4x4Units_U8(
    uint8_t  *src,
    uint32_t  src_stride,
    uint32_t  width,
    uint32_t  height,
    uint64_t *dc_value)
{
    const uint32_t tiles_down   = height >> 2;
    const uint32_t tiles_across = width >> 2;
    uint64_t       total        = 0;

    for (uint32_t tile_y = 0; tile_y < tiles_down; ++tile_y) {
        const uint32_t row_offset = (tile_y << 2) * src_stride;

        for (uint32_t tile_x = 0; tile_x < tiles_across; ++tile_x) {
            uint8_t *tile = &src[(tile_x << 2) + row_offset];

            total += compute4x4_satd_u8(tile, dc_value, src_stride);
        }
    }

    return total;
}
/*******************************************
 * Returns the NxM Sum of Absolute Transformed Differences with a quarter
 * of the accumulated DC value removed.
 *
 * Blocks that are at least 8 samples in both dimensions are measured in
 * 8x8 units (ComputeNxMSatd8x8Units_U8); all other blocks are measured in
 * 4x4 units (ComputeNxMSatd4x4Units_U8).
 *
 * src        source samples
 * src_stride source stride
 * width      block width (N)
 * height     block height (M)
 *******************************************/
uint64_t compute_nx_m_satd_sad_lcu(
    uint8_t  *src,
    uint32_t  src_stride,
    uint32_t  width,
    uint32_t  height) {

    uint64_t dc_sum = 0;

    const uint64_t satd_sum = (width >= 8 && height >= 8)
        ? ComputeNxMSatd8x8Units_U8(src, src_stride, width, height, &dc_sum)
        : ComputeNxMSatd4x4Units_U8(src, src_stride, width, height, &dc_sum);

    /* AC part: total SATD minus dc_sum / 4. */
    return satd_sum - (dc_sum >> 2);
}
323 :
324 : /*******************************************
325 : * Picture Full Distortion
326 : * Used in the Full Mode Decision Loop for the only case of a MVP-SKIP candidate
327 : *******************************************/
328 :
329 0 : void full_distortion_kernel32_bits_c(
330 : int32_t *coeff,
331 : uint32_t coeff_stride,
332 : int32_t *recon_coeff,
333 : uint32_t recon_coeff_stride,
334 : uint64_t distortion_result[DIST_CALC_TOTAL],
335 : uint32_t area_width,
336 : uint32_t area_height)
337 : {
338 : uint32_t columnIndex;
339 0 : uint32_t row_index = 0;
340 0 : uint64_t residualDistortion = 0;
341 0 : uint64_t predictionDistortion = 0;
342 :
343 0 : while (row_index < area_height) {
344 0 : columnIndex = 0;
345 0 : while (columnIndex < area_width) {
346 0 : residualDistortion += (int64_t)SQR((int64_t)(coeff[columnIndex]) - (recon_coeff[columnIndex]));
347 0 : predictionDistortion += (int64_t)SQR((int64_t)(coeff[columnIndex]));
348 0 : ++columnIndex;
349 : }
350 :
351 0 : coeff += coeff_stride;
352 0 : recon_coeff += recon_coeff_stride;
353 0 : ++row_index;
354 : }
355 :
356 0 : distortion_result[DIST_CALC_RESIDUAL] = residualDistortion;
357 0 : distortion_result[DIST_CALC_PREDICTION] = predictionDistortion;
358 0 : }
359 :
/*
 * Sum of squared differences between two blocks of 16-bit samples.
 *
 * input and pred are passed as byte pointers but are reinterpreted as
 * uint16_t arrays; input_offset, pred_offset and both strides are therefore
 * counted in uint16_t elements. Returns the SSE over
 * area_width x area_height samples.
 */
uint64_t full_distortion_kernel16_bits(
    uint8_t  *input,
    uint32_t  input_offset,
    uint32_t  input_stride,
    uint8_t  *pred,
    uint32_t  pred_offset,
    uint32_t  pred_stride,
    uint32_t  area_width,
    uint32_t  area_height)
{
    const uint16_t *in_row   = (const uint16_t *)input + input_offset;
    const uint16_t *pred_row = (const uint16_t *)pred + pred_offset;
    uint64_t        sse      = 0;

    for (uint32_t row = 0; row < area_height; ++row) {
        for (uint32_t col = 0; col < area_width; ++col) {
            const int64_t delta = (int64_t)in_row[col] - pred_row[col];

            sse += (int64_t)SQR(delta);
        }

        in_row   += input_stride;
        pred_row += pred_stride;
    }

    return sse;
}
392 :
393 : /*******************************************
394 : * Picture Distortion Full Kernel CbfZero
395 : *******************************************/
396 0 : void full_distortion_kernel_cbf_zero32_bits_c(
397 : int32_t *coeff,
398 : uint32_t coeff_stride,
399 : int32_t *recon_coeff,
400 : uint32_t recon_coeff_stride,
401 : uint64_t distortion_result[DIST_CALC_TOTAL],
402 : uint32_t area_width,
403 : uint32_t area_height)
404 : {
405 : uint32_t columnIndex;
406 0 : uint32_t row_index = 0;
407 0 : uint64_t predictionDistortion = 0;
408 : (void)recon_coeff;
409 : (void)recon_coeff_stride;
410 :
411 0 : while (row_index < area_height) {
412 0 : columnIndex = 0;
413 0 : while (columnIndex < area_width) {
414 0 : predictionDistortion += (int64_t)SQR((int64_t)(coeff[columnIndex]));
415 0 : ++columnIndex;
416 : }
417 :
418 0 : coeff += coeff_stride;
419 0 : ++row_index;
420 : }
421 :
422 0 : distortion_result[DIST_CALC_RESIDUAL] = predictionDistortion;
423 0 : distortion_result[DIST_CALC_PREDICTION] = predictionDistortion;
424 0 : }
425 :
426 13648000 : EbErrorType picture_full_distortion32_bits(
427 : EbPictureBufferDesc *coeff,
428 : uint32_t coeff_luma_origin_index,
429 : uint32_t coeff_chroma_origin_index,
430 : EbPictureBufferDesc *recon_coeff,
431 : uint32_t recon_coeff_luma_origin_index,
432 : uint32_t recon_coeff_chroma_origin_index,
433 : uint32_t bwidth,
434 : uint32_t bheight,
435 : uint32_t bwidth_uv,
436 : uint32_t bheight_uv,
437 : uint64_t y_distortion[DIST_CALC_TOTAL],
438 : uint64_t cb_distortion[DIST_CALC_TOTAL],
439 : uint64_t cr_distortion[DIST_CALC_TOTAL],
440 : uint32_t y_count_non_zero_coeffs,
441 : uint32_t cb_count_non_zero_coeffs,
442 : uint32_t cr_count_non_zero_coeffs,
443 : COMPONENT_TYPE component_type)
444 : {
445 13648000 : EbErrorType return_error = EB_ErrorNone;
446 :
447 : //TODO due to a change in full kernel distortion , ASM has to be updated to not accumulate the input distortion by the output
448 :
449 13648000 : if (component_type == COMPONENT_LUMA || component_type == COMPONENT_ALL) {
450 3859480 : y_distortion[0] = 0;
451 3859480 : y_distortion[1] = 0;
452 :
453 3859480 : bwidth = bwidth < 64 ? bwidth : 32;
454 3859480 : bheight = bheight < 64 ? bheight : 32;
455 :
456 3859480 : if (y_count_non_zero_coeffs) {
457 668260 : full_distortion_kernel32_bits(
458 668260 : &(((int32_t*)coeff->buffer_y)[coeff_luma_origin_index]),
459 : bwidth,
460 668260 : &(((int32_t*)recon_coeff->buffer_y)[recon_coeff_luma_origin_index]),
461 : bwidth,
462 : y_distortion,
463 : bwidth,
464 : bheight);
465 : }
466 : else {
467 3191220 : full_distortion_kernel_cbf_zero32_bits(
468 3191220 : &(((int32_t*)coeff->buffer_y)[coeff_luma_origin_index]),
469 : bwidth,
470 3191220 : &(((int32_t*)recon_coeff->buffer_y)[recon_coeff_luma_origin_index]),
471 : bwidth,
472 : y_distortion,
473 : bwidth,
474 : bheight);
475 : }
476 : }
477 :
478 13648700 : if (component_type == COMPONENT_CHROMA_CB || component_type == COMPONENT_CHROMA || component_type == COMPONENT_ALL) {
479 5079490 : cb_distortion[0] = 0;
480 5079490 : cb_distortion[1] = 0;
481 :
482 : // CB
483 5079490 : if (cb_count_non_zero_coeffs) {
484 2782630 : full_distortion_kernel32_bits(
485 2782630 : &(((int32_t*)coeff->buffer_cb)[coeff_chroma_origin_index]),
486 : bwidth_uv,
487 2782630 : &(((int32_t*)recon_coeff->buffer_cb)[recon_coeff_chroma_origin_index]),
488 : bwidth_uv,
489 : cb_distortion,
490 : bwidth_uv,
491 : bheight_uv);
492 : }
493 : else {
494 2296860 : full_distortion_kernel_cbf_zero32_bits(
495 2296860 : &(((int32_t*)coeff->buffer_cb)[coeff_chroma_origin_index]),
496 : bwidth_uv,
497 2296860 : &(((int32_t*)recon_coeff->buffer_cb)[recon_coeff_chroma_origin_index]),
498 : bwidth_uv,
499 : cb_distortion,
500 : bwidth_uv,
501 : bheight_uv);
502 : }
503 : }
504 13648700 : if (component_type == COMPONENT_CHROMA_CR || component_type == COMPONENT_CHROMA || component_type == COMPONENT_ALL) {
505 4714730 : cr_distortion[0] = 0;
506 4714730 : cr_distortion[1] = 0;
507 : // CR
508 4714730 : if (cr_count_non_zero_coeffs) {
509 1842550 : full_distortion_kernel32_bits(
510 1842550 : &(((int32_t*)coeff->buffer_cr)[coeff_chroma_origin_index]),
511 : bwidth_uv,
512 1842550 : &(((int32_t*)recon_coeff->buffer_cr)[recon_coeff_chroma_origin_index]),
513 : bwidth_uv,
514 : cr_distortion,
515 : bwidth_uv,
516 : bheight_uv);
517 : }
518 : else {
519 2872180 : full_distortion_kernel_cbf_zero32_bits(
520 2872180 : &(((int32_t*)coeff->buffer_cr)[coeff_chroma_origin_index]),
521 : bwidth_uv,
522 2872180 : &(((int32_t*)recon_coeff->buffer_cr)[recon_coeff_chroma_origin_index]),
523 : bwidth_uv,
524 : cr_distortion,
525 : bwidth_uv,
526 : bheight_uv);
527 : }
528 : }
529 :
530 13650400 : return return_error;
531 : }
532 :
/*
 * Forwards a 16-bit-source -> 8-bit-destination unpack request to one of two
 * implementations: un_pack8_bit_data when width is a multiple of 4 and
 * height is even, un_pack8_bit_data_c for every other size.
 * All arguments are passed through unchanged.
 */
void extract_8bit_data(
    uint16_t *in16_bit_buffer,
    uint32_t  in_stride,
    uint8_t  *out8_bit_buffer,
    uint32_t  out8_stride,
    uint32_t  width,
    uint32_t  height) {

    const int width_unaligned = (width & 3) != 0;
    const int height_odd      = (height & 1) != 0;

    if (width_unaligned || height_odd) {
        un_pack8_bit_data_c(
            in16_bit_buffer, in_stride, out8_bit_buffer, out8_stride, width, height);
        return;
    }

    un_pack8_bit_data(
        in16_bit_buffer, in_stride, out8_bit_buffer, out8_stride, width, height);
}
/*
 * Thin wrapper: forwards two 16-bit reference blocks (ref16_l0, ref16_l1)
 * and an 8-bit destination to unpack_avg() with the arguments unchanged and
 * in the same order.
 */
void unpack_l0l1_avg(
    uint16_t *ref16_l0,
    uint32_t  ref_l0_stride,
    uint16_t *ref16_l1,
    uint32_t  ref_l1_stride,
    uint8_t  *dst_ptr,
    uint32_t  dst_stride,
    uint32_t  width,
    uint32_t  height)
{
    unpack_avg(ref16_l0, ref_l0_stride,
               ref16_l1, ref_l1_stride,
               dst_ptr, dst_stride,
               width, height);
}
580 0 : void extract8_bitdata_safe_sub(
581 : uint16_t *in16_bit_buffer,
582 : uint32_t in_stride,
583 : uint8_t *out8_bit_buffer,
584 : uint32_t out8_stride,
585 : uint32_t width,
586 : uint32_t height,
587 : EbBool sub_pred
588 : )
589 : {
590 : /* sub_pred not implemented */
591 : (void)sub_pred;
592 :
593 0 : un_pack8_bit_data(
594 : in16_bit_buffer,
595 : in_stride,
596 : out8_bit_buffer,
597 : out8_stride,
598 : width,
599 : height
600 : );
601 0 : }
602 0 : void unpack_l0l1_avg_safe_sub(
603 : uint16_t *ref16_l0,
604 : uint32_t ref_l0_stride,
605 : uint16_t *ref16_l1,
606 : uint32_t ref_l1_stride,
607 : uint8_t *dst_ptr,
608 : uint32_t dst_stride,
609 : uint32_t width,
610 : uint32_t height,
611 : EbBool sub_pred)
612 : {
613 : //fix C
614 :
615 0 : unpack_avg_safe_sub(
616 : ref16_l0,
617 : ref_l0_stride,
618 : ref16_l1,
619 : ref_l1_stride,
620 : dst_ptr,
621 : dst_stride,
622 : sub_pred,
623 : width,
624 : height);
625 0 : }
/*
 * Splits a 16-bit source into two 8-bit outputs (out8_bit_buffer and
 * outn_bit_buffer) by delegating to one of two implementations:
 * un_pack2d_16_bit_src_mul4 when width is a multiple of 4 and height is
 * even, eb_enc_msb_un_pack2_d otherwise.
 *
 * Both callees take the two output buffers first and the two output strides
 * after them, which differs from this function's parameter order.
 */
void un_pack2d(
    uint16_t *in16_bit_buffer,
    uint32_t  in_stride,
    uint8_t  *out8_bit_buffer,
    uint32_t  out8_stride,
    uint8_t  *outn_bit_buffer,
    uint32_t  outn_stride,
    uint32_t  width,
    uint32_t  height)
{
    const int width_unaligned = (width & 3) != 0;
    const int height_odd      = (height & 1) != 0;

    if (width_unaligned || height_odd) {
        eb_enc_msb_un_pack2_d(in16_bit_buffer, in_stride,
                              out8_bit_buffer, outn_bit_buffer,
                              out8_stride, outn_stride,
                              width, height);
        return;
    }

    un_pack2d_16_bit_src_mul4(in16_bit_buffer, in_stride,
                              out8_bit_buffer, outn_bit_buffer,
                              out8_stride, outn_stride,
                              width, height);
}
659 :
/*
 * Combines two 8-bit inputs (in8_bit_buffer and inn_bit_buffer) into a
 * 16-bit output by delegating to one of two implementations:
 * pack2d_16_bit_src_mul4 when width is a multiple of 4 and height is even,
 * eb_enc_msb_pack2_d otherwise.
 *
 * Both callees expect (in8, in8_stride, inn, out16, inn_stride, out_stride,
 * width, height), which differs from this function's parameter order.
 */
void pack2d_src(
    uint8_t  *in8_bit_buffer,
    uint32_t  in8_stride,
    uint8_t  *inn_bit_buffer,
    uint32_t  inn_stride,
    uint16_t *out16_bit_buffer,
    uint32_t  out_stride,
    uint32_t  width,
    uint32_t  height)
{
    const int width_unaligned = (width & 3) != 0;
    const int height_odd      = (height & 1) != 0;

    if (width_unaligned || height_odd) {
        eb_enc_msb_pack2_d(in8_bit_buffer, in8_stride,
                           inn_bit_buffer, out16_bit_buffer,
                           inn_stride, out_stride,
                           width, height);
        return;
    }

    pack2d_16_bit_src_mul4(in8_bit_buffer, in8_stride,
                           inn_bit_buffer, out16_bit_buffer,
                           inn_stride, out_stride,
                           width, height);
}
695 :
/*
 * Packs an 8-bit buffer and an n-bit buffer into a 16-bit output by
 * delegating to compressed_packmsb when width is exactly 32 or 64, and to
 * compressed_packmsb_c for every other width.
 *
 * Both callees expect (in8, in8_stride, inn, out16, inn_stride, out_stride,
 * width, height), which differs from this function's parameter order.
 */
void compressed_pack_lcu(
    uint8_t  *in8_bit_buffer,
    uint32_t  in8_stride,
    uint8_t  *inn_bit_buffer,
    uint32_t  inn_stride,
    uint16_t *out16_bit_buffer,
    uint32_t  out_stride,
    uint32_t  width,
    uint32_t  height
)
{
    switch (width) {
    case 32:
    case 64:
        compressed_packmsb(in8_bit_buffer, in8_stride,
                           inn_bit_buffer, out16_bit_buffer,
                           inn_stride, out_stride,
                           width, height);
        break;
    default:
        compressed_packmsb_c(in8_bit_buffer, in8_stride,
                             inn_bit_buffer, out16_bit_buffer,
                             inn_stride, out_stride,
                             width, height);
        break;
    }
}
729 :
/*
 * Delegates to c_pack when width is exactly 32 or 64 and to c_pack_c for
 * every other width. All arguments are forwarded unchanged and in the same
 * order.
 */
void conv2b_to_c_pack_lcu(
    const uint8_t *inn_bit_buffer,
    uint32_t       inn_stride,
    uint8_t       *in_compn_bit_buffer,
    uint32_t       out_stride,
    uint8_t       *local_cache,
    uint32_t       width,
    uint32_t       height)
{
    switch (width) {
    case 32:
    case 64:
        c_pack(inn_bit_buffer, inn_stride,
               in_compn_bit_buffer, out_stride,
               local_cache,
               width, height);
        break;
    default:
        c_pack_c(inn_bit_buffer, inn_stride,
                 in_compn_bit_buffer, out_stride,
                 local_cache,
                 width, height);
        break;
    }
}
760 :
/*******************************************
 * memcpy16bit
 *
 * Copies num_of_elements 16-bit values from in_ptr to out_ptr, one element
 * at a time in ascending address order.
 *******************************************/
void memcpy16bit(
    uint16_t *out_ptr,
    uint16_t *in_ptr,
    uint64_t  num_of_elements)
{
    uint16_t       *write_pos = out_ptr;
    const uint16_t *read_pos  = in_ptr;
    uint64_t        remaining;

    for (remaining = num_of_elements; remaining != 0; --remaining)
        *write_pos++ = *read_pos++;
}
774 :
/*******************************************
 * memcpy32bit
 *
 * Copies num_of_elements 32-bit values from in_ptr to out_ptr, one element
 * at a time in ascending address order.
 *******************************************/
void memcpy32bit(
    uint32_t *out_ptr,
    uint32_t *in_ptr,
    uint64_t  num_of_elements)
{
    uint32_t       *write_pos = out_ptr;
    const uint32_t *read_pos  = in_ptr;
    uint64_t        remaining;

    for (remaining = num_of_elements; remaining != 0; --remaining)
        *write_pos++ = *read_pos++;
}
788 :
/*
 * Returns the sum of all samples in a size x size block of int16_t values.
 * stride_in is the distance, in elements, between the starts of two
 * consecutive rows.
 */
int32_t sum_residual_c(int16_t * in_ptr,
    uint32_t size,
    uint32_t stride_in)
{
    int32_t  total = 0;
    uint32_t row   = 0;

    while (row < size) {
        const int16_t *line = in_ptr + row * stride_in;
        uint32_t       col  = 0;

        while (col < size) {
            total += line[col];
            ++col;
        }
        ++row;
    }

    return total;
}
802 :
/*
 * Fills a size x size block of 16-bit samples with value by calling
 * memset16bit() once per row. stride_in is the row pitch in elements.
 * The buffer is handed to memset16bit() as uint16_t *.
 */
void memset16bit_block(
    int16_t * in_ptr,
    uint32_t  stride_in,
    uint32_t  size,
    int16_t   value)
{
    uint32_t row = 0;

    while (row < size) {
        uint16_t *line = (uint16_t *)in_ptr + row * stride_in;

        memset16bit(line, value, size);
        ++row;
    }
}
813 :
/*
 * Pads an 8-bit plane in place by replicating its edge samples.
 *
 * src points at the top-left sample of the width x height picture area; the
 * border memory around it must already be part of the same allocation.
 *
 * Pass 1: for every picture row, the first sample is replicated
 *         extend_left times to its left and the last sample extend_right
 *         times to its right.
 * Pass 2: the (already widened) first row is copied into the extend_top
 *         rows above it, and the widened last row into the extend_bottom
 *         rows below it.
 */
static void extend_plane(uint8_t *const src, int32_t src_stride, int32_t width,
    int32_t height, int32_t extend_top, int32_t extend_left,
    int32_t extend_bottom, int32_t extend_right) {
    const int32_t padded_width = extend_left + extend_right + width;
    uint8_t      *row_start    = src;
    int32_t       n;

    /* Pass 1: left and right borders of each picture row. */
    for (n = 0; n < height; ++n) {
        memset(row_start - extend_left, row_start[0], extend_left);
        memset(row_start + width, row_start[width - 1], extend_right);
        row_start += src_stride;
    }

    /* Pass 2: top and bottom borders, including the corners. */
    const uint8_t *first_line = src - extend_left;
    const uint8_t *last_line  = src + src_stride * (height - 1) - extend_left;
    uint8_t       *above      = src + src_stride * -extend_top - extend_left;
    uint8_t       *below      = src + src_stride * height - extend_left;

    for (n = 0; n < extend_top; ++n) {
        memcpy(above, first_line, padded_width);
        above += src_stride;
    }

    for (n = 0; n < extend_bottom; ++n) {
        memcpy(below, last_line, padded_width);
        below += src_stride;
    }
}
853 :
/*
 * 16-bit counterpart of extend_plane(): pads a plane in place by
 * replicating its edge samples.
 *
 * src8 is converted with CONVERT_TO_SHORTPTR() to obtain the uint16_t
 * sample pointer; src_stride, width and all extend_* values are then
 * applied in uint16_t elements.
 *
 * Pass 1: for every picture row, the first sample is replicated
 *         extend_left times to its left and the last sample extend_right
 *         times to its right (via eb_aom_memset16).
 * Pass 2: the widened first row is copied into the extend_top rows above
 *         it, and the widened last row into the extend_bottom rows below.
 */
static void extend_plane_high(uint8_t *const src8, int32_t src_stride, int32_t width,
    int32_t height, int32_t extend_top, int32_t extend_left,
    int32_t extend_bottom, int32_t extend_right) {
    const int32_t padded_width = extend_left + extend_right + width;
    uint16_t     *plane        = CONVERT_TO_SHORTPTR(src8);
    uint16_t     *row_start    = plane;
    int32_t       n;

    /* Pass 1: left and right borders of each picture row. */
    for (n = 0; n < height; ++n) {
        eb_aom_memset16(row_start - extend_left, row_start[0], extend_left);
        eb_aom_memset16(row_start + width, row_start[width - 1], extend_right);
        row_start += src_stride;
    }

    /* Pass 2: top and bottom borders, including the corners. */
    const uint16_t *first_line = plane - extend_left;
    const uint16_t *last_line  = plane + src_stride * (height - 1) - extend_left;
    uint16_t       *above      = plane + src_stride * -extend_top - extend_left;
    uint16_t       *below      = plane + src_stride * height - extend_left;

    for (n = 0; n < extend_top; ++n) {
        memcpy(above, first_line, padded_width * sizeof(uint16_t));
        above += src_stride;
    }

    for (n = 0; n < extend_bottom; ++n) {
        memcpy(below, last_line, padded_width * sizeof(uint16_t));
        below += src_stride;
    }
}
894 :
895 0 : void eb_aom_yv12_extend_frame_borders_c(Yv12BufferConfig *ybf,
896 : const int32_t num_planes) {
897 : assert(ybf->border % 2 == 0);
898 : assert(ybf->y_height - ybf->y_crop_height < 16);
899 : assert(ybf->y_width - ybf->y_crop_width < 16);
900 : assert(ybf->y_height - ybf->y_crop_height >= 0);
901 : assert(ybf->y_width - ybf->y_crop_width >= 0);
902 :
903 0 : if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) {
904 0 : for (int32_t plane = 0; plane < num_planes; ++plane) {
905 0 : const int32_t is_uv = plane > 0;
906 0 : const int32_t plane_border = ybf->border >> is_uv;
907 0 : extend_plane_high(
908 : ybf->buffers[plane], ybf->strides[is_uv], ybf->crop_widths[is_uv],
909 : ybf->crop_heights[is_uv], plane_border, plane_border,
910 0 : plane_border + ybf->heights[is_uv] - ybf->crop_heights[is_uv],
911 0 : plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv]);
912 : }
913 0 : return;
914 : }
915 0 : for (int32_t plane = 0; plane < num_planes; ++plane) {
916 0 : const int32_t is_uv = plane > 0;
917 0 : const int32_t plane_border = ybf->border >> is_uv;
918 0 : extend_plane(ybf->buffers[plane], ybf->strides[is_uv],
919 : ybf->crop_widths[is_uv], ybf->crop_heights[is_uv],
920 : plane_border, plane_border,
921 0 : plane_border + ybf->heights[is_uv] - ybf->crop_heights[is_uv],
922 0 : plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv]);
923 : }
924 : }
925 :
/*
 * Copies num 16-bit samples between two buffers passed as byte pointers.
 * Both pointers are converted with CONVERT_TO_SHORTPTR() before the copy.
 */
static void memcpy_short_addr(uint8_t *dst8, const uint8_t *src8, int32_t num) {
    memcpy(CONVERT_TO_SHORTPTR(dst8),
           CONVERT_TO_SHORTPTR(src8),
           num * sizeof(uint16_t));
}
931 :
932 : // Copies the source image into the destination image and updates the
933 : // destination's UMV borders.
934 : // Note: The frames are assumed to be identical in size.
935 0 : void eb_aom_yv12_copy_frame_c(const Yv12BufferConfig *src_bc,
936 : Yv12BufferConfig *dst_bc, const int32_t num_planes) {
937 : assert((src_bc->flags & YV12_FLAG_HIGHBITDEPTH) ==
938 : (dst_bc->flags & YV12_FLAG_HIGHBITDEPTH));
939 :
940 0 : if (src_bc->flags & YV12_FLAG_HIGHBITDEPTH) {
941 0 : for (int32_t plane = 0; plane < num_planes; ++plane) {
942 0 : const uint8_t *plane_src = src_bc->buffers[plane];
943 0 : uint8_t *plane_dst = dst_bc->buffers[plane];
944 0 : const int32_t is_uv = plane > 0;
945 :
946 0 : for (int32_t row = 0; row < src_bc->heights[is_uv]; ++row) {
947 0 : memcpy_short_addr(plane_dst, plane_src, src_bc->widths[is_uv]);
948 0 : plane_src += src_bc->strides[is_uv];
949 0 : plane_dst += dst_bc->strides[is_uv];
950 : }
951 : }
952 0 : eb_aom_yv12_extend_frame_borders_c(dst_bc, num_planes);
953 0 : return;
954 : }
955 0 : for (int32_t plane = 0; plane < num_planes; ++plane) {
956 0 : const uint8_t *plane_src = src_bc->buffers[plane];
957 0 : uint8_t *plane_dst = dst_bc->buffers[plane];
958 0 : const int32_t is_uv = plane > 0;
959 :
960 0 : for (int32_t row = 0; row < src_bc->heights[is_uv]; ++row) {
961 0 : memcpy(plane_dst, plane_src, src_bc->widths[is_uv]);
962 0 : plane_src += src_bc->strides[is_uv];
963 0 : plane_dst += dst_bc->strides[is_uv];
964 : }
965 : }
966 0 : eb_aom_yv12_extend_frame_borders_c(dst_bc, num_planes);
967 : }
968 :
969 60 : void eb_aom_yv12_copy_y_c(const Yv12BufferConfig *src_ybc,
970 : Yv12BufferConfig *dst_ybc) {
971 : int32_t row;
972 60 : const uint8_t *src = src_ybc->y_buffer;
973 60 : uint8_t *dst = dst_ybc->y_buffer;
974 :
975 60 : if (src_ybc->flags & YV12_FLAG_HIGHBITDEPTH) {
976 0 : const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
977 0 : uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
978 0 : for (row = 0; row < src_ybc->y_height; ++row) {
979 0 : memcpy(dst16, src16, src_ybc->y_width * sizeof(uint16_t));
980 0 : src16 += src_ybc->y_stride;
981 0 : dst16 += dst_ybc->y_stride;
982 : }
983 0 : return;
984 : }
985 :
986 21660 : for (row = 0; row < src_ybc->y_height; ++row) {
987 21600 : memcpy(dst, src, src_ybc->y_width);
988 21600 : src += src_ybc->y_stride;
989 21600 : dst += dst_ybc->y_stride;
990 : }
991 : }
992 :
993 7 : void eb_aom_yv12_copy_u_c(const Yv12BufferConfig *src_bc,
994 : Yv12BufferConfig *dst_bc) {
995 : int32_t row;
996 7 : const uint8_t *src = src_bc->u_buffer;
997 7 : uint8_t *dst = dst_bc->u_buffer;
998 :
999 7 : if (src_bc->flags & YV12_FLAG_HIGHBITDEPTH) {
1000 0 : const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
1001 0 : uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
1002 0 : for (row = 0; row < src_bc->uv_height; ++row) {
1003 0 : memcpy(dst16, src16, src_bc->uv_width * sizeof(uint16_t));
1004 0 : src16 += src_bc->uv_stride;
1005 0 : dst16 += dst_bc->uv_stride;
1006 : }
1007 0 : return;
1008 : }
1009 :
1010 1267 : for (row = 0; row < src_bc->uv_height; ++row) {
1011 1260 : memcpy(dst, src, src_bc->uv_width);
1012 1260 : src += src_bc->uv_stride;
1013 1260 : dst += dst_bc->uv_stride;
1014 : }
1015 : }
1016 :
1017 5 : void eb_aom_yv12_copy_v_c(const Yv12BufferConfig *src_bc,
1018 : Yv12BufferConfig *dst_bc) {
1019 : int32_t row;
1020 5 : const uint8_t *src = src_bc->v_buffer;
1021 5 : uint8_t *dst = dst_bc->v_buffer;
1022 :
1023 5 : if (src_bc->flags & YV12_FLAG_HIGHBITDEPTH) {
1024 0 : const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
1025 0 : uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
1026 0 : for (row = 0; row < src_bc->uv_height; ++row) {
1027 0 : memcpy(dst16, src16, src_bc->uv_width * sizeof(uint16_t));
1028 0 : src16 += src_bc->uv_stride;
1029 0 : dst16 += dst_bc->uv_stride;
1030 : }
1031 0 : return;
1032 : }
1033 :
1034 905 : for (row = 0; row < src_bc->uv_height; ++row) {
1035 900 : memcpy(dst, src, src_bc->uv_width);
1036 900 : src += src_bc->uv_stride;
1037 900 : dst += dst_bc->uv_stride;
1038 : }
1039 : }
|