Line data Source code
1 : /*
2 : * Copyright(c) 2019 Intel Corporation
3 : * SPDX-License-Identifier: BSD-2-Clause-Patent
4 : */
5 :
6 : /*
7 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
8 : *
9 : * This source code is subject to the terms of the BSD 2 Clause License and
10 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
11 : * was not distributed with this source code in the LICENSE file, you can
12 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
13 : * Media Patent License 1.0 was not distributed with this source code in the
14 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
15 : */
16 :
17 : #include <stdlib.h>
18 : #include "aom_dsp_rtcd.h"
19 : #include "EbDefinitions.h"
20 : #include "EbCdefProcess.h"
21 : #include "EbEncDecResults.h"
22 : #include "EbThreads.h"
23 : #include "EbReferenceObject.h"
24 :
25 : #include "EbCdef.h"
26 : #include "EbEncDecProcess.h"
27 :
28 : static int32_t priconv[REDUCED_PRI_STRENGTHS] = { 0, 1, 2, 3, 5, 7, 10, 13 };
29 :
30 : void copy_sb8_16(uint16_t *dst, int32_t dstride,
31 : const uint8_t *src, int32_t src_voffset, int32_t src_hoffset,
32 : int32_t sstride, int32_t vsize, int32_t hsize);
33 :
34 : void *eb_aom_memalign(size_t align, size_t size);
35 : void eb_aom_free(void *memblk);
36 : void *eb_aom_malloc(size_t size);
37 : int32_t eb_sb_all_skip(PictureControlSet *picture_control_set_ptr, const Av1Common *const cm, int32_t mi_row, int32_t mi_col);
38 : int32_t eb_sb_compute_cdef_list(PictureControlSet *picture_control_set_ptr, const Av1Common *const cm, int32_t mi_row, int32_t mi_col,
39 : cdef_list *dlist, BlockSize bs);
40 : void finish_cdef_search(
41 : EncDecContext *context_ptr,
42 : SequenceControlSet *sequence_control_set_ptr,
43 : PictureControlSet *picture_control_set_ptr
44 : ,int32_t selected_strength_cnt[64]
45 : );
46 : void av1_cdef_frame16bit(
47 : EncDecContext *context_ptr,
48 : SequenceControlSet *sequence_control_set_ptr,
49 : PictureControlSet *pCs);
50 : void eb_av1_cdef_frame(
51 : EncDecContext *context_ptr,
52 : SequenceControlSet *sequence_control_set_ptr,
53 : PictureControlSet *pCs);
54 : void eb_av1_loop_restoration_save_boundary_lines(const Yv12BufferConfig *frame, Av1Common *cm, int32_t after_cdef);
55 :
56 : /******************************************************
57 : * Cdef Context Constructor
58 : ******************************************************/
59 16 : EbErrorType cdef_context_ctor(
60 : CdefContext_t *context_ptr,
61 : EbFifo *cdef_input_fifo_ptr,
62 : EbFifo *cdef_output_fifo_ptr ,
63 : EbBool is16bit,
64 : uint32_t max_input_luma_width,
65 : uint32_t max_input_luma_height){
66 : (void)is16bit;
67 : (void)max_input_luma_width;
68 : (void)max_input_luma_height;
69 :
70 : // Input/Output System Resource Manager FIFOs
71 16 : context_ptr->cdef_input_fifo_ptr = cdef_input_fifo_ptr;
72 16 : context_ptr->cdef_output_fifo_ptr = cdef_output_fifo_ptr;
73 :
74 16 : return EB_ErrorNone;
75 : }
76 :
77 7168 : void cdef_seg_search(
78 : PictureControlSet *picture_control_set_ptr,
79 : SequenceControlSet *sequence_control_set_ptr,
80 : uint32_t segment_index)
81 : {
82 7168 : struct PictureParentControlSet *pPcs = picture_control_set_ptr->parent_pcs_ptr;
83 7168 : FrameHeader *frm_hdr = &pPcs->frm_hdr;
84 7168 : Av1Common* cm = picture_control_set_ptr->parent_pcs_ptr->av1_cm;
85 : uint32_t x_seg_idx;
86 : uint32_t y_seg_idx;
87 7168 : uint32_t picture_width_in_b64 = (sequence_control_set_ptr->seq_header.max_frame_width + 64 - 1) / 64;
88 7168 : uint32_t picture_height_in_b64 = (sequence_control_set_ptr->seq_header.max_frame_height + 64 - 1) / 64;
89 7168 : SEGMENT_CONVERT_IDX_TO_XY(segment_index, x_seg_idx, y_seg_idx, picture_control_set_ptr->cdef_segments_column_count);
90 7168 : uint32_t x_b64_start_idx = SEGMENT_START_IDX(x_seg_idx, picture_width_in_b64, picture_control_set_ptr->cdef_segments_column_count);
91 7168 : uint32_t x_b64_end_idx = SEGMENT_END_IDX(x_seg_idx, picture_width_in_b64, picture_control_set_ptr->cdef_segments_column_count);
92 7168 : uint32_t y_b64_start_idx = SEGMENT_START_IDX(y_seg_idx, picture_height_in_b64, picture_control_set_ptr->cdef_segments_row_count);
93 7168 : uint32_t y_b64_end_idx = SEGMENT_END_IDX(y_seg_idx, picture_height_in_b64, picture_control_set_ptr->cdef_segments_row_count);
94 :
95 7168 : int32_t fast = 0;
96 7168 : int32_t mi_rows = pPcs->av1_cm->mi_rows;
97 7168 : int32_t mi_cols = pPcs->av1_cm->mi_cols;
98 :
99 : uint32_t fbr, fbc;
100 : uint8_t *src[3];
101 : uint8_t *ref_coeff[3];
102 : cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
103 7168 : int32_t dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
104 7168 : int32_t var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
105 : int32_t stride_src[3];
106 : int32_t stride_ref[3];
107 : int32_t bsize[3];
108 : int32_t mi_wide_l2[3];
109 : int32_t mi_high_l2[3];
110 : int32_t xdec[3];
111 : int32_t ydec[3];
112 : int32_t pli;
113 : int32_t cdef_count;
114 7168 : int32_t coeff_shift = AOMMAX(sequence_control_set_ptr->static_config.encoder_bit_depth - 8, 0);
115 7168 : int32_t nvfb = (mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
116 7168 : int32_t nhfb = (mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
117 7168 : int32_t pri_damping = 3 + (frm_hdr->quantization_params.base_q_idx >> 6);
118 7168 : int32_t sec_damping = 3 + (frm_hdr->quantization_params.base_q_idx >> 6);
119 :
120 7168 : const int32_t num_planes = 3;
121 7168 : const int32_t total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
122 : DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
123 : uint16_t *in;
124 : DECLARE_ALIGNED(32, uint8_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
125 :
126 : int32_t gi_step;
127 : int32_t mid_gi;
128 : int32_t start_gi;
129 : int32_t end_gi;
130 :
131 7168 : EbPictureBufferDesc *input_picture_ptr = (EbPictureBufferDesc*)picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
132 : EbPictureBufferDesc * recon_picture_ptr;
133 7168 : if (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_TRUE)
134 4056 : recon_picture_ptr = ((EbReferenceObject*)picture_control_set_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture;
135 : else
136 3112 : recon_picture_ptr = picture_control_set_ptr->recon_picture_ptr;
137 :
138 28667 : for (pli = 0; pli < num_planes; pli++) {
139 21499 : int32_t subsampling_x = (pli == 0) ? 0 : 1;
140 21499 : int32_t subsampling_y = (pli == 0) ? 0 : 1;
141 21499 : xdec[pli] = subsampling_x;
142 21499 : ydec[pli] = subsampling_y;
143 14349 : bsize[pli] = ydec[pli] ? (xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
144 35848 : : (xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
145 21499 : mi_wide_l2[pli] = MI_SIZE_LOG2 - subsampling_x;
146 21499 : mi_high_l2[pli] = MI_SIZE_LOG2 - subsampling_y;
147 :
148 21499 : src[pli] = (uint8_t *)picture_control_set_ptr->src[pli];
149 21499 : ref_coeff[pli] = (uint8_t *)picture_control_set_ptr->ref_coeff[pli];
150 21499 : stride_src[pli]= pli == 0 ? recon_picture_ptr->stride_y : (pli == 1 ? recon_picture_ptr->stride_cb : recon_picture_ptr->stride_cr);
151 21499 : stride_ref[pli]= pli == 0 ? input_picture_ptr->stride_y : (pli == 1 ? input_picture_ptr->stride_cb : input_picture_ptr->stride_cr);
152 : }
153 :
154 7168 : in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
155 :
156 14407 : for (fbr = y_b64_start_idx; fbr < y_b64_end_idx; ++fbr) {
157 14386 : for (fbc = x_b64_start_idx; fbc < x_b64_end_idx; ++fbc) {
158 : int32_t nvb, nhb;
159 : int32_t gi;
160 7147 : int32_t dirinit = 0;
161 7147 : nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
162 7147 : nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
163 7147 : int32_t hb_step = 1; //these should be all time with 64x64 LCUs
164 7147 : int32_t vb_step = 1;
165 7147 : BlockSize bs = BLOCK_64X64;
166 7147 : ModeInfo **mi = picture_control_set_ptr->mi_grid_base + MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc;
167 7147 : const MbModeInfo *mbmi = &mi[0]->mbmi;
168 :
169 7147 : if (((fbc & 1) &&
170 3597 : (mbmi->block_mi.sb_type == BLOCK_128X128 || mbmi->block_mi.sb_type == BLOCK_128X64)) ||
171 7150 : ((fbr & 1) &&
172 3598 : (mbmi->block_mi.sb_type == BLOCK_128X128 || mbmi->block_mi.sb_type == BLOCK_64X128)))
173 5725 : continue;
174 7147 : if (mbmi->block_mi.sb_type == BLOCK_128X128 || mbmi->block_mi.sb_type == BLOCK_128X64 ||
175 7185 : mbmi->block_mi.sb_type == BLOCK_64X128)
176 0 : bs = mbmi->block_mi.sb_type;
177 :
178 7147 : if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
179 0 : nhb = AOMMIN(MI_SIZE_128X128, cm->mi_cols - MI_SIZE_64X64 * fbc);
180 0 : hb_step = 2;
181 : }
182 7147 : if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
183 0 : nvb = AOMMIN(MI_SIZE_128X128, cm->mi_rows - MI_SIZE_64X64 * fbr);
184 0 : vb_step = 2;
185 : }
186 :
187 : // No filtering if the entire filter block is skipped
188 7147 : if (eb_sb_all_skip(picture_control_set_ptr, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64))
189 5725 : continue;
190 :
191 1419 : cdef_count = eb_sb_compute_cdef_list(picture_control_set_ptr, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);
192 :
193 5795 : for (pli = 0; pli < num_planes; pli++) {
194 65011400 : for (int i = 0; i < CDEF_INBUF_SIZE; i++)
195 65007100 : inbuf[i] = CDEF_VERY_LARGE;
196 :
197 4303 : int32_t yoff = CDEF_VBORDER * (fbr != 0);
198 4303 : int32_t xoff = CDEF_HBORDER * (fbc != 0);
199 4303 : int32_t ysize = (nvb << mi_high_l2[pli]) + CDEF_VBORDER * ((int32_t)fbr + vb_step < nvfb) + yoff;
200 4303 : int32_t xsize = (nhb << mi_wide_l2[pli]) + CDEF_HBORDER * ((int32_t)fbc + hb_step < nhfb) + xoff;
201 :
202 4303 : copy_sb8_16(
203 4303 : &in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
204 4303 : src[pli],
205 4303 : (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
206 4303 : (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
207 : stride_src[pli], ysize, xsize);
208 4300 : gi_step = get_cdef_gi_step(pPcs->cdef_filter_mode);
209 4296 : mid_gi = pPcs->cdf_ref_frame_strenght;
210 4296 : start_gi = pPcs->use_ref_frame_cdef_strength && pPcs->cdef_filter_mode == 1 ? (AOMMAX(0, mid_gi - gi_step)) : 0;
211 4296 : end_gi = pPcs->use_ref_frame_cdef_strength ? AOMMIN(total_strengths, mid_gi + gi_step) : pPcs->cdef_filter_mode == 1 ? 8 : total_strengths;
212 :
213 105186 : for (gi = start_gi; gi < end_gi; gi++) {
214 : int32_t threshold;
215 : uint64_t curr_mse;
216 : int32_t sec_strength;
217 100826 : threshold = gi / CDEF_SEC_STRENGTHS;
218 100826 : if (fast) threshold = priconv[threshold];
219 : /* We avoid filtering the pixels for which some of the pixels to
220 : average are outside the frame. We could change the filter instead, but it would add special cases for any future vectorization. */
221 100826 : sec_strength = gi % CDEF_SEC_STRENGTHS;
222 :
223 100826 : eb_cdef_filter_fb(tmp_dst, NULL, CDEF_BSTRIDE, in, xdec[pli], ydec[pli],
224 : dir, &dirinit, var, pli, dlist, cdef_count, threshold,
225 100826 : sec_strength + (sec_strength == 3), pri_damping,
226 : sec_damping, coeff_shift);
227 :
228 :
229 100754 : curr_mse = eb_compute_cdef_dist_8bit(
230 100754 : ref_coeff[pli] +
231 100754 : (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride_ref[pli] +
232 100754 : (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]),
233 100754 : stride_ref[pli], tmp_dst, dlist, cdef_count, (BlockSize)bsize[pli], coeff_shift,
234 : pli);
235 :
236 100890 : if (pli < 2)
237 67252 : picture_control_set_ptr->mse_seg[pli][fbr*nhfb + fbc][gi] = curr_mse;
238 : else
239 33638 : picture_control_set_ptr->mse_seg[1][fbr*nhfb + fbc][gi] += curr_mse;
240 : }
241 :
242 : //if (pPcs->picture_number == 15)
243 : // printf(" bs:%i count:%i mse:%I64i\n", bs, cdef_count,picture_control_set_ptr->mse_seg[0][fbr*nhfb + fbc][4]);
244 : }
245 : }
246 : }
247 7238 : }
248 0 : void cdef_seg_search16bit(
249 : PictureControlSet *picture_control_set_ptr,
250 : SequenceControlSet *sequence_control_set_ptr,
251 : uint32_t segment_index)
252 : {
253 0 : EbPictureBufferDesc *input_pic_ptr = picture_control_set_ptr->input_frame16bit;
254 0 : EbPictureBufferDesc *recon_pic_ptr =
255 0 : (picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_TRUE) ?
256 0 : ((EbReferenceObject*)picture_control_set_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture16bit :
257 : picture_control_set_ptr->recon_picture16bit_ptr;
258 :
259 0 : struct PictureParentControlSet *pPcs = picture_control_set_ptr->parent_pcs_ptr;
260 0 : FrameHeader *frm_hdr = &pPcs->frm_hdr;
261 0 : Av1Common* cm = picture_control_set_ptr->parent_pcs_ptr->av1_cm;
262 : uint32_t x_seg_idx;
263 : uint32_t y_seg_idx;
264 0 : uint32_t picture_width_in_b64 = (sequence_control_set_ptr->seq_header.max_frame_width + 64 - 1) / 64;
265 0 : uint32_t picture_height_in_b64 = (sequence_control_set_ptr->seq_header.max_frame_height + 64 - 1) / 64;
266 0 : SEGMENT_CONVERT_IDX_TO_XY(segment_index, x_seg_idx, y_seg_idx, picture_control_set_ptr->cdef_segments_column_count);
267 0 : uint32_t x_b64_start_idx = SEGMENT_START_IDX(x_seg_idx, picture_width_in_b64, picture_control_set_ptr->cdef_segments_column_count);
268 0 : uint32_t x_b64_end_idx = SEGMENT_END_IDX(x_seg_idx, picture_width_in_b64, picture_control_set_ptr->cdef_segments_column_count);
269 0 : uint32_t y_b64_start_idx = SEGMENT_START_IDX(y_seg_idx, picture_height_in_b64, picture_control_set_ptr->cdef_segments_row_count);
270 0 : uint32_t y_b64_end_idx = SEGMENT_END_IDX(y_seg_idx, picture_height_in_b64, picture_control_set_ptr->cdef_segments_row_count);
271 :
272 0 : int32_t fast = 0;
273 0 : int32_t mi_rows = pPcs->av1_cm->mi_rows;
274 0 : int32_t mi_cols = pPcs->av1_cm->mi_cols;
275 :
276 : uint32_t fbr, fbc;
277 : uint16_t *src[3];
278 : uint16_t *ref_coeff[3];
279 : cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
280 0 : int32_t dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
281 0 : int32_t var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
282 : int32_t stride_src[3];
283 : int32_t stride_ref[3];
284 : int32_t bsize[3];
285 : int32_t mi_wide_l2[3];
286 : int32_t mi_high_l2[3];
287 : int32_t xdec[3];
288 : int32_t ydec[3];
289 : int32_t pli;
290 : int32_t cdef_count;
291 0 : int32_t coeff_shift = AOMMAX(sequence_control_set_ptr->static_config.encoder_bit_depth - 8, 0);
292 0 : int32_t nvfb = (mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
293 0 : int32_t nhfb = (mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
294 0 : int32_t pri_damping = 3 + (frm_hdr->quantization_params.base_q_idx >> 6);
295 0 : int32_t sec_damping = 3 + (frm_hdr->quantization_params.base_q_idx >> 6);
296 :
297 0 : const int32_t num_planes = 3;
298 0 : const int32_t total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
299 : DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
300 : uint16_t *in;
301 : DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
302 : int32_t gi_step;
303 : int32_t mid_gi;
304 : int32_t start_gi;
305 : int32_t end_gi;
306 :
307 0 : for (pli = 0; pli < num_planes; pli++) {
308 0 : int32_t subsampling_x = (pli == 0) ? 0 : 1;
309 0 : int32_t subsampling_y = (pli == 0) ? 0 : 1;
310 0 : xdec[pli] = subsampling_x;
311 0 : ydec[pli] = subsampling_y;
312 0 : bsize[pli] = ydec[pli] ? (xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
313 0 : : (xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
314 :
315 0 : mi_wide_l2[pli] = MI_SIZE_LOG2 - subsampling_x;
316 0 : mi_high_l2[pli] = MI_SIZE_LOG2 - subsampling_y;
317 :
318 0 : src[pli] = picture_control_set_ptr->src[pli];
319 0 : ref_coeff[pli] = picture_control_set_ptr->ref_coeff[pli];
320 0 : stride_src[pli] = pli == 0 ? recon_pic_ptr->stride_y : (pli == 1 ? recon_pic_ptr->stride_cb : recon_pic_ptr->stride_cr);
321 0 : stride_ref[pli] = pli == 0 ? input_pic_ptr->stride_y : (pli == 1 ? input_pic_ptr->stride_cb : input_pic_ptr->stride_cr);
322 : }
323 :
324 0 : in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
325 :
326 0 : for (fbr = y_b64_start_idx; fbr < y_b64_end_idx; ++fbr) {
327 0 : for (fbc = x_b64_start_idx; fbc < x_b64_end_idx; ++fbc) {
328 : int32_t nvb, nhb;
329 : int32_t gi;
330 0 : int32_t dirinit = 0;
331 0 : nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
332 0 : nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
333 0 : int32_t hb_step = 1; //these should be all time with 64x64 LCUs
334 0 : int32_t vb_step = 1;
335 0 : BlockSize bs = BLOCK_64X64;
336 0 : ModeInfo **mi = picture_control_set_ptr->mi_grid_base + MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc;
337 0 : const MbModeInfo *mbmi = &mi[0]->mbmi;
338 :
339 0 : if (((fbc & 1) &&
340 0 : (mbmi->block_mi.sb_type == BLOCK_128X128 || mbmi->block_mi.sb_type == BLOCK_128X64)) ||
341 0 : ((fbr & 1) &&
342 0 : (mbmi->block_mi.sb_type == BLOCK_128X128 || mbmi->block_mi.sb_type == BLOCK_64X128)))
343 0 : continue;
344 0 : if (mbmi->block_mi.sb_type == BLOCK_128X128 || mbmi->block_mi.sb_type == BLOCK_128X64 ||
345 0 : mbmi->block_mi.sb_type == BLOCK_64X128)
346 0 : bs = mbmi->block_mi.sb_type;
347 0 : if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
348 0 : nhb = AOMMIN(MI_SIZE_128X128, cm->mi_cols - MI_SIZE_64X64 * fbc);
349 0 : hb_step = 2;
350 : }
351 0 : if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
352 0 : nvb = AOMMIN(MI_SIZE_128X128, cm->mi_rows - MI_SIZE_64X64 * fbr);
353 0 : vb_step = 2;
354 : }
355 :
356 : // No filtering if the entire filter block is skipped
357 0 : if (eb_sb_all_skip(picture_control_set_ptr, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64))
358 0 : continue;
359 :
360 0 : cdef_count = eb_sb_compute_cdef_list(picture_control_set_ptr, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);
361 :
362 0 : for (pli = 0; pli < num_planes; pli++) {
363 0 : for (int i = 0; i < CDEF_INBUF_SIZE; i++)
364 0 : inbuf[i] = CDEF_VERY_LARGE;
365 :
366 0 : int32_t yoff = CDEF_VBORDER * (fbr != 0);
367 0 : int32_t xoff = CDEF_HBORDER * (fbc != 0);
368 0 : int32_t ysize = (nvb << mi_high_l2[pli]) + CDEF_VBORDER * ((int32_t)fbr + vb_step < nvfb) + yoff;
369 0 : int32_t xsize = (nhb << mi_wide_l2[pli]) + CDEF_HBORDER * ((int32_t)fbc + hb_step < nhfb) + xoff;
370 :
371 0 : copy_sb16_16(
372 0 : &in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
373 0 : src[pli],
374 0 : (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
375 0 : (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
376 : stride_src[pli], ysize, xsize);
377 0 : gi_step = get_cdef_gi_step(pPcs->cdef_filter_mode);
378 0 : mid_gi = pPcs->cdf_ref_frame_strenght;
379 0 : start_gi = pPcs->use_ref_frame_cdef_strength && pPcs->cdef_filter_mode == 1 ? (AOMMAX(0, mid_gi - gi_step)) : 0;
380 0 : end_gi = pPcs->use_ref_frame_cdef_strength ? AOMMIN(total_strengths, mid_gi + gi_step) : pPcs->cdef_filter_mode == 1 ? 8 : total_strengths;
381 :
382 0 : for (gi = start_gi; gi < end_gi; gi++) {
383 : int32_t threshold;
384 : uint64_t curr_mse;
385 : int32_t sec_strength;
386 0 : threshold = gi / CDEF_SEC_STRENGTHS;
387 0 : if (fast) threshold = priconv[threshold];
388 : /* We avoid filtering the pixels for which some of the pixels to
389 : average are outside the frame. We could change the filter instead, but it would add special cases for any future vectorization. */
390 0 : sec_strength = gi % CDEF_SEC_STRENGTHS;
391 :
392 0 : eb_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in, xdec[pli], ydec[pli],
393 : dir, &dirinit, var, pli, dlist, cdef_count, threshold,
394 0 : sec_strength + (sec_strength == 3), pri_damping,
395 : sec_damping, coeff_shift);
396 :
397 0 : curr_mse = eb_compute_cdef_dist(
398 0 : ref_coeff[pli] +
399 0 : (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride_ref[pli] +
400 0 : (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]),
401 0 : stride_ref[pli], tmp_dst, dlist, cdef_count, (BlockSize)bsize[pli], coeff_shift,
402 : pli);
403 :
404 0 : if (pli < 2)
405 0 : picture_control_set_ptr->mse_seg[pli][fbr*nhfb + fbc][gi] = curr_mse;
406 : else
407 0 : picture_control_set_ptr->mse_seg[1][fbr*nhfb + fbc][gi] += curr_mse;
408 : }
409 : }
410 : }
411 : }
412 0 : }
413 :
414 : /******************************************************
415 : * CDEF Kernel
416 : ******************************************************/
417 16 : void* cdef_kernel(void *input_ptr)
418 : {
419 : // Context & SCS & PCS
420 16 : CdefContext_t *context_ptr = (CdefContext_t*)input_ptr;
421 : PictureControlSet *picture_control_set_ptr;
422 : SequenceControlSet *sequence_control_set_ptr;
423 :
424 : FrameHeader *frm_hdr;
425 :
426 : //// Input
427 : EbObjectWrapper *dlf_results_wrapper_ptr;
428 : DlfResults *dlf_results_ptr;
429 :
430 : //// Output
431 : EbObjectWrapper *cdef_results_wrapper_ptr;
432 : CdefResults *cdef_results_ptr;
433 :
434 : // SB Loop variables
435 :
436 7200 : for (;;) {
437 : // Get DLF Results
438 7216 : eb_get_full_object(
439 : context_ptr->cdef_input_fifo_ptr,
440 : &dlf_results_wrapper_ptr);
441 :
442 7153 : dlf_results_ptr = (DlfResults*)dlf_results_wrapper_ptr->object_ptr;
443 7153 : picture_control_set_ptr = (PictureControlSet*)dlf_results_ptr->picture_control_set_wrapper_ptr->object_ptr;
444 7153 : sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
445 :
446 7153 : EbBool is16bit = (EbBool)(sequence_control_set_ptr->static_config.encoder_bit_depth > EB_8BIT);
447 7153 : Av1Common* cm = picture_control_set_ptr->parent_pcs_ptr->av1_cm;
448 7153 : frm_hdr = &picture_control_set_ptr->parent_pcs_ptr->frm_hdr;
449 7153 : int32_t selected_strength_cnt[64] = { 0 };
450 :
451 7153 : if (sequence_control_set_ptr->seq_header.enable_cdef && picture_control_set_ptr->parent_pcs_ptr->cdef_filter_mode)
452 : {
453 7155 : if (is16bit)
454 0 : cdef_seg_search16bit(
455 : picture_control_set_ptr,
456 : sequence_control_set_ptr,
457 : dlf_results_ptr->segment_index);
458 : else
459 7155 : cdef_seg_search(
460 : picture_control_set_ptr,
461 : sequence_control_set_ptr,
462 : dlf_results_ptr->segment_index);
463 : }
464 :
465 : //all seg based search is done. update total processed segments. if all done, finish the search and perfrom application.
466 7156 : eb_block_on_mutex(picture_control_set_ptr->cdef_search_mutex);
467 :
468 7200 : picture_control_set_ptr->tot_seg_searched_cdef++;
469 7200 : if (picture_control_set_ptr->tot_seg_searched_cdef == picture_control_set_ptr->cdef_segments_total_count)
470 : {
471 : // printf(" CDEF all seg here %i\n", picture_control_set_ptr->picture_number);
472 120 : if (sequence_control_set_ptr->seq_header.enable_cdef && picture_control_set_ptr->parent_pcs_ptr->cdef_filter_mode) {
473 120 : finish_cdef_search(
474 : 0,
475 : sequence_control_set_ptr,
476 : picture_control_set_ptr,
477 : selected_strength_cnt);
478 :
479 120 : if (sequence_control_set_ptr->seq_header.enable_restoration != 0 || picture_control_set_ptr->parent_pcs_ptr->is_used_as_reference_flag || sequence_control_set_ptr->static_config.recon_enabled){
480 90 : if (is16bit)
481 0 : av1_cdef_frame16bit(
482 : 0,
483 : sequence_control_set_ptr,
484 : picture_control_set_ptr);
485 : else
486 90 : eb_av1_cdef_frame(
487 : 0,
488 : sequence_control_set_ptr,
489 : picture_control_set_ptr);
490 : }
491 : }
492 : else {
493 0 : frm_hdr->CDEF_params.cdef_bits = 0;
494 0 : frm_hdr->CDEF_params.cdef_y_strength[0] = 0;
495 0 : picture_control_set_ptr->parent_pcs_ptr->nb_cdef_strengths = 1;
496 0 : frm_hdr->CDEF_params.cdef_uv_strength[0] = 0;
497 : }
498 :
499 : //restoration prep
500 :
501 120 : if (sequence_control_set_ptr->seq_header.enable_restoration)
502 : {
503 60 : eb_av1_loop_restoration_save_boundary_lines(
504 60 : cm->frame_to_show,
505 : cm,
506 : 1);
507 :
508 : //are these still needed here?/!!!
509 60 : eb_extend_frame(cm->frame_to_show->buffers[0], cm->frame_to_show->crop_widths[0], cm->frame_to_show->crop_heights[0],
510 60 : cm->frame_to_show->strides[0], RESTORATION_BORDER, RESTORATION_BORDER, is16bit);
511 60 : eb_extend_frame(cm->frame_to_show->buffers[1], cm->frame_to_show->crop_widths[1], cm->frame_to_show->crop_heights[1],
512 60 : cm->frame_to_show->strides[1], RESTORATION_BORDER, RESTORATION_BORDER, is16bit);
513 60 : eb_extend_frame(cm->frame_to_show->buffers[2], cm->frame_to_show->crop_widths[1], cm->frame_to_show->crop_heights[1],
514 60 : cm->frame_to_show->strides[1], RESTORATION_BORDER, RESTORATION_BORDER, is16bit);
515 : }
516 :
517 120 : picture_control_set_ptr->rest_segments_column_count = sequence_control_set_ptr->rest_segment_column_count;
518 120 : picture_control_set_ptr->rest_segments_row_count = sequence_control_set_ptr->rest_segment_row_count;
519 120 : picture_control_set_ptr->rest_segments_total_count = (uint16_t)(picture_control_set_ptr->rest_segments_column_count * picture_control_set_ptr->rest_segments_row_count);
520 120 : picture_control_set_ptr->tot_seg_searched_rest = 0;
521 : uint32_t segment_index;
522 240 : for (segment_index = 0; segment_index < picture_control_set_ptr->rest_segments_total_count; ++segment_index)
523 : {
524 : // Get Empty Cdef Results to Rest
525 120 : eb_get_empty_object(
526 : context_ptr->cdef_output_fifo_ptr,
527 : &cdef_results_wrapper_ptr);
528 120 : cdef_results_ptr = (struct CdefResults*)cdef_results_wrapper_ptr->object_ptr;
529 120 : cdef_results_ptr->picture_control_set_wrapper_ptr = dlf_results_ptr->picture_control_set_wrapper_ptr;
530 120 : cdef_results_ptr->segment_index = segment_index;
531 : // Post Cdef Results
532 120 : eb_post_full_object(cdef_results_wrapper_ptr);
533 : }
534 : }
535 7200 : eb_release_mutex(picture_control_set_ptr->cdef_search_mutex);
536 :
537 : // Release Dlf Results
538 7199 : eb_release_object(dlf_results_wrapper_ptr);
539 : }
540 :
541 : return EB_NULL;
542 : }
|