Line data Source code
1 : /*
2 : * Copyright(c) 2019 Intel Corporation
3 : * SPDX - License - Identifier: BSD - 2 - Clause - Patent
4 : */
5 :
6 : /*
7 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
8 : *
9 : * This source code is subject to the terms of the BSD 2 Clause License and
10 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
11 : * was not distributed with this source code in the LICENSE file, you can
12 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
13 : * Media Patent License 1.0 was not distributed with this source code in the
14 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
15 : */
16 :
17 : #include <string.h>
18 :
19 : #include "EbDefinitions.h"
20 : #include "EbUtility.h"
21 : #include "EbPictureControlSet.h"
22 : #include "EbCodingUnit.h"
23 : #include "EbSequenceControlSet.h"
24 : #include "EbReferenceObject.h"
25 : #include "EbDeblockingFilter.h"
26 :
27 : #include "EbCommonUtils.h"
28 :
/* Maps a luma QP to a chroma QP: negative values pass through unchanged,
 * values above 57 are reduced by 6, and everything else is looked up through
 * map_chroma_qp().  Every use of the argument is parenthesized so that a
 * compound expression (e.g. `a + b`) is cast as a whole.
 * NOTE: iQpY is expanded several times; do not pass an expression with side
 * effects. */
#define convertToChromaQp(iQpY) ( ((iQpY) < 0) ? (iQpY) : (((iQpY) > 57) ? ((iQpY)-6) : (int32_t)(map_chroma_qp((uint32_t)(iQpY)))) )
30 :
31 : static const int delta_lf_id_lut[MAX_MB_PLANE][2] = { { 0, 1 },
32 : { 2, 2 },
33 : { 3, 3 } };
34 :
35 : static const SEG_LVL_FEATURES seg_lvl_lf_lut[MAX_MB_PLANE][2] = {
36 : { SEG_LVL_ALT_LF_Y_V, SEG_LVL_ALT_LF_Y_H },
37 : { SEG_LVL_ALT_LF_U, SEG_LVL_ALT_LF_U },
38 : { SEG_LVL_ALT_LF_V, SEG_LVL_ALT_LF_V }
39 : };
40 :
41 : /** setQpArrayBasedOnCU()
42 : is used to set qp in the qp_array on a CU basis.
43 : */
44 0 : void set_qp_array_based_on_cu(
45 : PictureControlSet *picture_control_set_ptr, //input parameter
46 : uint32_t cuPos_x, //input parameter, sample-based horizontal picture-wise locatin of the CU
47 : uint32_t cuPos_y, //input parameter, sample-based vertical picture-wise locatin of the CU
48 : uint32_t cu_size_in_min_cu_size, //input parameter
49 : uint32_t cu_qp) //input parameter, Qp of the CU
50 : {
51 : uint32_t verticalIdx;
52 0 : uint32_t qpArrayIdx = (cuPos_y / MIN_BLOCK_SIZE) * picture_control_set_ptr->qp_array_stride + (cuPos_x / MIN_BLOCK_SIZE);
53 :
54 0 : for (verticalIdx = 0; verticalIdx < cu_size_in_min_cu_size; ++verticalIdx) {
55 0 : EB_MEMSET(picture_control_set_ptr->qp_array + qpArrayIdx + verticalIdx * picture_control_set_ptr->qp_array_stride,
56 : cu_qp, sizeof(uint8_t)*cu_size_in_min_cu_size);
57 : }
58 :
59 0 : return;
60 : }
61 :
62 0 : static INLINE int8_t signed_char_clamp(int32_t t) {
63 0 : return (int8_t)clamp(t, -128, 127);
64 : }
65 :
66 0 : static INLINE int16_t signed_char_clamp_high(int32_t t, int32_t bd) {
67 0 : switch (bd) {
68 0 : case 10: return (int16_t)clamp(t, -128 * 4, 128 * 4 - 1);
69 0 : case 12: return (int16_t)clamp(t, -128 * 16, 128 * 16 - 1);
70 0 : case 8:
71 0 : default: return (int16_t)clamp(t, -128, 128 - 1);
72 : }
73 : }
74 :
75 : // should we apply any filter at all: 11111111 yes, 00000000 no
76 0 : static INLINE int8_t filter_mask2(uint8_t limit, uint8_t blimit, uint8_t p1,
77 : uint8_t p0, uint8_t q0, uint8_t q1) {
78 0 : int8_t mask = 0;
79 0 : mask |= (abs(p1 - p0) > limit) * -1;
80 0 : mask |= (abs(q1 - q0) > limit) * -1;
81 0 : mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
82 0 : return ~mask;
83 : }
84 :
85 0 : static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit, uint8_t p3,
86 : uint8_t p2, uint8_t p1, uint8_t p0, uint8_t q0,
87 : uint8_t q1, uint8_t q2, uint8_t q3) {
88 0 : int8_t mask = 0;
89 0 : mask |= (abs(p3 - p2) > limit) * -1;
90 0 : mask |= (abs(p2 - p1) > limit) * -1;
91 0 : mask |= (abs(p1 - p0) > limit) * -1;
92 0 : mask |= (abs(q1 - q0) > limit) * -1;
93 0 : mask |= (abs(q2 - q1) > limit) * -1;
94 0 : mask |= (abs(q3 - q2) > limit) * -1;
95 0 : mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
96 0 : return ~mask;
97 : }
98 :
99 0 : static INLINE int8_t filter_mask3_chroma(uint8_t limit, uint8_t blimit,
100 : uint8_t p2, uint8_t p1, uint8_t p0,
101 : uint8_t q0, uint8_t q1, uint8_t q2) {
102 0 : int8_t mask = 0;
103 0 : mask |= (abs(p2 - p1) > limit) * -1;
104 0 : mask |= (abs(p1 - p0) > limit) * -1;
105 0 : mask |= (abs(q1 - q0) > limit) * -1;
106 0 : mask |= (abs(q2 - q1) > limit) * -1;
107 0 : mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
108 0 : return ~mask;
109 : }
110 :
111 0 : static INLINE int8_t flat_mask3_chroma(uint8_t thresh, uint8_t p2, uint8_t p1,
112 : uint8_t p0, uint8_t q0, uint8_t q1,
113 : uint8_t q2) {
114 0 : int8_t mask = 0;
115 0 : mask |= (abs(p1 - p0) > thresh) * -1;
116 0 : mask |= (abs(q1 - q0) > thresh) * -1;
117 0 : mask |= (abs(p2 - p0) > thresh) * -1;
118 0 : mask |= (abs(q2 - q0) > thresh) * -1;
119 0 : return ~mask;
120 : }
121 :
122 0 : static INLINE int8_t flat_mask4(uint8_t thresh, uint8_t p3, uint8_t p2,
123 : uint8_t p1, uint8_t p0, uint8_t q0, uint8_t q1,
124 : uint8_t q2, uint8_t q3) {
125 0 : int8_t mask = 0;
126 0 : mask |= (abs(p1 - p0) > thresh) * -1;
127 0 : mask |= (abs(q1 - q0) > thresh) * -1;
128 0 : mask |= (abs(p2 - p0) > thresh) * -1;
129 0 : mask |= (abs(q2 - q0) > thresh) * -1;
130 0 : mask |= (abs(p3 - p0) > thresh) * -1;
131 0 : mask |= (abs(q3 - q0) > thresh) * -1;
132 0 : return ~mask;
133 : }
134 :
135 : // is there high edge variance internal edge: 11111111 yes, 00000000 no
136 0 : static INLINE int8_t hev_mask(uint8_t thresh, uint8_t p1, uint8_t p0,
137 : uint8_t q0, uint8_t q1) {
138 0 : int8_t hev = 0;
139 0 : hev |= (abs(p1 - p0) > thresh) * -1;
140 0 : hev |= (abs(q1 - q0) > thresh) * -1;
141 0 : return hev;
142 : }
143 :
144 0 : static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1,
145 : uint8_t *op0, uint8_t *oq0, uint8_t *oq1) {
146 : int8_t filter1, filter2;
147 :
148 0 : const int8_t ps1 = (int8_t)*op1 ^ 0x80;
149 0 : const int8_t ps0 = (int8_t)*op0 ^ 0x80;
150 0 : const int8_t qs0 = (int8_t)*oq0 ^ 0x80;
151 0 : const int8_t qs1 = (int8_t)*oq1 ^ 0x80;
152 0 : const uint8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1);
153 :
154 : // add outer taps if we have high edge variance
155 0 : int8_t filter = signed_char_clamp(ps1 - qs1) & hev;
156 :
157 : // inner taps
158 0 : filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask;
159 :
160 : // save bottom 3 bits so that we round one side +4 and the other +3
161 : // if it equals 4 we'll set to adjust by -1 to account for the fact
162 : // we'd round 3 the other way
163 0 : filter1 = signed_char_clamp(filter + 4) >> 3;
164 0 : filter2 = signed_char_clamp(filter + 3) >> 3;
165 :
166 0 : *oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80;
167 0 : *op0 = signed_char_clamp(ps0 + filter2) ^ 0x80;
168 :
169 : // outer tap adjustments
170 0 : filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev;
171 :
172 0 : *oq1 = signed_char_clamp(qs1 - filter) ^ 0x80;
173 0 : *op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
174 0 : }
175 :
// Applies the 4-sample filter across a horizontal edge for 4 consecutive
// columns.  `s` points at the first sample below the edge (q0) and `p` is the
// distance between vertically adjacent samples.
void aom_lpf_horizontal_4_c(uint8_t *s, int32_t p /* pitch */,
                            const uint8_t *blimit, const uint8_t *limit,
                            const uint8_t *thresh) {
    for (int32_t col = 0; col < 4; ++col) {
        uint8_t *const px = s + col;
        const uint8_t p1 = px[-2 * p], p0 = px[-p];
        const uint8_t q0 = px[0], q1 = px[p];
        const int8_t  mask = filter_mask2(*limit, *blimit, p1, p0, q0, q1);

        filter4(mask, *thresh, px - 2 * p, px - p, px, px + p);
    }
}
192 :
// Applies the 4-sample filter across a vertical edge for 4 consecutive rows.
// `s` points at the first sample right of the edge (q0) and `pitch` is the
// distance between rows.
void aom_lpf_vertical_4_c(uint8_t *s, int32_t pitch, const uint8_t *blimit,
                          const uint8_t *limit, const uint8_t *thresh) {
    uint8_t *row = s;

    for (int32_t r = 0; r < 4; ++r, row += pitch) {
        const uint8_t p1 = row[-2], p0 = row[-1];
        const uint8_t q0 = row[0], q1 = row[1];
        const int8_t  mask = filter_mask2(*limit, *blimit, p1, p0, q0, q1);

        filter4(mask, *thresh, row - 2, row - 1, row, row + 1);
    }
}
208 :
// Filters two vertically stacked 4-row groups of a vertical edge, each with
// its own set of thresholds: rows 0-3 use set 0, rows 4-7 use set 1.
void aom_lpf_vertical_4_dual_c(uint8_t *s, int32_t pitch, const uint8_t *blimit0,
                               const uint8_t *limit0, const uint8_t *thresh0,
                               const uint8_t *blimit1, const uint8_t *limit1,
                               const uint8_t *thresh1) {
    uint8_t *const lower_rows = s + 4 * pitch;

    aom_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0);
    aom_lpf_vertical_4_c(lower_rows, pitch, blimit1, limit1, thresh1);
}
216 :
217 0 : static INLINE void filter6(int8_t mask, uint8_t thresh, int8_t flat,
218 : uint8_t *op2, uint8_t *op1, uint8_t *op0,
219 : uint8_t *oq0, uint8_t *oq1, uint8_t *oq2) {
220 0 : if (flat && mask) {
221 0 : const uint8_t p2 = *op2, p1 = *op1, p0 = *op0;
222 0 : const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2;
223 :
224 : // 5-tap filter [1, 2, 2, 2, 1]
225 0 : *op1 = ROUND_POWER_OF_TWO(p2 * 3 + p1 * 2 + p0 * 2 + q0, 3);
226 0 : *op0 = ROUND_POWER_OF_TWO(p2 + p1 * 2 + p0 * 2 + q0 * 2 + q1, 3);
227 0 : *oq0 = ROUND_POWER_OF_TWO(p1 + p0 * 2 + q0 * 2 + q1 * 2 + q2, 3);
228 0 : *oq1 = ROUND_POWER_OF_TWO(p0 + q0 * 2 + q1 * 2 + q2 * 3, 3);
229 : }
230 : else
231 0 : filter4(mask, thresh, op1, op0, oq0, oq1);
232 0 : }
233 :
234 0 : static INLINE void filter8(int8_t mask, uint8_t thresh, int8_t flat,
235 : uint8_t *op3, uint8_t *op2, uint8_t *op1,
236 : uint8_t *op0, uint8_t *oq0, uint8_t *oq1,
237 : uint8_t *oq2, uint8_t *oq3) {
238 0 : if (flat && mask) {
239 0 : const uint8_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
240 0 : const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
241 :
242 : // 7-tap filter [1, 1, 1, 2, 1, 1, 1]
243 0 : *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0, 3);
244 0 : *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1, 3);
245 0 : *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2, 3);
246 0 : *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3, 3);
247 0 : *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3, 3);
248 0 : *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3, 3);
249 : }
250 : else
251 0 : filter4(mask, thresh, op1, op0, oq0, oq1);
252 0 : }
253 :
// Applies the 6-sample filter across a horizontal edge for 4 consecutive
// columns.  `s` points at the first sample below the edge (q0) and `p` is the
// distance between vertically adjacent samples.  The flatness threshold is
// fixed at 1.
void aom_lpf_horizontal_6_c(uint8_t *s, int32_t p, const uint8_t *blimit,
                            const uint8_t *limit, const uint8_t *thresh) {
    for (int32_t col = 0; col < 4; ++col) {
        uint8_t *const px = s + col;
        const uint8_t p2 = px[-3 * p], p1 = px[-2 * p], p0 = px[-p];
        const uint8_t q0 = px[0], q1 = px[p], q2 = px[2 * p];

        const int8_t mask =
            filter_mask3_chroma(*limit, *blimit, p2, p1, p0, q0, q1, q2);
        const int8_t flat = flat_mask3_chroma(1, p2, p1, p0, q0, q1, q2);

        filter6(mask, *thresh, flat, px - 3 * p, px - 2 * p, px - p, px,
                px + p, px + 2 * p);
    }
}
273 :
// Applies the 6-sample filter across a vertical edge for 4 consecutive rows.
// `s` points at the first sample right of the edge (q0) and `pitch` is the
// distance between rows.  The flatness threshold is fixed at 1.
void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
                          const uint8_t *limit, const uint8_t *thresh)
{
    uint8_t *row = s;

    for (int r = 0; r < 4; ++r, row += pitch) {
        const uint8_t p2 = row[-3], p1 = row[-2], p0 = row[-1];
        const uint8_t q0 = row[0], q1 = row[1], q2 = row[2];

        const int8_t mask =
            filter_mask3_chroma(*limit, *blimit, p2, p1, p0, q0, q1, q2);
        const int8_t flat = flat_mask3_chroma(1, p2, p1, p0, q0, q1, q2);

        filter6(mask, *thresh, flat, row - 3, row - 2, row - 1, row, row + 1,
                row + 2);
    }
}
290 :
// Applies the 8-sample filter across a horizontal edge for 4 consecutive
// columns.  `s` points at the first sample below the edge (q0) and `p` is the
// distance between vertically adjacent samples.  The flatness threshold is
// fixed at 1.
void aom_lpf_horizontal_8_c(uint8_t *s, int32_t p, const uint8_t *blimit,
                            const uint8_t *limit, const uint8_t *thresh) {
    for (int32_t col = 0; col < 4; ++col) {
        uint8_t *const px = s + col;
        const uint8_t p3 = px[-4 * p], p2 = px[-3 * p], p1 = px[-2 * p], p0 = px[-p];
        const uint8_t q0 = px[0], q1 = px[p], q2 = px[2 * p], q3 = px[3 * p];

        const int8_t mask =
            filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
        const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);

        filter8(mask, *thresh, flat, px - 4 * p, px - 3 * p, px - 2 * p, px - p,
                px, px + p, px + 2 * p, px + 3 * p);
    }
}
310 :
// Filters two side-by-side 4-column groups of a horizontal edge, each with
// its own set of thresholds: columns 0-3 use set 0, columns 4-7 use set 1.
void aom_lpf_horizontal_8_dual_c(uint8_t *s, int32_t p, const uint8_t *blimit0,
                                 const uint8_t *limit0, const uint8_t *thresh0,
                                 const uint8_t *blimit1, const uint8_t *limit1,
                                 const uint8_t *thresh1) {
    uint8_t *const right_cols = s + 4;

    aom_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0);
    aom_lpf_horizontal_8_c(right_cols, p, blimit1, limit1, thresh1);
}
318 :
// Applies the 8-sample filter across a vertical edge for 4 consecutive rows.
// `s` points at the first sample right of the edge (q0) and `pitch` is the
// distance between rows.  The flatness threshold is fixed at 1.
void aom_lpf_vertical_8_c(uint8_t *s, int32_t pitch, const uint8_t *blimit,
                          const uint8_t *limit, const uint8_t *thresh) {
    uint8_t *row = s;

    for (int32_t r = 0; r < 4; ++r, row += pitch) {
        const uint8_t p3 = row[-4], p2 = row[-3], p1 = row[-2], p0 = row[-1];
        const uint8_t q0 = row[0], q1 = row[1], q2 = row[2], q3 = row[3];

        const int8_t mask =
            filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
        const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);

        filter8(mask, *thresh, flat, row - 4, row - 3, row - 2, row - 1, row,
                row + 1, row + 2, row + 3);
    }
}
335 :
// Filters two vertically stacked 4-row groups of a vertical edge with the
// 8-sample filter, each with its own set of thresholds: rows 0-3 use set 0,
// rows 4-7 use set 1.
void aom_lpf_vertical_8_dual_c(uint8_t *s, int32_t pitch, const uint8_t *blimit0,
                               const uint8_t *limit0, const uint8_t *thresh0,
                               const uint8_t *blimit1, const uint8_t *limit1,
                               const uint8_t *thresh1) {
    uint8_t *const lower_rows = s + 4 * pitch;

    aom_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0);
    aom_lpf_vertical_8_c(lower_rows, pitch, blimit1, limit1, thresh1);
}
343 :
344 0 : static INLINE void filter14(int8_t mask, uint8_t thresh, int8_t flat,
345 : int8_t flat2, uint8_t *op6, uint8_t *op5,
346 : uint8_t *op4, uint8_t *op3, uint8_t *op2,
347 : uint8_t *op1, uint8_t *op0, uint8_t *oq0,
348 : uint8_t *oq1, uint8_t *oq2, uint8_t *oq3,
349 : uint8_t *oq4, uint8_t *oq5, uint8_t *oq6) {
350 0 : if (flat2 && flat && mask) {
351 0 : const uint8_t p6 = *op6, p5 = *op5, p4 = *op4, p3 = *op3, p2 = *op2,
352 0 : p1 = *op1, p0 = *op0;
353 0 : const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3, q4 = *oq4,
354 0 : q5 = *oq5, q6 = *oq6;
355 :
356 : // 13-tap filter [1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1]
357 0 : *op5 = ROUND_POWER_OF_TWO(p6 * 7 + p5 * 2 + p4 * 2 + p3 + p2 + p1 + p0 + q0,
358 : 4);
359 0 : *op4 = ROUND_POWER_OF_TWO(
360 : p6 * 5 + p5 * 2 + p4 * 2 + p3 * 2 + p2 + p1 + p0 + q0 + q1, 4);
361 0 : *op3 = ROUND_POWER_OF_TWO(
362 : p6 * 4 + p5 + p4 * 2 + p3 * 2 + p2 * 2 + p1 + p0 + q0 + q1 + q2, 4);
363 0 : *op2 = ROUND_POWER_OF_TWO(
364 : p6 * 3 + p5 + p4 + p3 * 2 + p2 * 2 + p1 * 2 + p0 + q0 + q1 + q2 + q3,
365 : 4);
366 0 : *op1 = ROUND_POWER_OF_TWO(p6 * 2 + p5 + p4 + p3 + p2 * 2 + p1 * 2 + p0 * 2 +
367 : q0 + q1 + q2 + q3 + q4,
368 : 4);
369 0 : *op0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 * 2 +
370 : q0 * 2 + q1 + q2 + q3 + q4 + q5,
371 : 4);
372 0 : *oq0 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 * 2 + q0 * 2 +
373 : q1 * 2 + q2 + q3 + q4 + q5 + q6,
374 : 4);
375 0 : *oq1 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + q0 * 2 + q1 * 2 +
376 : q2 * 2 + q3 + q4 + q5 + q6 * 2,
377 : 4);
378 0 : *oq2 = ROUND_POWER_OF_TWO(
379 : p3 + p2 + p1 + p0 + q0 + q1 * 2 + q2 * 2 + q3 * 2 + q4 + q5 + q6 * 3,
380 : 4);
381 0 : *oq3 = ROUND_POWER_OF_TWO(
382 : p2 + p1 + p0 + q0 + q1 + q2 * 2 + q3 * 2 + q4 * 2 + q5 + q6 * 4, 4);
383 0 : *oq4 = ROUND_POWER_OF_TWO(
384 : p1 + p0 + q0 + q1 + q2 + q3 * 2 + q4 * 2 + q5 * 2 + q6 * 5, 4);
385 0 : *oq5 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q3 + q4 * 2 + q5 * 2 + q6 * 7,
386 : 4);
387 : }
388 : else
389 0 : filter8(mask, thresh, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3);
390 0 : }
391 :
// Applies the 14-sample filter across a vertical edge for `count` consecutive
// rows.  `s` points at the first sample right of the edge (q0) and `p` is the
// distance between rows.  Both flatness thresholds are fixed at 1.
static void mb_lpf_vertical_edge_w(uint8_t *s, int32_t p, const uint8_t *blimit,
                                   const uint8_t *limit, const uint8_t *thresh,
                                   int32_t count) {
    uint8_t *row = s;

    for (int32_t r = 0; r < count; ++r, row += p) {
        const uint8_t p6 = row[-7], p5 = row[-6], p4 = row[-5], p3 = row[-4];
        const uint8_t p2 = row[-3], p1 = row[-2], p0 = row[-1];
        const uint8_t q0 = row[0], q1 = row[1], q2 = row[2], q3 = row[3];
        const uint8_t q4 = row[4], q5 = row[5], q6 = row[6];

        const int8_t mask =
            filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
        // `flat` looks at the inner samples, `flat2` at the outer ones.
        const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
        const int8_t flat2 = flat_mask4(1, p6, p5, p4, p0, q0, q4, q5, q6);

        filter14(mask, *thresh, flat, flat2, row - 7, row - 6, row - 5, row - 4,
                 row - 3, row - 2, row - 1, row, row + 1, row + 2, row + 3,
                 row + 4, row + 5, row + 6);
    }
}
412 :
// Applies the 14-sample vertical-edge filter to 8 consecutive rows using a
// single set of thresholds.
void aom_lpf_vertical_14_dual_c(uint8_t *s, int32_t p, const uint8_t *blimit,
                                const uint8_t *limit, const uint8_t *thresh) {
    const int32_t row_count = 8;

    mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, row_count);
}
417 :
418 : // Should we apply any filter at all: 11111111 yes, 00000000 no ?
419 0 : static INLINE int8_t highbd_filter_mask2(uint8_t limit, uint8_t blimit,
420 : uint16_t p1, uint16_t p0, uint16_t q0,
421 : uint16_t q1, int32_t bd) {
422 0 : int8_t mask = 0;
423 0 : int16_t limit16 = (uint16_t)limit << (bd - 8);
424 0 : int16_t blimit16 = (uint16_t)blimit << (bd - 8);
425 0 : mask |= (abs(p1 - p0) > limit16) * -1;
426 0 : mask |= (abs(q1 - q0) > limit16) * -1;
427 0 : mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16) * -1;
428 0 : return ~mask;
429 : }
430 :
431 : // Should we apply any filter at all: 11111111 yes, 00000000 no ?
432 0 : static INLINE int8_t highbd_filter_mask(uint8_t limit, uint8_t blimit,
433 : uint16_t p3, uint16_t p2, uint16_t p1,
434 : uint16_t p0, uint16_t q0, uint16_t q1,
435 : uint16_t q2, uint16_t q3, int32_t bd) {
436 0 : int8_t mask = 0;
437 0 : int16_t limit16 = (uint16_t)limit << (bd - 8);
438 0 : int16_t blimit16 = (uint16_t)blimit << (bd - 8);
439 0 : mask |= (abs(p3 - p2) > limit16) * -1;
440 0 : mask |= (abs(p2 - p1) > limit16) * -1;
441 0 : mask |= (abs(p1 - p0) > limit16) * -1;
442 0 : mask |= (abs(q1 - q0) > limit16) * -1;
443 0 : mask |= (abs(q2 - q1) > limit16) * -1;
444 0 : mask |= (abs(q3 - q2) > limit16) * -1;
445 0 : mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16) * -1;
446 0 : return ~mask;
447 : }
448 :
449 0 : static INLINE int8_t highbd_flat_mask4(uint8_t thresh, uint16_t p3, uint16_t p2,
450 : uint16_t p1, uint16_t p0, uint16_t q0,
451 : uint16_t q1, uint16_t q2, uint16_t q3,
452 : int32_t bd) {
453 0 : int8_t mask = 0;
454 0 : int16_t thresh16 = (uint16_t)thresh << (bd - 8);
455 0 : mask |= (abs(p1 - p0) > thresh16) * -1;
456 0 : mask |= (abs(q1 - q0) > thresh16) * -1;
457 0 : mask |= (abs(p2 - p0) > thresh16) * -1;
458 0 : mask |= (abs(q2 - q0) > thresh16) * -1;
459 0 : mask |= (abs(p3 - p0) > thresh16) * -1;
460 0 : mask |= (abs(q3 - q0) > thresh16) * -1;
461 0 : return ~mask;
462 : }
463 :
464 : // Is there high edge variance internal edge:
465 : // 11111111_11111111 yes, 00000000_00000000 no ?
466 0 : static INLINE int16_t highbd_hev_mask(uint8_t thresh, uint16_t p1, uint16_t p0,
467 : uint16_t q0, uint16_t q1, int32_t bd) {
468 0 : int16_t hev = 0;
469 0 : int16_t thresh16 = (uint16_t)thresh << (bd - 8);
470 0 : hev |= (abs(p1 - p0) > thresh16) * -1;
471 0 : hev |= (abs(q1 - q0) > thresh16) * -1;
472 0 : return hev;
473 : }
474 :
475 0 : static INLINE void highbd_filter4(int8_t mask, uint8_t thresh, uint16_t *op1,
476 : uint16_t *op0, uint16_t *oq0, uint16_t *oq1,
477 : int32_t bd) {
478 : int16_t filter1, filter2;
479 : // ^0x80 equivalent to subtracting 0x80 from the values to turn them
480 : // into -128 to +127 instead of 0 to 255.
481 0 : int32_t shift = bd - 8;
482 0 : const int16_t ps1 = (int16_t)*op1 - (0x80 << shift);
483 0 : const int16_t ps0 = (int16_t)*op0 - (0x80 << shift);
484 0 : const int16_t qs0 = (int16_t)*oq0 - (0x80 << shift);
485 0 : const int16_t qs1 = (int16_t)*oq1 - (0x80 << shift);
486 0 : const uint16_t hev = highbd_hev_mask(thresh, *op1, *op0, *oq0, *oq1, bd);
487 :
488 : // Add outer taps if we have high edge variance.
489 0 : int16_t filter = signed_char_clamp_high(ps1 - qs1, bd) & hev;
490 :
491 : // Inner taps.
492 0 : filter = signed_char_clamp_high(filter + 3 * (qs0 - ps0), bd) & mask;
493 :
494 : // Save bottom 3 bits so that we round one side +4 and the other +3
495 : // if it equals 4 we'll set to adjust by -1 to account for the fact
496 : // we'd round 3 the other way.
497 0 : filter1 = signed_char_clamp_high(filter + 4, bd) >> 3;
498 0 : filter2 = signed_char_clamp_high(filter + 3, bd) >> 3;
499 :
500 0 : *oq0 = signed_char_clamp_high(qs0 - filter1, bd) + (0x80 << shift);
501 0 : *op0 = signed_char_clamp_high(ps0 + filter2, bd) + (0x80 << shift);
502 :
503 : // Outer tap adjustments.
504 0 : filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev;
505 :
506 0 : *oq1 = signed_char_clamp_high(qs1 - filter, bd) + (0x80 << shift);
507 0 : *op1 = signed_char_clamp_high(ps1 + filter, bd) + (0x80 << shift);
508 0 : }
509 :
// High bit-depth 4-sample filter across a horizontal edge, applied to 4
// consecutive columns.  `s` points at the first sample below the edge (q0),
// `p` is the distance between vertically adjacent samples and `bd` is the
// sample bit depth.
void aom_highbd_lpf_horizontal_4_c(uint16_t *s, int32_t p /* pitch */,
                                   const uint8_t *blimit, const uint8_t *limit,
                                   const uint8_t *thresh, int32_t bd) {
    for (int32_t col = 0; col < 4; ++col) {
        uint16_t *const px = s + col;
        const uint16_t p1 = px[-2 * p];
        const uint16_t p0 = px[-p];
        const uint16_t q0 = px[0];
        const uint16_t q1 = px[p];
        const int8_t mask =
            highbd_filter_mask2(*limit, *blimit, p1, p0, q0, q1, bd);

        highbd_filter4(mask, *thresh, px - 2 * p, px - p, px, px + p, bd);
    }
}
529 :
// High bit-depth 4-sample filter across a vertical edge, applied to 4
// consecutive rows.  `s` points at the first sample right of the edge (q0),
// `pitch` is the distance between rows and `bd` is the sample bit depth.
void aom_highbd_lpf_vertical_4_c(uint16_t *s, int32_t pitch, const uint8_t *blimit,
                                 const uint8_t *limit, const uint8_t *thresh,
                                 int32_t bd) {
    uint16_t *row = s;

    for (int32_t r = 0; r < 4; ++r, row += pitch) {
        const uint16_t p1 = row[-2], p0 = row[-1];
        const uint16_t q0 = row[0], q1 = row[1];
        const int8_t mask =
            highbd_filter_mask2(*limit, *blimit, p1, p0, q0, q1, bd);

        highbd_filter4(mask, *thresh, row - 2, row - 1, row, row + 1, bd);
    }
}
547 :
548 0 : static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, int8_t flat,
549 : uint16_t *op3, uint16_t *op2, uint16_t *op1,
550 : uint16_t *op0, uint16_t *oq0, uint16_t *oq1,
551 : uint16_t *oq2, uint16_t *oq3, int32_t bd) {
552 0 : if (flat && mask) {
553 0 : const uint16_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
554 0 : const uint16_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
555 :
556 : // 7-tap filter [1, 1, 1, 2, 1, 1, 1]
557 0 : *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0, 3);
558 0 : *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1, 3);
559 0 : *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2, 3);
560 0 : *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3, 3);
561 0 : *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3, 3);
562 0 : *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3, 3);
563 : }
564 : else
565 0 : highbd_filter4(mask, thresh, op1, op0, oq0, oq1, bd);
566 0 : }
567 :
// High bit-depth 8-sample filter across a horizontal edge, applied to 4
// consecutive columns.  `s` points at the first sample below the edge (q0),
// `p` is the distance between vertically adjacent samples and `bd` is the
// sample bit depth.  The flatness threshold is fixed at 1 (before scaling).
void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int32_t p, const uint8_t *blimit,
                                   const uint8_t *limit, const uint8_t *thresh,
                                   int32_t bd) {
    for (int32_t col = 0; col < 4; ++col) {
        uint16_t *const px = s + col;
        const uint16_t p3 = px[-4 * p], p2 = px[-3 * p], p1 = px[-2 * p], p0 = px[-p];
        const uint16_t q0 = px[0], q1 = px[p], q2 = px[2 * p], q3 = px[3 * p];

        const int8_t mask =
            highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd);
        const int8_t flat =
            highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd);

        highbd_filter8(mask, *thresh, flat, px - 4 * p, px - 3 * p, px - 2 * p,
                       px - p, px, px + p, px + 2 * p, px + 3 * p, bd);
    }
}
589 :
// High bit-depth 8-sample filter across a vertical edge, applied to 4
// consecutive rows.  `s` points at the first sample right of the edge (q0),
// `pitch` is the distance between rows and `bd` is the sample bit depth.
// The flatness threshold is fixed at 1 (before scaling).
void aom_highbd_lpf_vertical_8_c(uint16_t *s, int32_t pitch, const uint8_t *blimit,
                                 const uint8_t *limit, const uint8_t *thresh,
                                 int32_t bd) {
    uint16_t *row = s;

    for (int32_t r = 0; r < 4; ++r, row += pitch) {
        const uint16_t p3 = row[-4], p2 = row[-3], p1 = row[-2], p0 = row[-1];
        const uint16_t q0 = row[0], q1 = row[1], q2 = row[2], q3 = row[3];

        const int8_t mask =
            highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd);
        const int8_t flat =
            highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd);

        highbd_filter8(mask, *thresh, flat, row - 4, row - 3, row - 2, row - 1,
                       row, row + 1, row + 2, row + 3, bd);
    }
}
608 :
609 : //**********************************************************************************************************************//
610 :
611 : //static const SEG_LVL_FEATURES seg_lvl_lf_lut[MAX_MB_PLANE][2] = {
612 : // { SEG_LVL_ALT_LF_Y_V, SEG_LVL_ALT_LF_Y_H },
613 : // { SEG_LVL_ALT_LF_U, SEG_LVL_ALT_LF_U },
614 : // { SEG_LVL_ALT_LF_V, SEG_LVL_ALT_LF_V }
615 : //};
616 :
// Indexed by prediction mode; yields the mode-delta index used for the
// loop-filter level lookup (0 or 1).
const int32_t mode_lf_lut[] = {
    // INTRA_MODES
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // INTER_MODES (GLOBALMV == 0)
    1, 1, 0, 1,
    // INTER_COMPOUND_MODES (GLOBAL_GLOBALMV == 0)
    1, 1, 1, 1, 1, 1, 0, 1
};
622 :
623 1228 : void update_sharpness(LoopFilterInfoN *lfi, int32_t sharpness_lvl) {
624 : int32_t lvl;
625 :
626 : // For each possible value for the loop filter fill out limits
627 79820 : for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) {
628 : // Set loop filter parameters that control sharpness.
629 78592 : int32_t block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
630 :
631 78592 : if (sharpness_lvl > 0) {
632 0 : if (block_inside_limit > (9 - sharpness_lvl))
633 0 : block_inside_limit = (9 - sharpness_lvl);
634 : }
635 :
636 78592 : if (block_inside_limit < 1) block_inside_limit = 1;
637 :
638 78592 : memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH);
639 78592 : memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
640 : SIMD_WIDTH);
641 : }
642 1228 : }
643 :
644 18336 : static int seg_feature_active(SegmentationParams *seg, int segment_id,
645 : SEG_LVL_FEATURES feature_id)
646 : {
647 18336 : return seg->segmentation_enabled && seg->feature_enabled[segment_id][feature_id];
648 : }
649 :
650 5231240 : uint8_t get_filter_level(
651 : FrameHeader* frm_hdr,
652 : const LoopFilterInfoN *lfi_n,
653 : const int32_t dir_idx, int32_t plane,
654 : int32_t *sb_delta_lf, uint8_t seg_id,
655 : PredictionMode pred_mode, MvReferenceFrame ref_frame_0)
656 : {
657 5231240 : const int32_t segment_id = seg_id; /* const int32_t segment_id = 0; might cause encoder problem */
658 : PredictionMode mode; // Added to address 4x4 problem
659 5231240 : mode = (pred_mode == INTRA_MODE_4x4) ? DC_PRED : pred_mode;
660 5231240 : if (frm_hdr->delta_lf_params.delta_lf_present) {
661 0 : printf("ERROR[AN]: delta_lf_present not supported yet\n");
662 0 : int32_t delta_lf = -1;
663 0 : if (frm_hdr->delta_lf_params.delta_lf_multi) {
664 0 : const int32_t delta_lf_idx = delta_lf_id_lut[plane][dir_idx];
665 0 : delta_lf = sb_delta_lf[delta_lf_idx];
666 : }
667 : else {
668 0 : delta_lf = sb_delta_lf[0];
669 : }
670 : int32_t base_level;
671 0 : if (plane == 0)
672 0 : base_level = frm_hdr->loop_filter_params.filter_level[dir_idx];
673 0 : else if (plane == 1)
674 0 : base_level = frm_hdr->loop_filter_params.filter_level_u;
675 : else
676 0 : base_level = frm_hdr->loop_filter_params.filter_level_v;
677 0 : int32_t lvl_seg = clamp(delta_lf + base_level, 0, MAX_LOOP_FILTER);
678 0 : assert(plane >= 0 && plane <= 2);
679 0 : const int32_t seg_lf_feature_id = seg_lvl_lf_lut[plane][dir_idx];
680 0 : if (seg_feature_active(&frm_hdr->segmentation_params, segment_id,
681 : seg_lf_feature_id))
682 : {
683 0 : const int32_t data = get_segdata(&frm_hdr->segmentation_params,
684 : segment_id, seg_lf_feature_id);
685 0 : lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER);
686 : }
687 :
688 0 : if (frm_hdr->loop_filter_params.mode_ref_delta_enabled) {
689 0 : const int32_t scale = 1 << (lvl_seg >> 5);
690 0 : lvl_seg += frm_hdr->loop_filter_params.ref_deltas[ref_frame_0] * scale;
691 0 : if (ref_frame_0 > INTRA_FRAME)
692 0 : lvl_seg += frm_hdr->loop_filter_params.
693 0 : mode_deltas[mode_lf_lut[mode]] * scale;
694 0 : lvl_seg = clamp(lvl_seg, 0, MAX_LOOP_FILTER);
695 : }
696 0 : return lvl_seg;
697 : }
698 : else {
699 : ASSERT(mode < MB_MODE_COUNT);
700 : return lfi_n->lvl[plane][segment_id][dir_idx][ref_frame_0]
701 5231240 : [mode_lf_lut[mode]];
702 : }
703 : }
704 :
705 90 : void eb_av1_loop_filter_init(PictureControlSet *pcs_ptr) {
706 : //assert(MB_MODE_COUNT == n_elements(mode_lf_lut));
707 90 : LoopFilterInfoN *lfi = &pcs_ptr->parent_pcs_ptr->lf_info;
708 90 : struct LoopFilter *lf = &pcs_ptr->parent_pcs_ptr->frm_hdr.loop_filter_params;
709 : int32_t lvl;
710 :
711 90 : lf->combine_vert_horz_lf = 1;
712 :
713 : // init limits for given sharpness
714 90 : update_sharpness(lfi, lf->sharpness_level);
715 :
716 : // init hev threshold const vectors
717 5850 : for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++)
718 5760 : memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
719 90 : }
720 :
721 : // Update the loop filter for the current frame.
722 : // This should be called before loop_filter_rows(),
723 : // eb_av1_loop_filter_frame() calls this function directly.
724 1138 : void eb_av1_loop_filter_frame_init(FrameHeader *frm_hdr,
725 : LoopFilterInfoN *lfi, int32_t plane_start, int32_t plane_end)
726 : {
727 : int32_t filt_lvl[MAX_MB_PLANE], filt_lvl_r[MAX_MB_PLANE];
728 : int32_t plane;
729 : int32_t seg_id;
730 : // n_shift is the multiplier for lf_deltas
731 : // the multiplier is 1 for when filter_lvl is between 0 and 31;
732 : // 2 when filter_lvl is between 32 and 63
733 :
734 1138 : struct LoopFilter *const lf = &frm_hdr->loop_filter_params;
735 : // const struct segmentation *const seg = &pcs_ptr->parent_pcs_ptr->seg;
736 :
737 : // update sharpness limits
738 1138 : update_sharpness(lfi, lf->sharpness_level);
739 :
740 1138 : filt_lvl[0] = frm_hdr->loop_filter_params.filter_level[0];
741 1138 : filt_lvl[1] = frm_hdr->loop_filter_params.filter_level_u;
742 1138 : filt_lvl[2] = frm_hdr->loop_filter_params.filter_level_v;
743 :
744 1138 : filt_lvl_r[0] = frm_hdr->loop_filter_params.filter_level[1];
745 1138 : filt_lvl_r[1] = frm_hdr->loop_filter_params.filter_level_u;
746 1138 : filt_lvl_r[2] = frm_hdr->loop_filter_params.filter_level_v;
747 :
748 2506 : for (plane = plane_start; plane < plane_end; plane++) {
749 1430 : if (plane == 0 && !filt_lvl[0] && !filt_lvl_r[0])
750 : break;
751 1368 : else if (plane == 1 && !filt_lvl[1])
752 92 : continue;
753 1276 : else if (plane == 2 && !filt_lvl[2])
754 130 : continue;
755 :
756 10314 : for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
757 27504 : for (int32_t dir = 0; dir < 2; ++dir) {
758 18336 : int32_t lvl_seg = (dir == 0) ? filt_lvl[plane] : filt_lvl_r[plane];
759 18336 : assert(plane >= 0 && plane <= 2);
760 18336 : const int32_t seg_lf_feature_id = seg_lvl_lf_lut[plane][dir];
761 18336 : if (seg_feature_active(&frm_hdr->segmentation_params, seg_id,
762 : seg_lf_feature_id))
763 : {
764 0 : const int32_t data = get_segdata(&frm_hdr->segmentation_params,
765 : seg_id, seg_lf_feature_id);
766 0 : lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER);
767 : }
768 :
769 18336 : if (!lf->mode_ref_delta_enabled) {
770 : // we could get rid of this if we assume that deltas are set to
771 : // zero when not in use; encoder always uses deltas
772 18336 : memset(lfi->lvl[plane][seg_id][dir], lvl_seg,
773 : sizeof(lfi->lvl[plane][seg_id][dir]));
774 : }
775 : else {
776 : int32_t ref, mode;
777 0 : const int32_t scale = 1 << (lvl_seg >> 5);
778 0 : const int32_t intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
779 0 : lfi->lvl[plane][seg_id][dir][INTRA_FRAME][0] =
780 0 : (uint8_t)clamp(intra_lvl, 0, MAX_LOOP_FILTER);
781 :
782 0 : for (ref = LAST_FRAME; ref < REF_FRAMES; ++ref) {
783 0 : for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
784 0 : const int32_t inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
785 0 : lf->mode_deltas[mode] * scale;
786 0 : lfi->lvl[plane][seg_id][dir][ref][mode] =
787 0 : (uint8_t)clamp(inter_lvl, 0, MAX_LOOP_FILTER);
788 : }
789 : }
790 : }
791 : }
792 : }
793 : }
794 1138 : }
795 : //***************************************************************************************************//
796 :
797 122755 : static INLINE int32_t scaled_buffer_offset(int32_t x_offset, int32_t y_offset, int32_t stride/*,
798 : const struct scale_factors *sf*/) {
799 122755 : const int32_t x =
800 : /*sf ? sf->scale_value_x(x_offset, sf) >> SCALE_EXTRA_BITS :*/ x_offset;
801 122755 : const int32_t y =
802 : /*sf ? sf->scale_value_y(y_offset, sf) >> SCALE_EXTRA_BITS :*/ y_offset;
803 122755 : return y * stride + x;
804 : }
805 122755 : static INLINE void setup_pred_plane(struct Buf2d *dst, BlockSize bsize,
806 : uint8_t *src, int32_t width, int32_t height,
807 : int32_t stride, int32_t mi_row, int32_t mi_col,
808 : /*const struct scale_factors *scale,*/
809 : int32_t subsampling_x, int32_t subsampling_y,
810 : int32_t is16Bit) {
811 : // Offset the buffer pointer
812 122755 : if (subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1))
813 0 : mi_row -= 1;
814 122755 : if (subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1))
815 0 : mi_col -= 1;
816 :
817 122755 : const int32_t x = (MI_SIZE * mi_col) >> subsampling_x;
818 122755 : const int32_t y = (MI_SIZE * mi_row) >> subsampling_y;
819 122755 : dst->buf = src + (scaled_buffer_offset(x, y, stride/*, scale*/) << is16Bit);
820 122756 : dst->buf0 = src;
821 122756 : dst->width = width;
822 122756 : dst->height = height;
823 122756 : dst->stride = stride;
824 122756 : }
825 122757 : void eb_av1_setup_dst_planes(struct MacroblockdPlane *planes, BlockSize bsize,
826 : //const Yv12BufferConfig *src,
827 : const EbPictureBufferDesc *src,
828 : int32_t mi_row, int32_t mi_col,
829 : const int32_t plane_start, const int32_t plane_end) {
830 : // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
831 : // the static analysis warnings.
832 : //for (int32_t i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
833 : // struct MacroblockdPlane *const pd = &planes[i];
834 : // const int32_t is_uv = i > 0;
835 : // setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
836 : // src->crop_heights[is_uv], src->strides[is_uv], mi_row,
837 : // mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
838 : //}
839 245512 : for (int32_t i = plane_start; i < AOMMIN(plane_end, 3); ++i) {
840 122759 : if (i == 0) {
841 56399 : struct MacroblockdPlane *const pd = &planes[0];
842 56399 : setup_pred_plane(&pd->dst, bsize, &src->buffer_y[(src->origin_x + src->origin_y*src->stride_y) << pd->is16Bit], src->width,
843 56399 : src->height, src->stride_y, mi_row,
844 : mi_col, /*NULL,*/ pd->subsampling_x, pd->subsampling_y, pd->is16Bit); //AMIR: Updated to point to the right location
845 : }
846 66360 : else if (i == 1) {
847 35040 : struct MacroblockdPlane *const pd = &planes[1];
848 35040 : setup_pred_plane(&pd->dst, bsize, &src->buffer_cb[((src->origin_x + src->origin_y*src->stride_cb) << pd->is16Bit) / 2], src->width / 2,
849 35040 : src->height / 2, src->stride_cb, mi_row,
850 : mi_col, /*NULL,*/ pd->subsampling_x, pd->subsampling_y, pd->is16Bit);
851 : }
852 31320 : else if (i == 2) {
853 31320 : struct MacroblockdPlane *const pd = &planes[2];
854 31320 : setup_pred_plane(&pd->dst, bsize, &src->buffer_cr[((src->origin_x + src->origin_y*src->stride_cr) << pd->is16Bit) / 2], src->width / 2,
855 31320 : src->height / 2, src->stride_cr, mi_row,
856 : mi_col,/* NULL,*/ pd->subsampling_x, pd->subsampling_y, pd->is16Bit);
857 : }
858 : }
859 122753 : }
860 :
861 : #define INTER_TX_SIZE_BUF_LEN 16
862 :
863 : //***************************************************************************************************//
864 :
865 4699940 : static TxSize get_transform_size(const MacroBlockD *const xd,
866 : const MbModeInfo *const mbmi,
867 : const EDGE_DIR edge_dir, const int32_t mi_row,
868 : const int32_t mi_col, const int32_t plane,
869 : const struct MacroblockdPlane *plane_ptr) {
870 4699940 : assert(mbmi != NULL);
871 : (void)mi_row;
872 : (void)mi_col;
873 : (void)xd;
874 : //if (xd->lossless[mbmi->segment_id]) return TX_4X4;
875 :
876 4700400 : TxSize tx_size = (plane == COMPONENT_LUMA)
877 3009750 : ? (is_inter_block_no_intrabc(mbmi->block_mi.ref_frame[0])
878 2467260 : ? tx_depth_to_tx_size[0][mbmi->block_mi.sb_type]
879 542551 : : tx_depth_to_tx_size[mbmi->tx_depth][mbmi->block_mi.sb_type]) // use max_tx_size
880 7709750 : : av1_get_max_uv_txsize(mbmi->block_mi.sb_type,
881 : plane_ptr->subsampling_x, plane_ptr->subsampling_y);
882 4700400 : assert(tx_size < TX_SIZES_ALL);
883 7710380 : if (((plane == COMPONENT_LUMA) &&
884 3009990 : is_inter_block_no_intrabc(mbmi->block_mi.ref_frame[0]) &&
885 2467300 : !mbmi->block_mi.skip)) { // if split tx is used
886 :
887 350152 : const TxSize mb_tx_size =
888 350152 : tx_depth_to_tx_size[mbmi->tx_depth][mbmi->block_mi.sb_type]; // tx_size
889 350152 : assert(mb_tx_size < TX_SIZES_ALL);
890 350152 : tx_size = mb_tx_size;
891 : }
892 : // since in case of chrominance or non-square transorm need to convert
893 : // transform size into transform size in particular direction.
894 : // for vertical edge, filter direction is horizontal, for horizontal
895 : // edge, filter direction is vertical.
896 4700390 : tx_size = (VERT_EDGE == edge_dir) ? txsize_horz_map[tx_size]
897 4700390 : : txsize_vert_map[tx_size];
898 4700390 : return tx_size;
899 : }
900 :
901 : // Return TxSize from get_transform_size(), so it is plane and direction
902 : // awared
903 3192120 : static TxSize set_lpf_parameters(
904 : AV1_DEBLOCKING_PARAMETERS *const params, const uint64_t mode_step,
905 : const PictureControlSet *const pcs_ptr, const MacroBlockD *const xd,
906 : const EDGE_DIR edge_dir, const uint32_t x, const uint32_t y,
907 : const int32_t plane, const struct MacroblockdPlane *const plane_ptr) {
908 : // reset to initial values
909 3192120 : params->filter_length = 0;
910 :
911 : // no deblocking is required
912 3192120 : const uint32_t width = plane_ptr->dst.width;
913 3192120 : const uint32_t height = plane_ptr->dst.height;
914 3192120 : if ((width <= x) || (height <= y)) {
915 : // just return the smallest transform unit size
916 745879 : return TX_4X4;
917 : }
918 :
919 2446240 : const uint32_t scale_horz = plane_ptr->subsampling_x;
920 2446240 : const uint32_t scale_vert = plane_ptr->subsampling_y;
921 : // for sub8x8 block, chroma prediction mode is obtained from the bottom/right
922 : // mi structure of the co-located 8x8 luma block. so for chroma plane, mi_row
923 : // and mi_col should map to the bottom/right mi structure, i.e, both mi_row
924 : // and mi_col should be odd number for chroma plane.
925 :
926 2446240 : const int32_t mi_row = scale_vert | ((y << scale_vert) >> MI_SIZE_LOG2);
927 2446240 : const int32_t mi_col = scale_horz | ((x << scale_horz) >> MI_SIZE_LOG2);
928 2446240 : uint32_t mi_stride = pcs_ptr->mi_stride;
929 2446240 : const int32_t offset = mi_row * mi_stride + mi_col;
930 2446240 : ModeInfo **mi = (pcs_ptr->mi_grid_base + offset);
931 : //MbModeInfo **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
932 2446240 : const MbModeInfo *mbmi = &mi[0]->mbmi;
933 :
934 : // If current mbmi is not correctly setup, return an invalid value to stop
935 : // filtering. One example is that if this tile is not coded, then its mbmi
936 : // it not set up.
937 2446240 : if (mbmi == NULL) return TX_INVALID;
938 :
939 : const TxSize ts =
940 2446240 : get_transform_size(xd, mbmi/*mi[0]*/, edge_dir, mi_row, mi_col, plane, plane_ptr);
941 2445860 : assert(ts < TX_SIZES_ALL);
942 :
943 : {
944 2447120 : const uint32_t coord = (VERT_EDGE == edge_dir) ? (x) : (y);
945 2447120 : const uint32_t transform_masks =
946 2447120 : edge_dir == VERT_EDGE ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1;
947 2447120 : const int32_t tu_edge = (coord & transform_masks) ? (0) : (1);
948 :
949 2447120 : if (!tu_edge) return ts;
950 :
951 : // prepare outer edge parameters. deblock the edge if it's an edge of a TU
952 : {
953 2445860 : const uint32_t curr_level =
954 2447120 : get_filter_level(&pcs_ptr->parent_pcs_ptr->frm_hdr,
955 2447120 : &pcs_ptr->parent_pcs_ptr->lf_info, edge_dir, plane,
956 2447120 : pcs_ptr->parent_pcs_ptr->curr_delta_lf, 0 /*segment_id*/,
957 2447120 : mbmi->block_mi.mode, mbmi->block_mi.ref_frame[0]);
958 :
959 4248760 : const int32_t curr_skipped = mbmi->block_mi.skip &&
960 1800970 : is_inter_block_no_intrabc(mbmi->block_mi.ref_frame[0]);
961 2447790 : uint32_t level = curr_level;
962 2447790 : if (coord) {
963 : {
964 : //const ModeInfo *const mi_prev = *(mi - mode_step);
965 2258660 : const ModeInfo *const mi_prevTemp = *(mi - mode_step);
966 2258660 : const MbModeInfo *const mi_prev = &mi_prevTemp[0].mbmi;
967 : //
968 2258660 : if (mi_prev == NULL) return TX_INVALID;
969 2258660 : const int32_t pv_row =
970 2258660 : (VERT_EDGE == edge_dir) ? (mi_row) : (mi_row - (1 << scale_vert));
971 2258660 : const int32_t pv_col =
972 2258660 : (VERT_EDGE == edge_dir) ? (mi_col - (1 << scale_horz)) : (mi_col);
973 2258660 : const TxSize pv_ts = get_transform_size(
974 : xd, mi_prev, edge_dir, pv_row, pv_col, plane, plane_ptr);
975 2261140 : const uint32_t pv_lvl =
976 2257780 : get_filter_level(&pcs_ptr->parent_pcs_ptr->frm_hdr,
977 2257780 : &pcs_ptr->parent_pcs_ptr->lf_info, edge_dir, plane,
978 2257780 : pcs_ptr->parent_pcs_ptr->curr_delta_lf, 0 /*segment_id*/,
979 2257780 : mi_prev->block_mi.mode, mi_prev->block_mi.ref_frame[0]);
980 :
981 3905680 : const int32_t pv_skip = mi_prev->block_mi.skip &&
982 1644610 : is_inter_block_no_intrabc(mi_prev->block_mi.ref_frame[0]);
983 :
984 : const BlockSize bsize =
985 2261070 : get_plane_block_size(mbmi->block_mi.sb_type, plane_ptr->subsampling_x, plane_ptr->subsampling_y);
986 2257310 : assert(bsize < BlockSizeS_ALL);
987 2259600 : const int32_t prediction_masks = edge_dir == VERT_EDGE
988 1166790 : ? block_size_wide[bsize] - 1
989 2259600 : : block_size_high[bsize] - 1;
990 2259600 : const int32_t pu_edge = !(coord & prediction_masks);
991 : // if the current and the previous blocks are skipped,
992 : // deblock the edge if the edge belongs to a PU's edge only.
993 2259600 : if ((curr_level || pv_lvl) &&
994 1587780 : (!pv_skip || !curr_skipped || pu_edge)) {
995 2259200 : const TxSize min_ts = AOMMIN(ts, pv_ts);
996 2259200 : if (TX_4X4 >= min_ts)
997 374528 : params->filter_length = 4;
998 1884670 : else if (TX_8X8 == min_ts) {
999 456621 : if (plane != 0)
1000 173027 : params->filter_length = 6;
1001 : else
1002 283594 : params->filter_length = 8;
1003 : }
1004 : else {
1005 1428050 : params->filter_length = 14;
1006 : // No wide filtering for chroma plane
1007 1428050 : if (plane != 0)
1008 426416 : params->filter_length = 6;
1009 : }
1010 :
1011 : // update the level if the current block is skipped,
1012 : // but the previous one is not
1013 2259200 : level = (curr_level) ? (curr_level) : (pv_lvl);
1014 : }
1015 : }
1016 : }
1017 : // prepare common parameters
1018 2448730 : if (params->filter_length) {
1019 2257250 : const LoopFilterThresh *const limits = pcs_ptr->parent_pcs_ptr->lf_info.lfthr + level;
1020 2257250 : params->lim = limits->lim;
1021 2257250 : params->mblim = limits->mblim;
1022 2257250 : params->hev_thr = limits->hev_thr;
1023 : }
1024 : }
1025 : }
1026 :
1027 2448730 : return ts;
1028 : }
1029 :
1030 61377 : void eb_av1_filter_block_plane_vert(
1031 : const PictureControlSet *const pcs_ptr,
1032 : const MacroBlockD *const xd, const int32_t plane,
1033 : const MacroblockdPlane *const plane_ptr,
1034 : const uint32_t mi_row, const uint32_t mi_col) {
1035 61377 : SequenceControlSet *scs_ptr = (SequenceControlSet*)pcs_ptr->parent_pcs_ptr->sequence_control_set_wrapper_ptr->object_ptr;
1036 61377 : EbBool is16bit = scs_ptr->static_config.encoder_bit_depth > 8;
1037 61377 : const int32_t row_step = MI_SIZE >> MI_SIZE_LOG2;
1038 61377 : const uint32_t scale_horz = plane_ptr->subsampling_x;
1039 61377 : const uint32_t scale_vert = plane_ptr->subsampling_y;
1040 61377 : uint8_t *const dst_ptr = plane_ptr->dst.buf;
1041 61377 : const int32_t dst_stride = plane_ptr->dst.stride;
1042 61377 : const int32_t y_range = scs_ptr->seq_header.sb_size == BLOCK_128X128 ? (MAX_MIB_SIZE >> scale_vert) : (SB64_MIB_SIZE >> scale_vert);
1043 61377 : const int32_t x_range = scs_ptr->seq_header.sb_size == BLOCK_128X128 ? (MAX_MIB_SIZE >> scale_horz) : (SB64_MIB_SIZE >> scale_horz);
1044 777348 : for (int32_t y = 0; y < y_range; y += row_step) {
1045 716566 : uint8_t *p = dst_ptr + ((y * MI_SIZE * dst_stride) << plane_ptr->is16Bit);
1046 2534320 : for (int32_t x = 0; x < x_range;) {
1047 : // inner loop always filter vertical edges in a MI block. If MI size
1048 : // is 8x8, it will filter the vertical edge aligned with a 8x8 block.
1049 : // If 4x4 trasnform is used, it will then filter the internal edge
1050 : // aligned with a 4x4 block
1051 1818340 : const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
1052 1818340 : const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
1053 : uint32_t advance_units;
1054 : TxSize tx_size;
1055 : AV1_DEBLOCKING_PARAMETERS params;
1056 1818340 : memset(¶ms, 0, sizeof(params));
1057 :
1058 : tx_size =
1059 1818340 : set_lpf_parameters(¶ms, ((uint64_t)1 << scale_horz), pcs_ptr, xd,
1060 : VERT_EDGE, curr_x, curr_y, plane, plane_ptr);
1061 1817650 : if (tx_size == TX_INVALID) {
1062 0 : params.filter_length = 0;
1063 0 : tx_size = TX_4X4;
1064 : }
1065 :
1066 1817650 : switch (params.filter_length) {
1067 : // apply 4-tap filtering
1068 176995 : case 4:
1069 176995 : if (is16bit)
1070 0 : aom_highbd_lpf_vertical_4(
1071 : (uint16_t*)(p),//CONVERT_TO_SHORTPTR(p),
1072 : dst_stride,
1073 : params.mblim,
1074 : params.lim,
1075 : params.hev_thr,
1076 0 : scs_ptr->static_config.encoder_bit_depth);
1077 : else
1078 176995 : aom_lpf_vertical_4(
1079 : p,
1080 : dst_stride,
1081 : params.mblim,
1082 : params.lim,
1083 : params.hev_thr);
1084 177015 : break;
1085 316184 : case 6: // apply 6-tap filter for chroma plane only
1086 316184 : assert(plane != 0);
1087 316184 : if (is16bit)
1088 0 : aom_highbd_lpf_vertical_6(
1089 : (uint16_t*)(p),//CONVERT_TO_SHORTPTR(p),
1090 : dst_stride,
1091 : params.mblim,
1092 : params.lim,
1093 : params.hev_thr,
1094 0 : scs_ptr->static_config.encoder_bit_depth);
1095 : else
1096 316184 : aom_lpf_vertical_6(
1097 : p,
1098 : dst_stride,
1099 : params.mblim,
1100 : params.lim,
1101 : params.hev_thr);
1102 316209 : break;
1103 : // apply 8-tap filtering
1104 145391 : case 8:
1105 145391 : if (is16bit)
1106 0 : aom_highbd_lpf_vertical_8(
1107 : (uint16_t*)(p),//CONVERT_TO_SHORTPTR(p),
1108 : dst_stride,
1109 : params.mblim,
1110 : params.lim,
1111 : params.hev_thr,
1112 0 : scs_ptr->static_config.encoder_bit_depth);
1113 : else
1114 145391 : aom_lpf_vertical_8(
1115 : p,
1116 : dst_stride,
1117 : params.mblim,
1118 : params.lim,
1119 : params.hev_thr);
1120 145407 : break;
1121 : // apply 14-tap filtering
1122 529157 : case 14:
1123 529157 : if (is16bit)
1124 0 : aom_highbd_lpf_vertical_14(
1125 : (uint16_t*)(p),//CONVERT_TO_SHORTPTR(p),
1126 : dst_stride,
1127 : params.mblim,
1128 : params.lim,
1129 : params.hev_thr,
1130 0 : scs_ptr->static_config.encoder_bit_depth);
1131 : else
1132 529157 : aom_lpf_vertical_14(
1133 : p,
1134 : dst_stride,
1135 : params.mblim,
1136 : params.lim,
1137 : params.hev_thr);
1138 529195 : break;
1139 : // no filtering
1140 649923 : default: break;
1141 : }
1142 : // advance the destination pointer
1143 1817750 : assert(tx_size < TX_SIZES_ALL);
1144 1817750 : advance_units = tx_size_wide_unit[tx_size];
1145 1817750 : x += advance_units;
1146 1817750 : p += ((advance_units * MI_SIZE) << plane_ptr->is16Bit);
1147 : }
1148 : }
1149 60782 : }
1150 :
1151 61380 : void eb_av1_filter_block_plane_horz(
1152 : const PictureControlSet *const pcs_ptr,
1153 : const MacroBlockD *const xd, const int32_t plane,
1154 : const MacroblockdPlane *const plane_ptr,
1155 : const uint32_t mi_row, const uint32_t mi_col) {
1156 61380 : SequenceControlSet *scs_ptr = (SequenceControlSet*)pcs_ptr->parent_pcs_ptr->sequence_control_set_wrapper_ptr->object_ptr;
1157 61380 : EbBool is16bit = scs_ptr->static_config.encoder_bit_depth > 8;
1158 61380 : const int32_t col_step = MI_SIZE >> MI_SIZE_LOG2;
1159 61380 : const uint32_t scale_horz = plane_ptr->subsampling_x;
1160 61380 : const uint32_t scale_vert = plane_ptr->subsampling_y;
1161 61380 : uint8_t *const dst_ptr = plane_ptr->dst.buf;
1162 61380 : const int32_t dst_stride = plane_ptr->dst.stride;
1163 61380 : const int32_t y_range = scs_ptr->seq_header.sb_size == BLOCK_128X128 ? (MAX_MIB_SIZE >> scale_vert) : (SB64_MIB_SIZE >> scale_vert);
1164 61380 : const int32_t x_range = scs_ptr->seq_header.sb_size == BLOCK_128X128 ? (MAX_MIB_SIZE >> scale_horz) : (SB64_MIB_SIZE >> scale_horz);
1165 61380 : uint32_t mi_stride = pcs_ptr->mi_stride;
1166 777460 : for (int32_t x = 0; x < x_range; x += col_step) {
1167 716545 : uint8_t *p = dst_ptr + ((x * MI_SIZE) << plane_ptr->is16Bit);
1168 2091240 : for (int32_t y = 0; y < y_range;) {
1169 : // inner loop always filter vertical edges in a MI block. If MI size
1170 : // is 8x8, it will first filter the vertical edge aligned with a 8x8
1171 : // block. If 4x4 trasnform is used, it will then filter the internal
1172 : // edge aligned with a 4x4 block
1173 1375160 : const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
1174 1375160 : const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
1175 : uint32_t advance_units;
1176 : TxSize tx_size;
1177 : AV1_DEBLOCKING_PARAMETERS params;
1178 1375160 : memset(¶ms, 0, sizeof(params));
1179 :
1180 : tx_size =
1181 1375160 : set_lpf_parameters(
1182 : ¶ms,
1183 : //(pcs_ptr->parent_pcs_ptr->av1_cm->mi_stride << scale_vert),
1184 1375160 : (mi_stride << scale_vert),
1185 : pcs_ptr,
1186 : xd,
1187 : HORZ_EDGE,
1188 : curr_x,
1189 : curr_y,
1190 : plane,
1191 : plane_ptr);
1192 1374680 : if (tx_size == TX_INVALID) {
1193 0 : params.filter_length = 0;
1194 0 : tx_size = TX_4X4;
1195 : }
1196 :
1197 1374680 : switch (params.filter_length) {
1198 : // apply 4-tap filtering
1199 197647 : case 4:
1200 197647 : if (is16bit)
1201 0 : aom_highbd_lpf_horizontal_4(
1202 : (uint16_t*)(p),//CONVERT_TO_SHORTPTR(p),
1203 : dst_stride,
1204 : params.mblim,
1205 : params.lim,
1206 : params.hev_thr,
1207 0 : scs_ptr->static_config.encoder_bit_depth);
1208 : else
1209 197647 : aom_lpf_horizontal_4(
1210 : p,
1211 : dst_stride,
1212 : params.mblim,
1213 : params.lim,
1214 : params.hev_thr);
1215 197682 : break;
1216 : // apply 6-tap filtering
1217 283243 : case 6:
1218 283243 : assert(plane != 0);
1219 283243 : if (is16bit)
1220 0 : aom_highbd_lpf_horizontal_6(
1221 : (uint16_t*)(p),//CONVERT_TO_SHORTPTR(p),
1222 : dst_stride,
1223 : params.mblim,
1224 : params.lim,
1225 : params.hev_thr,
1226 0 : scs_ptr->static_config.encoder_bit_depth);
1227 : else
1228 283243 : aom_lpf_horizontal_6(
1229 : p,
1230 : dst_stride,
1231 : params.mblim,
1232 : params.lim,
1233 : params.hev_thr);
1234 283244 : break;
1235 : // apply 8-tap filtering
1236 138627 : case 8:
1237 138627 : if (is16bit)
1238 0 : aom_highbd_lpf_horizontal_8(
1239 : (uint16_t*)(p),//CONVERT_TO_SHORTPTR(p),
1240 : dst_stride,
1241 : params.mblim,
1242 : params.lim,
1243 : params.hev_thr,
1244 0 : scs_ptr->static_config.encoder_bit_depth);
1245 : else
1246 138627 : aom_lpf_horizontal_8(
1247 : p,
1248 : dst_stride,
1249 : params.mblim,
1250 : params.lim,
1251 : params.hev_thr);
1252 138632 : break;
1253 : // apply 14-tap filtering
1254 474165 : case 14:
1255 474165 : if (is16bit)
1256 0 : aom_highbd_lpf_horizontal_14(
1257 : (uint16_t*)(p),//CONVERT_TO_SHORTPTR(p),
1258 : dst_stride,
1259 : params.mblim,
1260 : params.lim,
1261 : params.hev_thr,
1262 0 : scs_ptr->static_config.encoder_bit_depth);
1263 : else
1264 474165 : aom_lpf_horizontal_14(
1265 : p,
1266 : dst_stride,
1267 : params.mblim,
1268 : params.lim,
1269 : params.hev_thr);
1270 474141 : break;
1271 : // no filtering
1272 280997 : default: break;
1273 : }
1274 :
1275 : // advance the destination pointer
1276 1374700 : assert(tx_size < TX_SIZES_ALL);
1277 1374700 : advance_units = tx_size_high_unit[tx_size];
1278 1374700 : y += advance_units;
1279 1374700 : p += ((advance_units * dst_stride * MI_SIZE) << plane_ptr->is16Bit);
1280 : }
1281 : }
1282 60915 : }
1283 :
1284 : // New function to filter each sb (64x64)
1285 64799 : void loop_filter_sb(
1286 : EbPictureBufferDesc *frame_buffer,//reconpicture,
1287 : //Yv12BufferConfig *frame_buffer,
1288 : PictureControlSet *pcs_ptr,
1289 : MacroBlockD *xd, int32_t mi_row, int32_t mi_col,
1290 : int32_t plane_start, int32_t plane_end,
1291 : uint8_t LastCol) {
1292 64799 : FrameHeader *frm_hdr = &pcs_ptr->parent_pcs_ptr->frm_hdr;
1293 : struct MacroblockdPlane pd[3];
1294 : int32_t plane;
1295 :
1296 64799 : pd[0].subsampling_x = 0;
1297 64799 : pd[0].subsampling_y = 0;
1298 64799 : pd[0].plane_type = PLANE_TYPE_Y;
1299 64799 : pd[0].is16Bit = frame_buffer->bit_depth > 8;
1300 64799 : pd[1].subsampling_x = 1;
1301 64799 : pd[1].subsampling_y = 1;
1302 64799 : pd[1].plane_type = PLANE_TYPE_UV;
1303 64799 : pd[1].is16Bit = frame_buffer->bit_depth > 8;
1304 64799 : pd[2].subsampling_x = 1;
1305 64799 : pd[2].subsampling_y = 1;
1306 64799 : pd[2].plane_type = PLANE_TYPE_UV;
1307 64799 : pd[2].is16Bit = frame_buffer->bit_depth > 8;
1308 :
1309 136438 : for (plane = plane_start; plane < plane_end; plane++) {
1310 75359 : if (plane == 0 && !(frm_hdr->loop_filter_params.filter_level[0]) && !(frm_hdr->loop_filter_params.filter_level[1]))
1311 : break;
1312 71639 : else if (plane == 1 && !(frm_hdr->loop_filter_params.filter_level_u))
1313 4560 : continue;
1314 67079 : else if (plane == 2 && !(frm_hdr->loop_filter_params.filter_level_v))
1315 5700 : continue;
1316 :
1317 61379 : if (frm_hdr->loop_filter_params.combine_vert_horz_lf) {
1318 : // filter all vertical and horizontal edges in every 64x64 super block
1319 : // filter vertical edges
1320 61379 : eb_av1_setup_dst_planes(pd, pcs_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.sb_size, frame_buffer, mi_row,
1321 : mi_col, plane, plane + 1);
1322 61377 : eb_av1_filter_block_plane_vert(pcs_ptr, xd, plane, &pd[plane], mi_row,
1323 : mi_col);
1324 : // filter horizontal edges
1325 61380 : int32_t max_mib_size = pcs_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.sb_size == BLOCK_128X128 ? MAX_MIB_SIZE : SB64_MIB_SIZE;
1326 :
1327 61380 : if (mi_col - max_mib_size >= 0) {
1328 55242 : eb_av1_setup_dst_planes(pd, pcs_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.sb_size, frame_buffer,
1329 : mi_row, mi_col - max_mib_size, plane,
1330 : plane + 1);
1331 55242 : eb_av1_filter_block_plane_horz(pcs_ptr, xd, plane, &pd[plane], mi_row,
1332 55242 : mi_col - max_mib_size);
1333 : }
1334 : // Filter the horizontal edges of the last lcu in each row
1335 61380 : if (LastCol) {
1336 6138 : eb_av1_setup_dst_planes(pd, pcs_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.sb_size, frame_buffer,
1337 : mi_row, mi_col, plane,
1338 : plane + 1);
1339 6138 : eb_av1_filter_block_plane_horz(pcs_ptr, xd, plane, &pd[plane], mi_row,
1340 : mi_col);
1341 : }
1342 : }
1343 : else {
1344 : // filter all vertical edges in every 64x64 super block
1345 0 : eb_av1_setup_dst_planes(pd, pcs_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.sb_size, frame_buffer, mi_row,
1346 : mi_col, plane, plane + 1);
1347 :
1348 0 : eb_av1_filter_block_plane_vert(pcs_ptr, xd, plane, &pd[plane], mi_row,
1349 : mi_col);
1350 :
1351 : // filter all horizontal edges in every 64x64 super block
1352 0 : eb_av1_setup_dst_planes(pd, pcs_ptr->parent_pcs_ptr->sequence_control_set_ptr->seq_header.sb_size, frame_buffer, mi_row,
1353 : mi_col, plane, plane + 1);
1354 0 : eb_av1_filter_block_plane_horz(pcs_ptr, xd, plane, &pd[plane], mi_row,
1355 : mi_col);
1356 : }
1357 : }
1358 64799 : }
1359 :
1360 1050 : void eb_av1_loop_filter_frame(
1361 : EbPictureBufferDesc *frame_buffer,
1362 : PictureControlSet *picture_control_set_ptr,
1363 : int32_t plane_start, int32_t plane_end) {
1364 1050 : SequenceControlSet *scs_ptr = (SequenceControlSet*)picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_wrapper_ptr->object_ptr;
1365 : //LargestCodingUnit *sb_ptr;
1366 : //uint16_t sb_index;
1367 1050 : uint8_t sb_size_Log2 = (uint8_t)Log2f(scs_ptr->sb_size_pix);
1368 : uint32_t x_lcu_index;
1369 : uint32_t y_lcu_index;
1370 : uint32_t sb_origin_x;
1371 : uint32_t sb_origin_y;
1372 : EbBool endOfRowFlag;
1373 :
1374 1050 : uint32_t picture_width_in_sb = (scs_ptr->seq_header.max_frame_width + scs_ptr->sb_size_pix - 1) / scs_ptr->sb_size_pix;
1375 1050 : uint32_t picture_height_in_sb = (scs_ptr->seq_header.max_frame_height + scs_ptr->sb_size_pix - 1) / scs_ptr->sb_size_pix;
1376 :
1377 1050 : eb_av1_loop_filter_frame_init(&picture_control_set_ptr->parent_pcs_ptr->frm_hdr,
1378 1050 : &picture_control_set_ptr->parent_pcs_ptr->lf_info, plane_start, plane_end);
1379 :
1380 7350 : for (y_lcu_index = 0; y_lcu_index < picture_height_in_sb; ++y_lcu_index) {
1381 69299 : for (x_lcu_index = 0; x_lcu_index < picture_width_in_sb; ++x_lcu_index) {
1382 : //sb_index = (uint16_t)(y_lcu_index * picture_width_in_sb + x_lcu_index);
1383 : //sb_ptr = picture_control_set_ptr->sb_ptr_array[sb_index];
1384 62999 : sb_origin_x = x_lcu_index << sb_size_Log2;
1385 62999 : sb_origin_y = y_lcu_index << sb_size_Log2;
1386 62999 : endOfRowFlag = (x_lcu_index == picture_width_in_sb - 1) ? EB_TRUE : EB_FALSE;
1387 :
1388 62999 : loop_filter_sb(
1389 : frame_buffer,
1390 : picture_control_set_ptr,
1391 : NULL,
1392 62999 : sb_origin_y >> 2,
1393 62999 : sb_origin_x >> 2,
1394 : plane_start,
1395 : plane_end,
1396 : endOfRowFlag);
1397 : }
1398 : }
1399 1050 : }
1400 : extern int16_t eb_av1_ac_quant_Q3(int32_t qindex, int32_t delta, AomBitDepth bit_depth);
1401 :
1402 1170 : void EbCopyBuffer(
1403 : EbPictureBufferDesc *srcBuffer,
1404 : EbPictureBufferDesc *dstBuffer,
1405 : PictureControlSet *pcs_ptr,
1406 : uint8_t plane) {
1407 1170 : EbBool is16bit = (EbBool)(pcs_ptr->parent_pcs_ptr->sequence_control_set_ptr->static_config.encoder_bit_depth > EB_8BIT);
1408 1170 : dstBuffer->origin_x = srcBuffer->origin_x;
1409 1170 : dstBuffer->origin_y = srcBuffer->origin_y;
1410 1170 : dstBuffer->width = srcBuffer->width;
1411 1170 : dstBuffer->height = srcBuffer->height;
1412 1170 : dstBuffer->max_width = srcBuffer->max_width;
1413 1170 : dstBuffer->max_height = srcBuffer->max_height;
1414 1170 : dstBuffer->bit_depth = srcBuffer->bit_depth;
1415 1170 : dstBuffer->luma_size = srcBuffer->luma_size;
1416 1170 : dstBuffer->chroma_size = srcBuffer->chroma_size;
1417 1170 : dstBuffer->packedFlag = srcBuffer->packedFlag;
1418 :
1419 1170 : uint32_t lumaBufferOffset = (srcBuffer->origin_x + srcBuffer->origin_y*srcBuffer->stride_y) << is16bit;
1420 1170 : uint16_t luma_width = (uint16_t)(srcBuffer->width - pcs_ptr->parent_pcs_ptr->sequence_control_set_ptr->pad_right) << is16bit;
1421 1170 : uint16_t luma_height = (uint16_t)(srcBuffer->height - pcs_ptr->parent_pcs_ptr->sequence_control_set_ptr->pad_bottom);
1422 1170 : uint16_t chroma_width = (luma_width >> 1);
1423 1170 : if (plane == 0) {
1424 502 : uint16_t stride_y = srcBuffer->stride_y << is16bit;
1425 :
1426 502 : dstBuffer->stride_y = srcBuffer->stride_y;
1427 502 : dstBuffer->stride_bit_inc_y = srcBuffer->stride_bit_inc_y;
1428 :
1429 181222 : for (int32_t inputRowIndex = 0; inputRowIndex < luma_height; inputRowIndex++) {
1430 180720 : EB_MEMCPY((dstBuffer->buffer_y + lumaBufferOffset + stride_y * inputRowIndex),
1431 : (srcBuffer->buffer_y + lumaBufferOffset + stride_y * inputRowIndex),
1432 : luma_width);
1433 : }
1434 : }
1435 668 : else if (plane == 1) {
1436 340 : uint16_t stride_cb = srcBuffer->stride_cb << is16bit;
1437 340 : dstBuffer->stride_cb = srcBuffer->stride_cb;
1438 340 : dstBuffer->stride_bit_inc_cb = srcBuffer->stride_bit_inc_cb;
1439 :
1440 340 : uint32_t chromaBufferOffset = (srcBuffer->origin_x / 2 + srcBuffer->origin_y / 2 * srcBuffer->stride_cb) << is16bit;
1441 :
1442 61540 : for (int32_t inputRowIndex = 0; inputRowIndex < luma_height >> 1; inputRowIndex++) {
1443 61200 : EB_MEMCPY((dstBuffer->buffer_cb + chromaBufferOffset + stride_cb * inputRowIndex),
1444 : (srcBuffer->buffer_cb + chromaBufferOffset + stride_cb * inputRowIndex),
1445 : chroma_width);
1446 : }
1447 : }
1448 328 : else if (plane == 2) {
1449 328 : uint16_t stride_cr = srcBuffer->stride_cr << is16bit;
1450 :
1451 328 : dstBuffer->stride_cr = srcBuffer->stride_cr;
1452 328 : dstBuffer->stride_bit_inc_cr = srcBuffer->stride_bit_inc_cr;
1453 :
1454 328 : uint32_t chromaBufferOffset = (srcBuffer->origin_x / 2 + srcBuffer->origin_y / 2 * srcBuffer->stride_cr) << is16bit;
1455 :
1456 59368 : for (int32_t inputRowIndex = 0; inputRowIndex < luma_height >> 1; inputRowIndex++) {
1457 59040 : EB_MEMCPY((dstBuffer->buffer_cr + chromaBufferOffset + stride_cr * inputRowIndex),
1458 : (srcBuffer->buffer_cr + chromaBufferOffset + stride_cr * inputRowIndex),
1459 : chroma_width);
1460 : }
1461 : }
1462 1170 : }
1463 :
1464 : //int32_t av1_get_max_filter_level(const Av1Comp *cpi) {
1465 : // if (cpi->oxcf.pass == 2) {
1466 : // return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
1467 : // : MAX_LOOP_FILTER;
1468 : // }
1469 : // else {
1470 : // return MAX_LOOP_FILTER;
1471 : // }
1472 : //}
1473 :
1474 990 : uint64_t PictureSseCalculations(
1475 : PictureControlSet *picture_control_set_ptr,
1476 : EbPictureBufferDesc *recon_ptr,
1477 : int32_t plane)
1478 :
1479 : {
1480 990 : SequenceControlSet *sequence_control_set_ptr = picture_control_set_ptr->parent_pcs_ptr->sequence_control_set_ptr;
1481 990 : EbBool is16bit = (sequence_control_set_ptr->static_config.encoder_bit_depth > EB_8BIT);
1482 :
1483 990 : if (!is16bit) {
1484 990 : EbPictureBufferDesc *input_picture_ptr = (EbPictureBufferDesc*)picture_control_set_ptr->parent_pcs_ptr->enhanced_picture_ptr;
1485 :
1486 : uint32_t columnIndex;
1487 990 : uint32_t row_index = 0;
1488 990 : uint64_t residualDistortion = 0;
1489 : EbByte inputBuffer;
1490 : EbByte reconCoeffBuffer;
1491 990 : if (plane == 0) {
1492 442 : reconCoeffBuffer = &((recon_ptr->buffer_y)[recon_ptr->origin_x + recon_ptr->origin_y * recon_ptr->stride_y]);
1493 442 : inputBuffer = &((input_picture_ptr->buffer_y)[input_picture_ptr->origin_x + input_picture_ptr->origin_y * input_picture_ptr->stride_y]);
1494 :
1495 442 : residualDistortion = 0;
1496 :
1497 159562 : while (row_index < sequence_control_set_ptr->seq_header.max_frame_height) {
1498 159120 : columnIndex = 0;
1499 101915000 : while (columnIndex < sequence_control_set_ptr->seq_header.max_frame_width) {
1500 101755000 : residualDistortion += (int64_t)SQR((int64_t)(inputBuffer[columnIndex]) - (reconCoeffBuffer[columnIndex]));
1501 101755000 : ++columnIndex;
1502 : }
1503 159120 : inputBuffer += input_picture_ptr->stride_y;
1504 159120 : reconCoeffBuffer += recon_ptr->stride_y;
1505 159120 : ++row_index;
1506 : }
1507 :
1508 442 : return residualDistortion;
1509 : }
1510 :
1511 548 : else if (plane == 1) {
1512 280 : reconCoeffBuffer = &((recon_ptr->buffer_cb)[recon_ptr->origin_x / 2 + recon_ptr->origin_y / 2 * recon_ptr->stride_cb]);
1513 280 : inputBuffer = &((input_picture_ptr->buffer_cb)[input_picture_ptr->origin_x / 2 + input_picture_ptr->origin_y / 2 * input_picture_ptr->stride_cb]);
1514 :
1515 280 : residualDistortion = 0;
1516 280 : row_index = 0;
1517 50680 : while (row_index < sequence_control_set_ptr->chroma_height) {
1518 50400 : columnIndex = 0;
1519 16178400 : while (columnIndex < sequence_control_set_ptr->chroma_width) {
1520 16128000 : residualDistortion += (int64_t)SQR((int64_t)(inputBuffer[columnIndex]) - (reconCoeffBuffer[columnIndex]));
1521 16128000 : ++columnIndex;
1522 : }
1523 :
1524 50400 : inputBuffer += input_picture_ptr->stride_cb;
1525 50400 : reconCoeffBuffer += recon_ptr->stride_cb;
1526 50400 : ++row_index;
1527 : }
1528 :
1529 280 : return residualDistortion;
1530 : }
1531 268 : else if (plane == 2) {
1532 268 : reconCoeffBuffer = &((recon_ptr->buffer_cr)[recon_ptr->origin_x / 2 + recon_ptr->origin_y / 2 * recon_ptr->stride_cr]);
1533 268 : inputBuffer = &((input_picture_ptr->buffer_cr)[input_picture_ptr->origin_x / 2 + input_picture_ptr->origin_y / 2 * input_picture_ptr->stride_cr]);
1534 268 : residualDistortion = 0;
1535 268 : row_index = 0;
1536 :
1537 48508 : while (row_index < sequence_control_set_ptr->chroma_height) {
1538 48240 : columnIndex = 0;
1539 15485000 : while (columnIndex < sequence_control_set_ptr->chroma_width) {
1540 15436800 : residualDistortion += (int64_t)SQR((int64_t)(inputBuffer[columnIndex]) - (reconCoeffBuffer[columnIndex]));
1541 15436800 : ++columnIndex;
1542 : }
1543 :
1544 48240 : inputBuffer += input_picture_ptr->stride_cr;
1545 48240 : reconCoeffBuffer += recon_ptr->stride_cr;
1546 48240 : ++row_index;
1547 : }
1548 :
1549 268 : return residualDistortion;
1550 : }
1551 0 : return 0;
1552 : }
1553 : else {
1554 0 : EbPictureBufferDesc *input_picture_ptr = (EbPictureBufferDesc*)picture_control_set_ptr->input_frame16bit;
1555 :
1556 : uint32_t columnIndex;
1557 0 : uint32_t row_index = 0;
1558 0 : uint64_t residualDistortion = 0;
1559 : uint16_t* inputBuffer;
1560 : uint16_t* reconCoeffBuffer;
1561 0 : if (plane == 0) {
1562 0 : reconCoeffBuffer = (uint16_t*)&((recon_ptr->buffer_y)[(recon_ptr->origin_x + recon_ptr->origin_y * recon_ptr->stride_y) << is16bit]);
1563 0 : inputBuffer = (uint16_t*)&((input_picture_ptr->buffer_y)[(input_picture_ptr->origin_x + input_picture_ptr->origin_y * input_picture_ptr->stride_y) << is16bit]);
1564 :
1565 0 : residualDistortion = 0;
1566 :
1567 0 : while (row_index < sequence_control_set_ptr->seq_header.max_frame_height) {
1568 0 : columnIndex = 0;
1569 0 : while (columnIndex < sequence_control_set_ptr->seq_header.max_frame_width) {
1570 0 : residualDistortion += (int64_t)SQR(((int64_t)inputBuffer[columnIndex]) - (int64_t)(reconCoeffBuffer[columnIndex]));
1571 0 : ++columnIndex;
1572 : }
1573 :
1574 0 : inputBuffer += input_picture_ptr->stride_y;
1575 0 : reconCoeffBuffer += recon_ptr->stride_y;
1576 0 : ++row_index;
1577 : }
1578 :
1579 0 : return residualDistortion;
1580 : }
1581 :
1582 0 : else if (plane == 1) {
1583 0 : reconCoeffBuffer = (uint16_t*)&((recon_ptr->buffer_cb)[(recon_ptr->origin_x / 2 + recon_ptr->origin_y / 2 * recon_ptr->stride_cb) << is16bit]);
1584 0 : inputBuffer = (uint16_t*)&((input_picture_ptr->buffer_cb)[(input_picture_ptr->origin_x / 2 + input_picture_ptr->origin_y / 2 * input_picture_ptr->stride_cb) << is16bit]);
1585 :
1586 0 : residualDistortion = 0;
1587 0 : row_index = 0;
1588 0 : while (row_index < sequence_control_set_ptr->chroma_height) {
1589 0 : columnIndex = 0;
1590 0 : while (columnIndex < sequence_control_set_ptr->chroma_width) {
1591 0 : residualDistortion += (int64_t)SQR(((int64_t)inputBuffer[columnIndex]) - (int64_t)(reconCoeffBuffer[columnIndex]));
1592 0 : ++columnIndex;
1593 : }
1594 :
1595 0 : inputBuffer += input_picture_ptr->stride_cb;
1596 0 : reconCoeffBuffer += recon_ptr->stride_cb;
1597 0 : ++row_index;
1598 : }
1599 :
1600 0 : return residualDistortion;
1601 : }
1602 0 : else if (plane == 2) {
1603 0 : reconCoeffBuffer = (uint16_t*)&((recon_ptr->buffer_cr)[(recon_ptr->origin_x / 2 + recon_ptr->origin_y / 2 * recon_ptr->stride_cr) << is16bit]);
1604 0 : inputBuffer = (uint16_t*)&((input_picture_ptr->buffer_cr)[(input_picture_ptr->origin_x / 2 + input_picture_ptr->origin_y / 2 * input_picture_ptr->stride_cr) << is16bit]);
1605 0 : residualDistortion = 0;
1606 0 : row_index = 0;
1607 :
1608 0 : while (row_index < sequence_control_set_ptr->chroma_height) {
1609 0 : columnIndex = 0;
1610 0 : while (columnIndex < sequence_control_set_ptr->chroma_width) {
1611 0 : residualDistortion += (int64_t)SQR(((int64_t)inputBuffer[columnIndex]) - (int64_t)(reconCoeffBuffer[columnIndex]));
1612 0 : ++columnIndex;
1613 : }
1614 :
1615 0 : inputBuffer += input_picture_ptr->stride_cr;
1616 0 : reconCoeffBuffer += recon_ptr->stride_cr;
1617 0 : ++row_index;
1618 : }
1619 :
1620 0 : return residualDistortion;
1621 : }
1622 :
1623 0 : return 0;
1624 : }
1625 : }
1626 :
1627 990 : static int64_t try_filter_frame(
1628 : //const Yv12BufferConfig *sd,
1629 : //Av1Comp *const cpi,
1630 : const EbPictureBufferDesc *sd,
1631 : EbPictureBufferDesc *tempLfReconBuffer,
1632 : PictureControlSet *pcs_ptr,
1633 : int32_t filt_level,
1634 : int32_t partial_frame, int32_t plane, int32_t dir) {
1635 : (void)sd;
1636 : (void)partial_frame;
1637 : (void)sd;
1638 : int64_t filt_err;
1639 990 : FrameHeader *frm_hdr = &pcs_ptr->parent_pcs_ptr->frm_hdr;
1640 990 : assert(plane >= 0 && plane <= 2);
1641 990 : int32_t filter_level[2] = { filt_level, filt_level };
1642 990 : if (plane == 0 && dir == 0) filter_level[1] = frm_hdr->loop_filter_params.filter_level[1];
1643 990 : if (plane == 0 && dir == 1) filter_level[0] = frm_hdr->loop_filter_params.filter_level[0];
1644 :
1645 990 : EbBool is16bit = (EbBool)(pcs_ptr->parent_pcs_ptr->sequence_control_set_ptr->static_config.encoder_bit_depth > EB_8BIT);
1646 990 : EbPictureBufferDesc *recon_buffer = is16bit ? pcs_ptr->recon_picture16bit_ptr : pcs_ptr->recon_picture_ptr;
1647 990 : if (pcs_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_TRUE) {
1648 : //get the 16bit form of the input LCU
1649 657 : if (is16bit)
1650 0 : recon_buffer = ((EbReferenceObject*)pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture16bit;
1651 : else
1652 657 : recon_buffer = ((EbReferenceObject*)pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture;
1653 : }
1654 : else { // non ref pictures
1655 333 : recon_buffer = is16bit ? pcs_ptr->recon_picture16bit_ptr : pcs_ptr->recon_picture_ptr;
1656 : }
1657 :
1658 : // set base filters for use of get_filter_level when in DELTA_Q_LF mode
1659 990 : switch (plane) {
1660 442 : case 0:
1661 442 : frm_hdr->loop_filter_params.filter_level[0] = filter_level[0];
1662 442 : frm_hdr->loop_filter_params.filter_level[1] = filter_level[1];
1663 442 : break;
1664 280 : case 1: frm_hdr->loop_filter_params.filter_level_u = filter_level[0]; break;
1665 268 : case 2: frm_hdr->loop_filter_params.filter_level_v = filter_level[0]; break;
1666 : }
1667 :
1668 990 : eb_av1_loop_filter_frame(recon_buffer, pcs_ptr, plane, plane + 1);
1669 :
1670 990 : filt_err = PictureSseCalculations(pcs_ptr, recon_buffer, plane);
1671 :
1672 : // Re-instate the unfiltered frame
1673 990 : EbCopyBuffer(tempLfReconBuffer/*cpi->last_frame_uf*/, recon_buffer /*cm->frame_to_show*/, pcs_ptr, (uint8_t)plane);
1674 :
1675 990 : return filt_err;
1676 : }
1677 180 : static int32_t search_filter_level(
1678 : //const Yv12BufferConfig *sd, Av1Comp *cpi,
1679 : EbPictureBufferDesc *sd, // source
1680 : EbPictureBufferDesc *tempLfReconBuffer,
1681 : PictureControlSet *pcs_ptr,
1682 : int32_t partial_frame,
1683 : const int32_t *last_frame_filter_level,
1684 : double *best_cost_ret, int32_t plane, int32_t dir) {
1685 180 : const int32_t min_filter_level = 0;
1686 180 : const int32_t max_filter_level = MAX_LOOP_FILTER;// av1_get_max_filter_level(cpi);
1687 180 : int32_t filt_direction = 0;
1688 : int64_t best_err;
1689 : int32_t filt_best;
1690 180 : FrameHeader *frm_hdr = &pcs_ptr->parent_pcs_ptr->frm_hdr;
1691 : //Macroblock *x = &cpi->td.mb;
1692 :
1693 : // Start the search at the previous frame filter level unless it is now out of
1694 : // range.
1695 : int32_t lvl;
1696 180 : switch (plane) {
1697 60 : case 0: lvl = last_frame_filter_level[dir]; break;
1698 60 : case 1: lvl = last_frame_filter_level[2]; break;
1699 60 : case 2: lvl = last_frame_filter_level[3]; break;
1700 0 : default: assert(plane >= 0 && plane <= 2); return 0;
1701 : }
1702 180 : int32_t filt_mid = clamp(lvl, min_filter_level, max_filter_level);
1703 180 : int32_t filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
1704 :
1705 180 : EbBool is16bit = (EbBool)(pcs_ptr->parent_pcs_ptr->sequence_control_set_ptr->static_config.encoder_bit_depth > EB_8BIT);
1706 180 : EbPictureBufferDesc *recon_buffer = is16bit ? pcs_ptr->recon_picture16bit_ptr : pcs_ptr->recon_picture_ptr;
1707 :
1708 180 : if (pcs_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_TRUE) {
1709 : //get the 16bit form of the input LCU
1710 114 : if (is16bit)
1711 0 : recon_buffer = ((EbReferenceObject*)pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture16bit;
1712 : else
1713 114 : recon_buffer = ((EbReferenceObject*)pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)->reference_picture;
1714 : }
1715 : else { // non ref pictures
1716 66 : recon_buffer = is16bit ? pcs_ptr->recon_picture16bit_ptr : pcs_ptr->recon_picture_ptr;
1717 : }
1718 : // Sum squared error at each filter level
1719 : int64_t ss_err[MAX_LOOP_FILTER + 1];
1720 :
1721 : // Set each entry to -1
1722 180 : memset(ss_err, 0xFF, sizeof(ss_err));
1723 : // make a copy of recon_buffer
1724 180 : EbCopyBuffer(recon_buffer/*cm->frame_to_show*/, tempLfReconBuffer/*&cpi->last_frame_uf*/, pcs_ptr, (uint8_t)plane);
1725 :
1726 180 : best_err = try_filter_frame(sd, tempLfReconBuffer, pcs_ptr, filt_mid, partial_frame, plane, dir);
1727 180 : filt_best = filt_mid;
1728 180 : ss_err[filt_mid] = best_err;
1729 :
1730 180 : if (pcs_ptr->parent_pcs_ptr->loop_filter_mode <= 2) {
1731 0 : filter_step = 2;
1732 0 : const int32_t filt_high = AOMMIN(filt_mid + filter_step, max_filter_level);
1733 0 : const int32_t filt_low = AOMMAX(filt_mid - filter_step, min_filter_level);
1734 :
1735 : // Bias against raising loop filter in favor of lowering it.
1736 0 : int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
1737 :
1738 : //if ((cpi->oxcf.pass == 2) && (cpi->twopass.section_intra_rating < 20))
1739 : // bias = (bias * cpi->twopass.section_intra_rating) / 20;
1740 :
1741 : // yx, bias less for large block size
1742 0 : if (frm_hdr->tx_mode != ONLY_4X4) bias >>= 1;
1743 :
1744 0 : if (filt_direction <= 0 && filt_low != filt_mid) {
1745 : // Get Low filter error score
1746 0 : if (ss_err[filt_low] < 0) {
1747 0 : ss_err[filt_low] =
1748 0 : try_filter_frame(sd, tempLfReconBuffer, pcs_ptr, filt_low, partial_frame, plane, dir);
1749 : }
1750 : // If value is close to the best so far then bias towards a lower loop
1751 : // filter value.
1752 0 : if (ss_err[filt_low] < (best_err + bias)) {
1753 : // Was it actually better than the previous best?
1754 0 : if (ss_err[filt_low] < best_err)
1755 0 : best_err = ss_err[filt_low];
1756 0 : filt_best = filt_low;
1757 : }
1758 : }
1759 :
1760 : // Now look at filt_high
1761 0 : if (filt_direction >= 0 && filt_high != filt_mid) {
1762 0 : if (ss_err[filt_high] < 0) {
1763 0 : ss_err[filt_high] =
1764 0 : try_filter_frame(sd, tempLfReconBuffer, pcs_ptr, filt_high, partial_frame, plane, dir);
1765 : }
1766 : // If value is significantly better than previous best, bias added against
1767 : // raising filter value
1768 0 : if (ss_err[filt_high] < (best_err - bias)) {
1769 0 : best_err = ss_err[filt_high];
1770 0 : filt_best = filt_high;
1771 : }
1772 : }
1773 : }
1774 : else {
1775 983 : while (filter_step > 0) {
1776 803 : const int32_t filt_high = AOMMIN(filt_mid + filter_step, max_filter_level);
1777 803 : const int32_t filt_low = AOMMAX(filt_mid - filter_step, min_filter_level);
1778 :
1779 : // Bias against raising loop filter in favor of lowering it.
1780 803 : int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
1781 :
1782 : //if ((cpi->oxcf.pass == 2) && (cpi->twopass.section_intra_rating < 20))
1783 : // bias = (bias * cpi->twopass.section_intra_rating) / 20;
1784 :
1785 : // yx, bias less for large block size
1786 803 : if (frm_hdr->tx_mode != ONLY_4X4) bias >>= 1;
1787 :
1788 803 : if (filt_direction <= 0 && filt_low != filt_mid) {
1789 : // Get Low filter error score
1790 236 : if (ss_err[filt_low] < 0) {
1791 162 : ss_err[filt_low] =
1792 162 : try_filter_frame(sd, tempLfReconBuffer, pcs_ptr, filt_low, partial_frame, plane, dir);
1793 : }
1794 : // If value is close to the best so far then bias towards a lower loop
1795 : // filter value.
1796 236 : if (ss_err[filt_low] < (best_err + bias)) {
1797 : // Was it actually better than the previous best?
1798 84 : if (ss_err[filt_low] < best_err)
1799 40 : best_err = ss_err[filt_low];
1800 84 : filt_best = filt_low;
1801 : }
1802 : }
1803 :
1804 : // Now look at filt_high
1805 803 : if (filt_direction >= 0 && filt_high != filt_mid) {
1806 729 : if (ss_err[filt_high] < 0) {
1807 648 : ss_err[filt_high] =
1808 648 : try_filter_frame(sd, tempLfReconBuffer, pcs_ptr, filt_high, partial_frame, plane, dir);
1809 : }
1810 : // If value is significantly better than previous best, bias added against
1811 : // raising filter value
1812 729 : if (ss_err[filt_high] < (best_err - bias)) {
1813 189 : best_err = ss_err[filt_high];
1814 189 : filt_best = filt_high;
1815 : }
1816 : }
1817 :
1818 : // Half the step distance if the best filter value was the same as last time
1819 803 : if (filt_best == filt_mid) {
1820 540 : filter_step /= 2;
1821 540 : filt_direction = 0;
1822 : }
1823 : else {
1824 263 : filt_direction = (filt_best < filt_mid) ? -1 : 1;
1825 263 : filt_mid = filt_best;
1826 : }
1827 : }
1828 : }
1829 : // Update best error
1830 180 : best_err = ss_err[filt_best];
1831 :
1832 180 : if (best_cost_ret) *best_cost_ret = (double)best_err;//RDCOST_DBL(x->rdmult, 0, best_err);
1833 180 : return filt_best;
1834 : }
1835 :
1836 90 : void eb_av1_pick_filter_level(
1837 : DlfContext *context_ptr,
1838 : EbPictureBufferDesc *srcBuffer, // source input
1839 : PictureControlSet *pcs_ptr,
1840 : LpfPickMethod method) {
1841 90 : SequenceControlSet *scs_ptr = (SequenceControlSet*)pcs_ptr->parent_pcs_ptr->sequence_control_set_wrapper_ptr->object_ptr;
1842 90 : FrameHeader *frm_hdr = &pcs_ptr->parent_pcs_ptr->frm_hdr;
1843 :
1844 90 : const int32_t num_planes = 3;
1845 : (void)srcBuffer;
1846 90 : struct LoopFilter *const lf = &frm_hdr->loop_filter_params;
1847 90 : lf->sharpness_level = frm_hdr->frame_type == KEY_FRAME ? 0 : LF_SHARPNESS;
1848 :
1849 90 : if (method == LPF_PICK_MINIMAL_LPF) {
1850 0 : lf->filter_level[0] = 0;
1851 0 : lf->filter_level[1] = 0;
1852 : }
1853 90 : else if (method >= LPF_PICK_FROM_Q) {
1854 30 : const int32_t min_filter_level = 0;
1855 30 : const int32_t max_filter_level = MAX_LOOP_FILTER;// av1_get_max_filter_level(cpi);
1856 30 : const int32_t q = eb_av1_ac_quant_Q3(frm_hdr->quantization_params.base_q_idx, 0, (AomBitDepth)scs_ptr->static_config.encoder_bit_depth);
1857 : // These values were determined by linear fitting the result of the
1858 : // searched level for 8 bit depth:
1859 : // Keyframes: filt_guess = q * 0.06699 - 1.60817
1860 : // Other frames: filt_guess = q * 0.02295 + 2.48225
1861 : //
1862 : // And high bit depth separately:
1863 : // filt_guess = q * 0.316206 + 3.87252
1864 : int32_t filt_guess;
1865 30 : switch (scs_ptr->static_config.encoder_bit_depth) {
1866 30 : case EB_8BIT:
1867 60 : filt_guess = (frm_hdr->frame_type == KEY_FRAME)
1868 1 : ? ROUND_POWER_OF_TWO(q * 17563 - 421574, 18)
1869 30 : : ROUND_POWER_OF_TWO(q * 6017 + 650707, 18);
1870 30 : break;
1871 0 : case EB_10BIT:
1872 0 : filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20);
1873 0 : break;
1874 0 : case EB_12BIT:
1875 0 : filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22);
1876 0 : break;
1877 0 : default:
1878 0 : assert(0 &&
1879 : "bit_depth should be AOM_BITS_8, AOM_BITS_10 "
1880 : "or AOM_BITS_12");
1881 : return;
1882 : }
1883 30 : if (scs_ptr->static_config.encoder_bit_depth != EB_8BIT && frm_hdr->frame_type == KEY_FRAME)
1884 0 : filt_guess -= 4;
1885 :
1886 30 : filt_guess = filt_guess > 2 ? filt_guess - 2 : filt_guess > 1 ? filt_guess - 1 : filt_guess;
1887 30 : int32_t filt_guess_chroma = filt_guess > 1 ? filt_guess / 2 : filt_guess;
1888 :
1889 : // TODO(chengchen): retrain the model for Y, U, V filter levels
1890 30 : lf->filter_level[0] = clamp(filt_guess, min_filter_level, max_filter_level);
1891 30 : lf->filter_level[1] = clamp(filt_guess, min_filter_level, max_filter_level);
1892 30 : lf->filter_level_u = clamp(filt_guess_chroma, min_filter_level, max_filter_level);
1893 30 : lf->filter_level_v = clamp(filt_guess_chroma, min_filter_level, max_filter_level);
1894 : }
1895 : else {
1896 60 : const int32_t last_frame_filter_level[4] = { lf->filter_level[0],
1897 60 : lf->filter_level[1],
1898 60 : lf->filter_level_u,
1899 60 : lf->filter_level_v };
1900 60 : EbPictureBufferDesc *tempLfReconBuffer = (scs_ptr->static_config.encoder_bit_depth != EB_8BIT) ? context_ptr->temp_lf_recon_picture16bit_ptr : context_ptr->temp_lf_recon_picture_ptr;
1901 :
1902 60 : lf->filter_level[0] = lf->filter_level[1] =
1903 60 : search_filter_level(srcBuffer, tempLfReconBuffer, pcs_ptr, method == LPF_PICK_FROM_SUBIMAGE,
1904 : last_frame_filter_level, NULL, 0, 2);
1905 :
1906 60 : if (num_planes > 1) {
1907 60 : lf->filter_level_u =
1908 60 : search_filter_level(srcBuffer, tempLfReconBuffer, pcs_ptr, method == LPF_PICK_FROM_SUBIMAGE,
1909 : last_frame_filter_level, NULL, 1, 0);
1910 60 : lf->filter_level_v =
1911 60 : search_filter_level(srcBuffer, tempLfReconBuffer, pcs_ptr, method == LPF_PICK_FROM_SUBIMAGE,
1912 : last_frame_filter_level, NULL, 2, 0);
1913 : }
1914 : }
1915 : }
|