/*
* Copyright(c) 2019 Intel Corporation
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/

/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/

#include <stdlib.h>
#include "EbUtility.h"
#include "EbPictureOperators.h"
#include "EbDefinitions.h"
#include "EbTransforms.h"
#include "aom_dsp_rtcd.h"

// Returns 1 if any coefficient of a 4x4 block is non-zero, 0 otherwise.
// Each row of four int16_t coefficients is read as a single uint64_t word,
// so the int16_t stride is divided by 4 to get the stride in 8-byte words.
uint32_t CheckNZero4x4(
    int16_t *coeff,
    uint32_t coeff_stride) {
    const uint32_t stride = coeff_stride / 4;

    uint64_t *coefPtr = (uint64_t *)coeff;

    if (coefPtr[0] != 0)
        return 1;
    else if (coefPtr[stride] != 0)
        return 1;
    else if (coefPtr[2 * stride] != 0)
        return 1;
    else if (coefPtr[3 * stride] != 0)
        return 1;

    return 0;
}
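
/*
 * Illustrative sketch, not part of the library: one way CheckNZero4x4 could
 * be exercised on a 4x4 block inside a wider coefficient plane. The plane
 * width of 16 samples is an assumption for demonstration; the block must be
 * 8-byte aligned because its rows are read as uint64_t words.
 */
#if 0
static void check_nzero_4x4_example(void) {
    int16_t plane[4 * 16] = {0};             // 4 rows of 16 int16_t, all zero
    uint32_t any = CheckNZero4x4(plane, 16); // any == 0: block is empty
    plane[2 * 16 + 3] = -7;                  // set one coefficient in row 2
    any = CheckNZero4x4(plane, 16);          // any == 1: non-zero value found
    (void)any;
}
#endif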

const int8_t *eb_inv_txfm_shift_ls[TX_SIZES_ALL] = {
    inv_shift_4x4, inv_shift_8x8, inv_shift_16x16, inv_shift_32x32,
    inv_shift_64x64, inv_shift_4x8, inv_shift_8x4, inv_shift_8x16,
    inv_shift_16x8, inv_shift_16x32, inv_shift_32x16, inv_shift_32x64,
    inv_shift_64x32, inv_shift_4x16, inv_shift_16x4, inv_shift_8x32,
    inv_shift_32x8, inv_shift_16x64, inv_shift_64x16,
};

static const int8_t *fwd_txfm_range_mult2_list[TXFM_TYPES] = {
    fdct4_range_mult2, fdct8_range_mult2, fdct16_range_mult2,
    fdct32_range_mult2, fdct64_range_mult2, fadst4_range_mult2,
    fadst8_range_mult2, fadst16_range_mult2, fadst32_range_mult2,
    fidtx4_range_mult2, fidtx8_range_mult2, fidtx16_range_mult2,
    fidtx32_range_mult2, fidtx64_range_mult2
};

static const int8_t *fwd_txfm_shift_ls[TX_SIZES_ALL] = {
    fwd_shift_4x4, fwd_shift_8x8, fwd_shift_16x16, fwd_shift_32x32,
    fwd_shift_64x64, fwd_shift_4x8, fwd_shift_8x4, fwd_shift_8x16,
    fwd_shift_16x8, fwd_shift_16x32, fwd_shift_32x16, fwd_shift_32x64,
    fwd_shift_64x32, fwd_shift_4x16, fwd_shift_16x4, fwd_shift_8x32,
    fwd_shift_32x8, fwd_shift_16x64, fwd_shift_64x16,
};

/*
 * Scales each coefficient of a compute_size x compute_size block by the
 * matching masking_matrix entry (fixed-point multiply on the magnitude,
 * rounded via offset, shifted down by shift_num, sign restored), clips the
 * result to the signed 16-bit range, and counts the non-zero outputs.
 */
void mat_mult_out(
    int16_t *coeff,
    const uint32_t coeff_stride,
    int16_t *coeff_out,
    const uint32_t coeff_out_stride,
    const uint16_t *masking_matrix,
    const uint32_t masking_matrix_stride,
    const uint32_t compute_size,
    const int32_t offset,
    const int32_t shift_num,
    uint32_t *nonzerocoeff) {
    uint32_t coeffLocation = 0, coeffOutLocation = 0;
    uint32_t row_index, colIndex;
    int32_t coeffTemp;

    *nonzerocoeff = 0;

    for (row_index = 0; row_index < compute_size; ++row_index) {
        for (colIndex = 0; colIndex < compute_size; ++colIndex) {
            // Scale the magnitude, then restore the input coefficient's sign.
            coeffTemp = (ABS(coeff[coeffLocation]) * masking_matrix[colIndex + row_index * masking_matrix_stride] + offset) >> shift_num;
            coeffTemp = (coeff[coeffLocation] < 0) ? -coeffTemp : coeffTemp;

            coeff_out[coeffOutLocation] = (int16_t)CLIP3(MIN_NEG_16BIT_NUM, MAX_POS_16BIT_NUM, coeffTemp);

            (*nonzerocoeff) += (coeffTemp != 0);
            ++coeffLocation;
            ++coeffOutLocation;
        }
        // Skip the padding between the block width and each plane's stride.
        coeffLocation += coeff_stride - compute_size;
        coeffOutLocation += coeff_out_stride - compute_size;
    }
}
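
/*
 * Illustrative sketch, not part of the library: one way mat_mult_out could
 * be driven. The buffer sizes, mask value, and offset/shift choices below
 * are assumptions for demonstration only; a mask entry of 128 with offset
 * 128 and shift_num 8 scales every coefficient to about half its magnitude.
 */
#if 0
static void mat_mult_out_example(void) {
    int16_t  in[16];
    int16_t  out[16];
    uint16_t mask[16];
    uint32_t nz = 0;
    for (int i = 0; i < 16; ++i) {
        in[i]   = (int16_t)(i - 8); // mix of negative, zero, and positive
        mask[i] = 128;              // 128/256 = 50% scaling
    }
    // offset = 128 rounds the product; shift_num = 8 divides by 256.
    mat_mult_out(in, 4, out, 4, mask, 4, 4, 128, 8, &nz);
    // out[i] is roughly in[i] / 2 with the sign preserved; nz == 15 because
    // only the single zero input remains zero after scaling.
}
#endif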

/*****************************
 * function header
 *****************************/

uint64_t GetPMCost(
    uint64_t lambda,
    uint64_t tuDistortion,
    uint64_t y_tu_coeff_bits
);

/*****************************
 * Defines
 *****************************/

#define BETA_P 1
#define BETA_N 3

/********************************************
 * Constants
 ********************************************/

#define ALPHA_0000 0
#define ALPHA_0050 50

#define ALPHA_0100 100
#define ALPHA_0200 200
#define ALPHA_0300 300
#define ALPHA_0500 500
#define ALPHA_1000 1000

EB_EXTERN EB_ALIGN(16) const int16_t TransformAsmConst[] = {
    2, 0, 2, 0, 2, 0, 2, 0,
    4, 0, 4, 0, 4, 0, 4, 0,
    8, 0, 8, 0, 8, 0, 8, 0,
    9, 0, 9, 0, 9, 0, 9, 0,
    13, 0, 13, 0, 13, 0, 13, 0,
    16, 0, 16, 0, 16, 0, 16, 0,
    18, 0, 18, 0, 18, 0, 18, 0,
    22, 0, 22, 0, 22, 0, 22, 0,
    25, 0, 25, 0, 25, 0, 25, 0,
    31, 0, 31, 0, 31, 0, 31, 0,
    36, 0, 36, 0, 36, 0, 36, 0,
    38, 0, 38, 0, 38, 0, 38, 0,
    43, 0, 43, 0, 43, 0, 43, 0,
    46, 0, 46, 0, 46, 0, 46, 0,
    50, 0, 50, 0, 50, 0, 50, 0,
    54, 0, 54, 0, 54, 0, 54, 0,
    57, 0, 57, 0, 57, 0, 57, 0,
    61, 0, 61, 0, 61, 0, 61, 0,
    67, 0, 67, 0, 67, 0, 67, 0,
    70, 0, 70, 0, 70, 0, 70, 0,
    73, 0, 73, 0, 73, 0, 73, 0,
    75, 0, 75, 0, 75, 0, 75, 0,
    78, 0, 78, 0, 78, 0, 78, 0,
    80, 0, 80, 0, 80, 0, 80, 0,
    82, 0, 82, 0, 82, 0, 82, 0,
    83, 0, 83, 0, 83, 0, 83, 0,
    85, 0, 85, 0, 85, 0, 85, 0,
    87, 0, 87, 0, 87, 0, 87, 0,
    88, 0, 88, 0, 88, 0, 88, 0,
    89, 0, 89, 0, 89, 0, 89, 0,
    90, 0, 90, 0, 90, 0, 90, 0,
    256, 0, 256, 0, 256, 0, 256, 0,
    512, 0, 512, 0, 512, 0, 512, 0,
    1024, 0, 1024, 0, 1024, 0, 1024, 0,
    83, 36, 83, 36, 83, 36, 83, 36,
    36, -83, 36, -83, 36, -83, 36, -83,
    89, 75, 89, 75, 89, 75, 89, 75,
    50, 18, 50, 18, 50, 18, 50, 18,
    75, -18, 75, -18, 75, -18, 75, -18,
    -89, -50, -89, -50, -89, -50, -89, -50,
    50, -89, 50, -89, 50, -89, 50, -89,
    18, 75, 18, 75, 18, 75, 18, 75,
    18, -50, 18, -50, 18, -50, 18, -50,
    75, -89, 75, -89, 75, -89, 75, -89,
    90, 87, 90, 87, 90, 87, 90, 87, // 16x16
    80, 70, 80, 70, 80, 70, 80, 70,
    57, 43, 57, 43, 57, 43, 57, 43,
    25, 9, 25, 9, 25, 9, 25, 9,
    87, 57, 87, 57, 87, 57, 87, 57,
    9, -43, 9, -43, 9, -43, 9, -43,
    -80, -90, -80, -90, -80, -90, -80, -90,
    -70, -25, -70, -25, -70, -25, -70, -25,
    80, 9, 80, 9, 80, 9, 80, 9,
    -70, -87, -70, -87, -70, -87, -70, -87,
    -25, 57, -25, 57, -25, 57, -25, 57,
    90, 43, 90, 43, 90, 43, 90, 43,
    70, -43, 70, -43, 70, -43, 70, -43,
    -87, 9, -87, 9, -87, 9, -87, 9,
    90, 25, 90, 25, 90, 25, 90, 25,
    -80, -57, -80, -57, -80, -57, -80, -57,
    57, -80, 57, -80, 57, -80, 57, -80,
    -25, 90, -25, 90, -25, 90, -25, 90,
    -9, -87, -9, -87, -9, -87, -9, -87,
    43, 70, 43, 70, 43, 70, 43, 70,
    43, -90, 43, -90, 43, -90, 43, -90,
    57, 25, 57, 25, 57, 25, 57, 25,
    -87, 70, -87, 70, -87, 70, -87, 70,
    9, -80, 9, -80, 9, -80, 9, -80,
    25, -70, 25, -70, 25, -70, 25, -70,
    90, -80, 90, -80, 90, -80, 90, -80,
    43, 9, 43, 9, 43, 9, 43, 9,
    -57, 87, -57, 87, -57, 87, -57, 87,
    9, -25, 9, -25, 9, -25, 9, -25,
    43, -57, 43, -57, 43, -57, 43, -57,
    70, -80, 70, -80, 70, -80, 70, -80,
    87, -90, 87, -90, 87, -90, 87, -90,
};

EB_ALIGN(16) const int16_t transform_asm_const_sse4_1[] = {
    2, 0, 2, 0, 2, 0, 2, 0,
    4, 0, 4, 0, 4, 0, 4, 0,
    8, 0, 8, 0, 8, 0, 8, 0,
    9, 0, 9, 0, 9, 0, 9, 0,
    13, 0, 13, 0, 13, 0, 13, 0,
    16, 0, 16, 0, 16, 0, 16, 0,
    18, 0, 18, 0, 18, 0, 18, 0,
    22, 0, 22, 0, 22, 0, 22, 0,
    25, 0, 25, 0, 25, 0, 25, 0,
    31, 0, 31, 0, 31, 0, 31, 0,
    36, 0, 36, 0, 36, 0, 36, 0,
    38, 0, 38, 0, 38, 0, 38, 0,
    43, 0, 43, 0, 43, 0, 43, 0,
    46, 0, 46, 0, 46, 0, 46, 0,
    50, 0, 50, 0, 50, 0, 50, 0,
    54, 0, 54, 0, 54, 0, 54, 0,
    57, 0, 57, 0, 57, 0, 57, 0,
    61, 0, 61, 0, 61, 0, 61, 0,
    67, 0, 67, 0, 67, 0, 67, 0,
    70, 0, 70, 0, 70, 0, 70, 0,
    73, 0, 73, 0, 73, 0, 73, 0,
    75, 0, 75, 0, 75, 0, 75, 0,
    78, 0, 78, 0, 78, 0, 78, 0,
    80, 0, 80, 0, 80, 0, 80, 0,
    82, 0, 82, 0, 82, 0, 82, 0,
    83, 0, 83, 0, 83, 0, 83, 0,
    85, 0, 85, 0, 85, 0, 85, 0,
    87, 0, 87, 0, 87, 0, 87, 0,
    88, 0, 88, 0, 88, 0, 88, 0,
    89, 0, 89, 0, 89, 0, 89, 0,
    90, 0, 90, 0, 90, 0, 90, 0,
    256, 0, 256, 0, 256, 0, 256, 0,
    512, 0, 512, 0, 512, 0, 512, 0,
    1024, 0, 1024, 0, 1024, 0, 1024, 0,
    83, 36, 83, 36, 83, 36, 83, 36,
    36, -83, 36, -83, 36, -83, 36, -83,
    89, 75, 89, 75, 89, 75, 89, 75,
    50, 18, 50, 18, 50, 18, 50, 18,
    75, -18, 75, -18, 75, -18, 75, -18,
    -89, -50, -89, -50, -89, -50, -89, -50,
    50, -89, 50, -89, 50, -89, 50, -89,
    18, 75, 18, 75, 18, 75, 18, 75,
    18, -50, 18, -50, 18, -50, 18, -50,
    75, -89, 75, -89, 75, -89, 75, -89,
    90, 87, 90, 87, 90, 87, 90, 87, // 16x16
    80, 70, 80, 70, 80, 70, 80, 70,
    57, 43, 57, 43, 57, 43, 57, 43,
    25, 9, 25, 9, 25, 9, 25, 9,
    87, 57, 87, 57, 87, 57, 87, 57,
    9, -43, 9, -43, 9, -43, 9, -43,
    -80, -90, -80, -90, -80, -90, -80, -90,
    -70, -25, -70, -25, -70, -25, -70, -25,
    80, 9, 80, 9, 80, 9, 80, 9,
    -70, -87, -70, -87, -70, -87, -70, -87,
    -25, 57, -25, 57, -25, 57, -25, 57,
    90, 43, 90, 43, 90, 43, 90, 43,
    70, -43, 70, -43, 70, -43, 70, -43,
    -87, 9, -87, 9, -87, 9, -87, 9,
    90, 25, 90, 25, 90, 25, 90, 25,
    -80, -57, -80, -57, -80, -57, -80, -57,
    57, -80, 57, -80, 57, -80, 57, -80,
    -25, 90, -25, 90, -25, 90, -25, 90,
    -9, -87, -9, -87, -9, -87, -9, -87,
    43, 70, 43, 70, 43, 70, 43, 70,
    43, -90, 43, -90, 43, -90, 43, -90,
    57, 25, 57, 25, 57, 25, 57, 25,
    -87, 70, -87, 70, -87, 70, -87, 70,
    9, -80, 9, -80, 9, -80, 9, -80,
    25, -70, 25, -70, 25, -70, 25, -70,
    90, -80, 90, -80, 90, -80, 90, -80,
    43, 9, 43, 9, 43, 9, 43, 9,
    -57, 87, -57, 87, -57, 87, -57, 87,
    9, -25, 9, -25, 9, -25, 9, -25,
    43, -57, 43, -57, 43, -57, 43, -57,
    70, -80, 70, -80, 70, -80, 70, -80,
    87, -90, 87, -90, 87, -90, 87, -90,
    90, 90, 90, 90, 90, 90, 90, 90, // 32x32
    88, 85, 88, 85, 88, 85, 88, 85,
    82, 78, 82, 78, 82, 78, 82, 78,
    73, 67, 73, 67, 73, 67, 73, 67,
    61, 54, 61, 54, 61, 54, 61, 54,
    46, 38, 46, 38, 46, 38, 46, 38,
    31, 22, 31, 22, 31, 22, 31, 22,
    13, 4, 13, 4, 13, 4, 13, 4,
    90, 82, 90, 82, 90, 82, 90, 82,
    67, 46, 67, 46, 67, 46, 67, 46,
    22, -4, 22, -4, 22, -4, 22, -4,
    -31, -54, -31, -54, -31, -54, -31, -54,
    -73, -85, -73, -85, -73, -85, -73, -85,
    -90, -88, -90, -88, -90, -88, -90, -88,
    -78, -61, -78, -61, -78, -61, -78, -61,
    -38, -13, -38, -13, -38, -13, -38, -13,
    88, 67, 88, 67, 88, 67, 88, 67,
    31, -13, 31, -13, 31, -13, 31, -13,
    -54, -82, -54, -82, -54, -82, -54, -82,
    -90, -78, -90, -78, -90, -78, -90, -78,
    -46, -4, -46, -4, -46, -4, -46, -4,
    38, 73, 38, 73, 38, 73, 38, 73,
    90, 85, 90, 85, 90, 85, 90, 85,
    61, 22, 61, 22, 61, 22, 61, 22,
    85, 46, 85, 46, 85, 46, 85, 46,
    -13, -67, -13, -67, -13, -67, -13, -67,
    -90, -73, -90, -73, -90, -73, -90, -73,
    -22, 38, -22, 38, -22, 38, -22, 38,
    82, 88, 82, 88, 82, 88, 82, 88,
    54, -4, 54, -4, 54, -4, 54, -4,
    -61, -90, -61, -90, -61, -90, -61, -90,
    -78, -31, -78, -31, -78, -31, -78, -31,
    82, 22, 82, 22, 82, 22, 82, 22,
    -54, -90, -54, -90, -54, -90, -54, -90,
    -61, 13, -61, 13, -61, 13, -61, 13,
    78, 85, 78, 85, 78, 85, 78, 85,
    31, -46, 31, -46, 31, -46, 31, -46,
    -90, -67, -90, -67, -90, -67, -90, -67,
    4, 73, 4, 73, 4, 73, 4, 73,
    88, 38, 88, 38, 88, 38, 88, 38,
    78, -4, 78, -4, 78, -4, 78, -4,
    -82, -73, -82, -73, -82, -73, -82, -73,
    13, 85, 13, 85, 13, 85, 13, 85,
    67, -22, 67, -22, 67, -22, 67, -22,
    -88, -61, -88, -61, -88, -61, -88, -61,
    31, 90, 31, 90, 31, 90, 31, 90,
    54, -38, 54, -38, 54, -38, 54, -38,
    -90, -46, -90, -46, -90, -46, -90, -46,
    73, -31, 73, -31, 73, -31, 73, -31,
    -90, -22, -90, -22, -90, -22, -90, -22,
    78, 67, 78, 67, 78, 67, 78, 67,
    -38, -90, -38, -90, -38, -90, -38, -90,
    -13, 82, -13, 82, -13, 82, -13, 82,
    61, -46, 61, -46, 61, -46, 61, -46,
    -88, -4, -88, -4, -88, -4, -88, -4,
    85, 54, 85, 54, 85, 54, 85, 54,
    67, -54, 67, -54, 67, -54, 67, -54,
    -78, 38, -78, 38, -78, 38, -78, 38,
    85, -22, 85, -22, 85, -22, 85, -22,
    -90, 4, -90, 4, -90, 4, -90, 4,
    90, 13, 90, 13, 90, 13, 90, 13,
    -88, -31, -88, -31, -88, -31, -88, -31,
    82, 46, 82, 46, 82, 46, 82, 46,
    -73, -61, -73, -61, -73, -61, -73, -61,
    61, -73, 61, -73, 61, -73, 61, -73,
    -46, 82, -46, 82, -46, 82, -46, 82,
    31, -88, 31, -88, 31, -88, 31, -88,
    -13, 90, -13, 90, -13, 90, -13, 90,
    -4, -90, -4, -90, -4, -90, -4, -90,
    22, 85, 22, 85, 22, 85, 22, 85,
    -38, -78, -38, -78, -38, -78, -38, -78,
    54, 67, 54, 67, 54, 67, 54, 67,
    54, -85, 54, -85, 54, -85, 54, -85,
    -4, 88, -4, 88, -4, 88, -4, 88,
    -46, -61, -46, -61, -46, -61, -46, -61,
    82, 13, 82, 13, 82, 13, 82, 13,
    -90, 38, -90, 38, -90, 38, -90, 38,
    67, -78, 67, -78, 67, -78, 67, -78,
    -22, 90, -22, 90, -22, 90, -22, 90,
    -31, -73, -31, -73, -31, -73, -31, -73,
    46, -90, 46, -90, 46, -90, 46, -90,
    38, 54, 38, 54, 38, 54, 38, 54,
    -90, 31, -90, 31, -90, 31, -90, 31,
    61, -88, 61, -88, 61, -88, 61, -88,
    22, 67, 22, 67, 22, 67, 22, 67,
    -85, 13, -85, 13, -85, 13, -85, 13,
    73, -82, 73, -82, 73, -82, 73, -82,
    4, 78, 4, 78, 4, 78, 4, 78,
    38, -88, 38, -88, 38, -88, 38, -88,
    73, -4, 73, -4, 73, -4, 73, -4,
    -67, 90, -67, 90, -67, 90, -67, 90,
    -46, -31, -46, -31, -46, -31, -46, -31,
    85, -78, 85, -78, 85, -78, 85, -78,
    13, 61, 13, 61, 13, 61, 13, 61,
    -90, 54, -90, 54, -90, 54, -90, 54,
    22, -82, 22, -82, 22, -82, 22, -82,
    31, -78, 31, -78, 31, -78, 31, -78,
    90, -61, 90, -61, 90, -61, 90, -61,
    4, 54, 4, 54, 4, 54, 4, 54,
    -88, 82, -88, 82, -88, 82, -88, 82,
    -38, -22, -38, -22, -38, -22, -38, -22,
    73, -90, 73, -90, 73, -90, 73, -90,
    67, -13, 67, -13, 67, -13, 67, -13,
    -46, 85, -46, 85, -46, 85, -46, 85,
    22, -61, 22, -61, 22, -61, 22, -61,
    85, -90, 85, -90, 85, -90, 85, -90,
    73, -38, 73, -38, 73, -38, 73, -38,
    -4, 46, -4, 46, -4, 46, -4, 46,
    -78, 90, -78, 90, -78, 90, -78, 90,
    -82, 54, -82, 54, -82, 54, -82, 54,
    -13, -31, -13, -31, -13, -31, -13, -31,
    67, -88, 67, -88, 67, -88, 67, -88,
    13, -38, 13, -38, 13, -38, 13, -38,
    61, -78, 61, -78, 61, -78, 61, -78,
    88, -90, 88, -90, 88, -90, 88, -90,
    85, -73, 85, -73, 85, -73, 85, -73,
    54, -31, 54, -31, 54, -31, 54, -31,
    4, 22, 4, 22, 4, 22, 4, 22,
    -46, 67, -46, 67, -46, 67, -46, 67,
    -82, 90, -82, 90, -82, 90, -82, 90,
    4, -13, 4, -13, 4, -13, 4, -13,
    22, -31, 22, -31, 22, -31, 22, -31,
    38, -46, 38, -46, 38, -46, 38, -46,
    54, -61, 54, -61, 54, -61, 54, -61,
    67, -73, 67, -73, 67, -73, 67, -73,
    78, -82, 78, -82, 78, -82, 78, -82,
    85, -88, 85, -88, 85, -88, 85, -88,
    90, -90, 90, -90, 90, -90, 90, -90,
};

#define PMP_PRECISION 8
#define PMP_MAX (1 << PMP_PRECISION)

// M_xx expands to roughly xx% of PMP_MAX; the definitions are parenthesized
// so they expand safely inside larger expressions.
#define M_100 ((100 * PMP_MAX) / 100)
#define M_90 ((90 * PMP_MAX) / 100)
#define M_80 ((80 * PMP_MAX) / 100)
#define M_70 ((70 * PMP_MAX) / 100)
#define M_60 ((60 * PMP_MAX) / 100)
#define M_50 ((50 * PMP_MAX) / 100)
#define M_40 ((40 * PMP_MAX) / 100)
#define M_30 ((30 * PMP_MAX) / 100)
#define M_25 ((25 * PMP_MAX) / 100)
#define M_20 ((20 * PMP_MAX) / 100)
#define M_10 ((10 * PMP_MAX) / 100)
#define M_0 ((0 * PMP_MAX) / 100)
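
/*
 * Worked values, assuming PMP_PRECISION == 8 (so PMP_MAX == 256):
 *   M_100 = 256, M_90 = 230, M_80 = 204, M_70 = 179, M_60 = 153, M_50 = 128,
 *   M_40 = 102, M_30 = 76, M_25 = 64, M_20 = 51, M_10 = 25, M_0 = 0.
 * Multiplying a coefficient by M_xx and shifting right by PMP_PRECISION
 * (with an offset of PMP_MAX / 2 for rounding) scales it to about xx% of
 * its original magnitude, which is presumably how mat_mult_out consumes the
 * masking matrices below.
 */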

// Level0
// 4K
// 4x4
static const uint16_t MaskingMatrix4x4_Level0_4K[] = {
    M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100
};
// 8x8
static const uint16_t MaskingMatrix8x8_Level0_4K[] = {
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100
};
// 16x16
static const uint16_t MaskingMatrix16x16_Level0_4K[] = {
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100
};
// 32x32
static const uint16_t MaskingMatrix32x32_Level0_4K[] = {
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100
};

// 1080
// 4x4
static const uint16_t MaskingMatrix4x4_Level0_1080p[] = {
    M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100
};
// 8x8
static const uint16_t MaskingMatrix8x8_Level0_1080p[] = {
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100
};
// 16x16
static const uint16_t MaskingMatrix16x16_Level0_1080p[] = {
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100
};
// 32x32
static const uint16_t MaskingMatrix32x32_Level0_1080p[] = {
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100
};

// Level1
// 4K

// 4x4
static const uint16_t MaskingMatrix4x4_Level1_4K[] = {
    M_100, M_100, M_50, M_50,
    M_100, M_100, M_50, M_50,
    M_50, M_50, M_50, M_50,
    M_50, M_50, M_50, M_50,
};
// 8x8
static const uint16_t MaskingMatrix8x8_Level1_4K[] = {
    M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60,
    M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60,
    M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50,
    M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50,
    M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50,
};

static const uint16_t MaskingMatrix8x8_Level1_MOD1_4K[] = {
    M_100, M_100, M_100, M_100, M_80, M_80, M_80, M_80,
    M_100, M_100, M_100, M_100, M_80, M_80, M_80, M_80,
    M_100, M_100, M_100, M_100, M_70, M_70, M_70, M_70,
    M_100, M_100, M_100, M_100, M_70, M_70, M_70, M_70,
    M_80, M_80, M_70, M_70, M_70, M_70, M_70, M_70,
    M_80, M_80, M_70, M_70, M_70, M_70, M_70, M_70,
    M_80, M_80, M_70, M_70, M_70, M_70, M_70, M_70,
    M_80, M_80, M_70, M_70, M_70, M_70, M_70, M_70,
};

// 16x16
static const uint16_t MaskingMatrix16x16_Level1_4K[] = {
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
};
// 32x32
static const uint16_t MaskingMatrix32x32_Level1_4K[] = {
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
    M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
};

// 1080

// 4x4
static const uint16_t MaskingMatrix4x4_Level1_1080p[] = {
    M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100
};
// 8x8
static const uint16_t MaskingMatrix8x8_Level1_1080p[] = {
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100
};
// 16x16
static const uint16_t MaskingMatrix16x16_Level1_1080p[] = {
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100
};
// 32x32
static const uint16_t MaskingMatrix32x32_Level1_1080p[] = {
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100
};

// Level2
// 4K
// 4x4
static const uint16_t MaskingMatrix4x4_Level2_4K[] = {
    M_100, M_100, M_0, M_0,
    M_100, M_100, M_0, M_0,
    M_0, M_0, M_0, M_0,
    M_0, M_0, M_0, M_0,
};
// 8x8
static const uint16_t MaskingMatrix8x8_Level2_4K[] = {
    M_100, M_100, M_100, M_100, M_0, M_0, M_0, M_0,
    M_100, M_100, M_100, M_100, M_0, M_0, M_0, M_0,
    M_100, M_100, M_100, M_100, M_0, M_0, M_0, M_0,
    M_100, M_100, M_100, M_100, M_0, M_0, M_0, M_0,
    M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
};
// 16x16
static const uint16_t MaskingMatrix16x16_Level2_4K[] = {
    M_100, M_100, M_100, M_100, M_90, M_90, M_90, M_90, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_100, M_100, M_100, M_100, M_90, M_90, M_90, M_90, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_100, M_100, M_100, M_100, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_100, M_100, M_100, M_100, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_90, M_90, M_80, M_80, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_90, M_90, M_80, M_80, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_90, M_90, M_80, M_80, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_90, M_90, M_80, M_80, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
};
// 32x32
static const uint16_t MaskingMatrix32x32_Level2_4K[] = {
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_90, M_90, M_90, M_90, M_90, M_90, M_90, M_90, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_90, M_90, M_90, M_90, M_90, M_90, M_90, M_90, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_90, M_90, M_90, M_90, M_90, M_90, M_90, M_90, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_90, M_90, M_90, M_90, M_90, M_90, M_90, M_90, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
    M_90, M_90, M_90, M_90, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
793 : M_90, M_90, M_90, M_90, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
794 : M_90, M_90, M_90, M_90, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
795 : M_90, M_90, M_90, M_90, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
796 : M_90, M_90, M_90, M_90, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
797 : M_90, M_90, M_90, M_90, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
798 : M_90, M_90, M_90, M_90, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
799 : M_90, M_90, M_90, M_90, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
800 : M_90, M_90, M_90, M_90, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_80, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
801 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
802 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
803 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
804 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
805 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
806 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
807 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
808 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
809 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
810 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
811 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
812 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
813 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
814 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
815 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
816 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
817 : };
818 :
819 : // 1080p
820 : // 4x4
821 : static const uint16_t MaskingMatrix4x4_Level2_1080p[] = {
822 : M_100, M_100, M_100, M_100,
823 : M_100, M_100, M_100, M_100,
824 : M_100, M_100, M_100, M_100,
825 : M_100, M_100, M_100, M_100
826 : };
827 : // 8x8
828 : static const uint16_t MaskingMatrix8x8_Level2_1080p[] = {
829 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
830 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
831 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
832 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
833 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
834 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
835 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
836 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100
837 : };
838 : // 16x16
839 : static const uint16_t MaskingMatrix16x16_Level2_1080p[] = {
840 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
841 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
842 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
843 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
844 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
845 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
846 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
847 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
848 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
849 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
850 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
851 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
852 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
853 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
854 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
855 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100
856 : };
857 : // 32x32
858 : static const uint16_t MaskingMatrix32x32_Level2_1080p[] = {
859 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
860 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
861 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
862 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
863 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
864 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
865 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
866 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
867 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
868 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
869 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
870 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
871 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
872 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
873 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
874 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
875 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
876 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
877 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
878 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
879 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
880 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
881 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
882 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
883 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
884 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
885 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
886 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
887 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
888 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
889 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100,
890 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100
891 : };
892 : // Level3
893 : // 4K
894 : // 4x4
895 : static const uint16_t MaskingMatrix4x4_Level3_4K[] = {
896 : M_100, M_90, M_0, M_0,
897 : M_90, M_90, M_0, M_0,
898 : M_0, M_0, M_0, M_0,
899 : M_0, M_0, M_0, M_0,
900 : };
901 : // 1080p
902 : // 4x4
903 : static const uint16_t MaskingMatrix4x4_Level3_1080p[] = {
904 : M_100, M_100, M_50, M_50,
905 : M_100, M_100, M_50, M_50,
906 : M_50, M_50, M_50, M_50,
907 : M_50, M_50, M_50, M_50,
908 : };
909 : // 8x8
910 : static const uint16_t MaskingMatrix8x8_Level3_1080p[] = {
911 : M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60,
912 : M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60,
913 : M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50,
914 : M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50,
915 : M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50,
916 : M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50,
917 : M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50,
918 : M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50,
919 : };
920 : // 16x16
921 : static const uint16_t MaskingMatrix16x16_Level3_1080p[] = {
922 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
923 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
924 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
925 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
926 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
927 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
928 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
929 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
930 : M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
931 : M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
932 : M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
933 : M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
934 : M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
935 : M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
936 : M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
937 : M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
938 : };
939 : // 32x32
940 : static const uint16_t MaskingMatrix32x32_Level3_1080p[] = {
941 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
942 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
943 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
944 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
945 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
946 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
947 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
948 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60,
949 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
950 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
951 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
952 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
953 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
954 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
955 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
956 : M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_100, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
957 : M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
958 : M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
959 : M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
960 : M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
961 : M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
962 : M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
963 : M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
964 : M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
965 : M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
966 : M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
967 : M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
968 : M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
969 : M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
970 : M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
971 : M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
972 : M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50, M_50,
973 : };
974 : // Level4
975 : // Set 1
976 : // 4x4
977 : static const uint16_t MaskingMatrix4x4_Level4_4K_Set1[] = {
978 : M_100, M_60, M_0, M_0,
979 : M_60, M_60, M_0, M_0,
980 : M_0, M_0, M_0, M_0,
981 : M_0, M_0, M_0, M_0,
982 : };
983 : // Level5
984 : // Set 1
985 : // 4x4
986 : static const uint16_t MaskingMatrix4x4_Level5_4K_Set1[] = {
987 : M_100, M_50, M_0, M_0,
988 : M_50, M_50, M_0, M_0,
989 : M_0, M_0, M_0, M_0,
990 : M_0, M_0, M_0, M_0,
991 : };
992 : // 8x8: no Level5 8x8 matrix is defined here; the lookup table below reuses MaskingMatrix8x8_Level1_MOD1_4K for Level 5
993 : // Level6
994 : // Set 1
995 : // 4x4
996 : static const uint16_t MaskingMatrix4x4_Level6_4K_Set1[] = {
997 : M_100, M_25, M_0, M_0,
998 : M_25, M_25, M_0, M_0,
999 : M_0, M_0, M_0, M_0,
1000 : M_0, M_0, M_0, M_0,
1001 : };
1002 : // 8x8
1003 : static const uint16_t MaskingMatrix8x8_Level6_4K_Set1[] = {
1004 : M_100, M_25, M_25, M_25, M_0, M_0, M_0, M_0,
1005 : M_25, M_25, M_25, M_25, M_0, M_0, M_0, M_0,
1006 : M_25, M_25, M_25, M_25, M_0, M_0, M_0, M_0,
1007 : M_25, M_25, M_25, M_25, M_0, M_0, M_0, M_0,
1008 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1009 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1010 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1011 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1012 : };
1013 :
1014 : // Set 2
1015 : // 16x16
1016 : static const uint16_t MaskingMatrix16x16_Level6_4K_Set2[] = {
1017 : M_100, M_80, M_80, M_80, M_40, M_40, M_40, M_40, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1018 : M_80, M_80, M_80, M_80, M_40, M_40, M_40, M_40, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1019 : M_80, M_80, M_80, M_80, M_25, M_25, M_25, M_25, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1020 : M_80, M_80, M_80, M_80, M_25, M_25, M_25, M_25, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1021 : M_40, M_40, M_25, M_25, M_25, M_25, M_25, M_25, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1022 : M_40, M_40, M_25, M_25, M_25, M_25, M_25, M_25, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1023 : M_40, M_40, M_25, M_25, M_25, M_25, M_25, M_25, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1024 : M_40, M_40, M_25, M_25, M_25, M_25, M_25, M_25, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1025 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1026 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1027 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1028 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1029 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1030 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1031 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1032 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1033 : };
1034 : // 32x32
1035 : static const uint16_t MaskingMatrix32x32_Level6_4K_Set2[] = {
1036 : M_100, M_90, M_90, M_90, M_60, M_60, M_60, M_60, M_20, M_20, M_20, M_20, M_20, M_20, M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1037 : M_90, M_90, M_90, M_90, M_60, M_60, M_60, M_60, M_20, M_20, M_20, M_20, M_20, M_20, M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1038 : M_90, M_90, M_90, M_90, M_50, M_50, M_50, M_50, M_20, M_20, M_20, M_20, M_20, M_20, M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1039 : M_90, M_90, M_90, M_90, M_50, M_50, M_50, M_50, M_20, M_20, M_20, M_20, M_20, M_20, M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1040 : M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1041 : M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1042 : M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1043 : M_60, M_60, M_50, M_50, M_50, M_50, M_50, M_50, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1044 : M_20, M_20, M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1045 : M_20, M_20, M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1046 : M_20, M_20, M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1047 : M_20, M_20, M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1048 : M_20, M_20, M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1049 : M_20, M_20, M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1050 : M_20, M_20, M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1051 : M_20, M_20, M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1052 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1053 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1054 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1055 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1056 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1057 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1058 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1059 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1060 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1061 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1062 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1063 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1064 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1065 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1066 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1067 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1068 : };
1069 : // Level7
1070 : // Set 1
1071 : // 4x4
1072 : static const uint16_t MaskingMatrix4x4_Level7_4K_Set1[] = {
1073 : M_100, M_0, M_0, M_0,
1074 : M_0, M_0, M_0, M_0,
1075 : M_0, M_0, M_0, M_0,
1076 : M_0, M_0, M_0, M_0,
1077 : };
1078 :
1079 : // Set 2
1080 : // 16x16
1081 : // 32x32
1082 : static const uint16_t MaskingMatrix32x32_Level7_4K_Set2[] = {
1083 : M_100, M_100, M_100, M_100, M_20, M_20, M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1084 : M_100, M_100, M_100, M_100, M_20, M_20, M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1085 : M_100, M_100, M_100, M_100, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1086 : M_100, M_100, M_100, M_100, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1087 : M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1088 : M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1089 : M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1090 : M_20, M_20, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1091 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1092 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1093 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1094 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1095 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1096 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1097 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1098 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1099 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1100 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1101 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1102 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1103 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1104 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1105 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1106 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1107 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1108 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1109 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1110 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1111 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1112 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1113 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1114 : M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0, M_0,
1115 : };
1116 :
1117 : static const uint16_t *masking_matrix[2][8][4] = // [resolution: 4K, 1080p][masking level 0..7][tx size: 4x4, 8x8, 16x16, 32x32]
1118 : {
1119 : /****************** 4K ************************/
1120 : {
1121 : { MaskingMatrix4x4_Level0_4K, MaskingMatrix8x8_Level0_4K, MaskingMatrix16x16_Level0_4K, MaskingMatrix32x32_Level0_4K }, // Level 0 OFF
1122 : { MaskingMatrix4x4_Level1_4K, MaskingMatrix8x8_Level1_4K, MaskingMatrix16x16_Level1_4K, MaskingMatrix32x32_Level1_4K }, // Level 1 I_SLICE
1123 : /************************* L23_SETTING *************************/
1124 : { MaskingMatrix4x4_Level2_4K, MaskingMatrix8x8_Level2_4K, MaskingMatrix16x16_Level2_4K, MaskingMatrix32x32_Level2_4K }, // Level 2 Base Intra
1125 : { MaskingMatrix4x4_Level3_4K, MaskingMatrix8x8_Level1_MOD1_4K, MaskingMatrix16x16_Level1_4K, MaskingMatrix32x32_Level1_4K }, // Level 3 Base Inter
1126 : /************************* L45_SETTING *************************/
1127 : { MaskingMatrix4x4_Level4_4K_Set1, MaskingMatrix8x8_Level2_4K, MaskingMatrix16x16_Level2_4K, MaskingMatrix32x32_Level2_4K }, // Level 4 Ref Intra
1128 : { MaskingMatrix4x4_Level5_4K_Set1, MaskingMatrix8x8_Level1_MOD1_4K, MaskingMatrix16x16_Level1_4K, MaskingMatrix32x32_Level1_4K }, // Level 5 Ref Inter
1129 : /************************* L67_SETTING *************************/
1130 : { MaskingMatrix4x4_Level6_4K_Set1, MaskingMatrix8x8_Level6_4K_Set1, MaskingMatrix16x16_Level6_4K_Set2, MaskingMatrix32x32_Level6_4K_Set2 }, // Level 6 Non Ref Intra
1131 : { MaskingMatrix4x4_Level7_4K_Set1, MaskingMatrix8x8_Level1_MOD1_4K, MaskingMatrix16x16_Level1_4K, MaskingMatrix32x32_Level7_4K_Set2 } // Level 7 Non Ref Inter
1132 : },
1133 : /****************** 1080P ************************/
1134 : {
1135 : { MaskingMatrix4x4_Level0_1080p, MaskingMatrix8x8_Level0_1080p, MaskingMatrix16x16_Level0_1080p, MaskingMatrix32x32_Level0_1080p }, // Level 0 OFF
1136 : { MaskingMatrix4x4_Level1_1080p, MaskingMatrix8x8_Level1_1080p, MaskingMatrix16x16_Level1_1080p, MaskingMatrix32x32_Level1_1080p }, // Level 1 I_SLICE
1137 : /************************* L23_SETTING *************************/
1138 : { MaskingMatrix4x4_Level2_1080p, MaskingMatrix8x8_Level2_1080p, MaskingMatrix16x16_Level2_1080p, MaskingMatrix32x32_Level2_1080p }, // Level 2 Base Intra
1139 : { MaskingMatrix4x4_Level2_1080p, MaskingMatrix8x8_Level2_1080p, MaskingMatrix16x16_Level2_1080p, MaskingMatrix32x32_Level2_1080p }, // Level 3 Base Inter
1140 : /************************* L45_SETTING *************************/
1141 : { MaskingMatrix4x4_Level2_1080p, MaskingMatrix8x8_Level2_1080p, MaskingMatrix16x16_Level2_1080p, MaskingMatrix32x32_Level2_1080p }, // Level 4 Ref Intra
1142 : { MaskingMatrix4x4_Level3_1080p, MaskingMatrix8x8_Level3_1080p, MaskingMatrix16x16_Level3_1080p, MaskingMatrix32x32_Level3_1080p }, // Level 5 Ref Inter
1143 : /************************* L67_SETTING *************************/
1144 : { MaskingMatrix4x4_Level3_1080p, MaskingMatrix8x8_Level3_1080p, MaskingMatrix16x16_Level3_1080p, MaskingMatrix32x32_Level3_1080p }, // Level 6 Non Ref Intra
1145 : { MaskingMatrix4x4_Level3_1080p, MaskingMatrix8x8_Level3_1080p, MaskingMatrix16x16_Level3_1080p, MaskingMatrix32x32_Level3_1080p }, // Level 7 Non Ref Inter
1146 : },
1147 : };
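// A minimal lookup sketch (an illustration added here, not part of the
// original source): the table above is indexed as
// [resolution class][masking level][transform-size slot]. The index meanings
// are read off the table's own comments; the helper name and parameters below
// are hypothetical.
static const uint16_t *get_masking_matrix_sketch(uint32_t is_1080p,  // 0 = 4K set, 1 = 1080p set
                                                 uint32_t level,     // 0..7, Level 0 OFF through Level 7 Non Ref Inter
                                                 uint32_t size_slot) // 0..3 for 4x4, 8x8, 16x16, 32x32
{
    return masking_matrix[is_1080p][level][size_slot];
}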
1148 :
1149 0 : void mat_mult(
1150 : int16_t *coeff,
1151 : const uint32_t coeff_stride,
1152 : const uint16_t *masking_matrix,
1153 : const uint32_t masking_matrix_stride,
1154 : const uint32_t compute_size,
1155 : const int32_t offset,
1156 : const int32_t shift_num,
1157 : uint32_t *nonzerocoeff) {
1158 0 : uint32_t coeffLocation = 0;
1159 : uint32_t row_index, colIndex;
1160 : int32_t coeffTemp;
1161 :
1162 0 : *nonzerocoeff = 0;
1163 :
1164 0 : for (row_index = 0; row_index < compute_size; ++row_index) {
1165 0 : for (colIndex = 0; colIndex < compute_size; ++colIndex) {
1166 0 : coeffTemp = (ABS(coeff[coeffLocation]) * masking_matrix[colIndex + row_index * masking_matrix_stride] + offset) >> shift_num;
1167 0 : coeffTemp = (coeff[coeffLocation] < 0) ? -coeffTemp : coeffTemp;
1168 :
1169 0 : coeff[coeffLocation] = (int16_t)CLIP3(MIN_NEG_16BIT_NUM, MAX_POS_16BIT_NUM, coeffTemp);
1170 0 : (*nonzerocoeff) += (coeffTemp != 0);
1171 0 : ++coeffLocation;
1172 : }
1173 0 : coeffLocation += coeff_stride - compute_size;
1174 : }
1175 0 : }
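// A hedged usage sketch: mat_mult rescales each coefficient in place by a
// fixed-point gain from the masking matrix, computing
// sign(c) * ((|c| * m + offset) >> shift_num), clipping to int16_t, and
// counting the survivors. The shift_num and offset below are illustrative
// assumptions (a conventional round-half-up pair), not constants taken from
// the original source.
static uint32_t mask_4x4_block_sketch(int16_t coeff[16]) {
    uint32_t nonzero = 0;
    const int32_t shift_num = 16;                   // assumed fixed-point precision
    const int32_t offset    = 1 << (shift_num - 1); // assumed rounding term
    mat_mult(coeff, 4 /*coeff_stride*/, MaskingMatrix4x4_Level3_4K,
             4 /*masking_matrix_stride*/, 4 /*compute_size*/,
             offset, shift_num, &nonzero);
    return nonzero; // coefficients still nonzero after masking
}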
1176 :
1177 1916660 : void eb_av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
1178 : const Txfm2DFlipCfg *cfg, int32_t bd) {
1179 : // Take the shift from the larger dimension in the rectangular case.
1180 1916660 : const int8_t *shift = cfg->shift;
1181 : // the i < MAX_TXFM_STAGE_NUM bound mutes the compiler's array-bounds warning on the access below
1182 17249700 : for (int32_t i = 0; i < cfg->stage_num_col && i < MAX_TXFM_STAGE_NUM; ++i)
1183 15333100 : stage_range_col[i] = (int8_t)(cfg->stage_range_col[i] + shift[0] + bd + 1);
1184 : // likewise, the i < MAX_TXFM_STAGE_NUM bound mutes the array-bounds warning here
1185 21082900 : for (int32_t i = 0; i < cfg->stage_num_row && i < MAX_TXFM_STAGE_NUM; ++i)
1186 19166300 : stage_range_row[i] = (int8_t)(cfg->stage_range_row[i] + shift[0] + shift[1] + bd + 1);
1187 1916660 : }
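// Worked example (shift values assumed for illustration): with bd = 8 and a
// shift pair of { 2, -2 } for a 16x16 forward transform, each column-stage
// bound becomes cfg->stage_range_col[i] + 2 + 8 + 1, while each row-stage
// bound becomes cfg->stage_range_row[i] + 2 + (-2) + 8 + 1; the row pass folds
// in both shifts because it runs after the column pass and its intermediate
// shift have already been applied.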
1188 :
1189 : typedef void(*TxfmFunc)(const int32_t *input, int32_t *output, int8_t cos_bit,
1190 : const int8_t *stage_range);
1191 :
1192 : #define range_check(stage, input, buf, size, bit) \
1193 : { \
1194 : (void)stage; \
1195 : (void)input; \
1196 : (void)buf; \
1197 : (void)size; \
1198 : (void)bit; \
1199 : }
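// Note: range_check is compiled out in this build; every argument is cast to
// (void), so the per-stage range bookkeeping carries no runtime cost here.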
1200 :
1201 : // eb_av1_cospi_arr_data[i][j] = (int32_t)round(cos(M_PI*j/128) * (1<<(cos_bit_min+i)));
1202 : const int32_t eb_av1_cospi_arr_data[7][64] = {
1203 : { 1024, 1024, 1023, 1021, 1019, 1016, 1013, 1009, 1004, 999, 993, 987, 980,
1204 : 972, 964, 955, 946, 936, 926, 915, 903, 891, 878, 865, 851, 837,
1205 : 822, 807, 792, 775, 759, 742, 724, 706, 688, 669, 650, 630, 610,
1206 : 590, 569, 548, 526, 505, 483, 460, 438, 415, 392, 369, 345, 321,
1207 : 297, 273, 249, 224, 200, 175, 150, 125, 100, 75, 50, 25 },
1208 : { 2048, 2047, 2046, 2042, 2038, 2033, 2026, 2018, 2009, 1998, 1987,
1209 : 1974, 1960, 1945, 1928, 1911, 1892, 1872, 1851, 1829, 1806, 1782,
1210 : 1757, 1730, 1703, 1674, 1645, 1615, 1583, 1551, 1517, 1483, 1448,
1211 : 1412, 1375, 1338, 1299, 1260, 1220, 1179, 1138, 1096, 1053, 1009,
1212 : 965, 921, 876, 830, 784, 737, 690, 642, 595, 546, 498,
1213 : 449, 400, 350, 301, 251, 201, 151, 100, 50 },
1214 : { 4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036, 4017, 3996, 3973,
1215 : 3948, 3920, 3889, 3857, 3822, 3784, 3745, 3703, 3659, 3612, 3564,
1216 : 3513, 3461, 3406, 3349, 3290, 3229, 3166, 3102, 3035, 2967, 2896,
1217 : 2824, 2751, 2675, 2598, 2520, 2440, 2359, 2276, 2191, 2106, 2019,
1218 : 1931, 1842, 1751, 1660, 1567, 1474, 1380, 1285, 1189, 1092, 995,
1219 : 897, 799, 700, 601, 501, 401, 301, 201, 101 },
1220 : { 8192, 8190, 8182, 8170, 8153, 8130, 8103, 8071, 8035, 7993, 7946,
1221 : 7895, 7839, 7779, 7713, 7643, 7568, 7489, 7405, 7317, 7225, 7128,
1222 : 7027, 6921, 6811, 6698, 6580, 6458, 6333, 6203, 6070, 5933, 5793,
1223 : 5649, 5501, 5351, 5197, 5040, 4880, 4717, 4551, 4383, 4212, 4038,
1224 : 3862, 3683, 3503, 3320, 3135, 2948, 2760, 2570, 2378, 2185, 1990,
1225 : 1795, 1598, 1401, 1202, 1003, 803, 603, 402, 201 },
1226 : { 16384, 16379, 16364, 16340, 16305, 16261, 16207, 16143, 16069, 15986, 15893,
1227 : 15791, 15679, 15557, 15426, 15286, 15137, 14978, 14811, 14635, 14449, 14256,
1228 : 14053, 13842, 13623, 13395, 13160, 12916, 12665, 12406, 12140, 11866, 11585,
1229 : 11297, 11003, 10702, 10394, 10080, 9760, 9434, 9102, 8765, 8423, 8076,
1230 : 7723, 7366, 7005, 6639, 6270, 5897, 5520, 5139, 4756, 4370, 3981,
1231 : 3590, 3196, 2801, 2404, 2006, 1606, 1205, 804, 402 },
1232 : { 32768, 32758, 32729, 32679, 32610, 32522, 32413, 32286, 32138, 31972, 31786,
1233 : 31581, 31357, 31114, 30853, 30572, 30274, 29957, 29622, 29269, 28899, 28511,
1234 : 28106, 27684, 27246, 26791, 26320, 25833, 25330, 24812, 24279, 23732, 23170,
1235 : 22595, 22006, 21403, 20788, 20160, 19520, 18868, 18205, 17531, 16846, 16151,
1236 : 15447, 14733, 14010, 13279, 12540, 11793, 11039, 10279, 9512, 8740, 7962,
1237 : 7180, 6393, 5602, 4808, 4011, 3212, 2411, 1608, 804 },
1238 : { 65536, 65516, 65457, 65358, 65220, 65043, 64827, 64571, 64277, 63944, 63572,
1239 : 63162, 62714, 62228, 61705, 61145, 60547, 59914, 59244, 58538, 57798, 57022,
1240 : 56212, 55368, 54491, 53581, 52639, 51665, 50660, 49624, 48559, 47464, 46341,
1241 : 45190, 44011, 42806, 41576, 40320, 39040, 37736, 36410, 35062, 33692, 32303,
1242 : 30893, 29466, 28020, 26558, 25080, 23586, 22078, 20557, 19024, 17479, 15924,
1243 : 14359, 12785, 11204, 9616, 8022, 6424, 4821, 3216, 1608 }
1244 : };
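// A self-contained regeneration sketch (not part of the original build): the
// first row scales by 1024 = 1 << 10, so cos_bit_min is taken to be 10 here,
// an assumption read off the data that is consistent with the generator
// comment above. <math.h> and M_PI are assumed available for this sketch.
#include <math.h>
static int32_t cospi_ref_sketch(int32_t i, int32_t j) {
    // Same formula as the generator comment: round(cos(M_PI*j/128) * 2^(10+i)).
    return (int32_t)round(cos(M_PI * j / 128.0) * (double)(1 << (10 + i)));
}
// For instance, cospi_ref_sketch(0, 16) == 946 and cospi_ref_sketch(0, 32) == 724,
// matching eb_av1_cospi_arr_data[0][16] and eb_av1_cospi_arr_data[0][32] above.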
1245 5360390000 : static INLINE int32_t round_shift(int64_t value, int32_t bit) {
1246 5360390000 : assert(bit >= 1);
1247 5360390000 : return (int32_t)((value + (1ll << (bit - 1))) >> bit);
1248 : }
1249 3506400000 : static INLINE int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1,
1250 : int32_t bit) {
1251 3506400000 :     int64_t result_64 = (int64_t)w0 * in0 + (int64_t)w1 * in1;  // cast before multiplying so each product is formed in 64 bits, avoiding int32 overflow
1252 : #if CONFIG_COEFFICIENT_RANGE_CHECKING
1253 : assert(result_64 >= INT32_MIN && result_64 <= INT32_MAX);
1254 : #endif
1255 3506400000 : return round_shift(result_64, bit);
1256 : }
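// Worked example: with cos_bit = 12, cospi[32] == 2896 (cos(pi/4) * 2^12
// rounded), so half_btf(cospi[32], a, cospi[32], b, 12) returns
// round((a + b) * 2896 / 4096), the unnormalized 45-degree butterfly rotation
// used throughout the fdct stages below.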
1257 :
1258 : // eb_av1_sinpi_arr_data[i][j] = (int32_t)round((sqrt(2) * sin(j*Pi/9) * 2 / 3)
1259 : //     * (1 << (cos_bit_min + i))), modified so that elements j=1,2 sum to element j=4.
1260 : const int32_t eb_av1_sinpi_arr_data[7][5] = {
1261 : { 0, 330, 621, 836, 951 }, { 0, 660, 1241, 1672, 1901 },
1262 : { 0, 1321, 2482, 3344, 3803 }, { 0, 2642, 4964, 6689, 7606 },
1263 : { 0, 5283, 9929, 13377, 15212 }, { 0, 10566, 19858, 26755, 30424 },
1264 : { 0, 21133, 39716, 53510, 60849 }
1265 : };
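// Note the closure property called out above: in every row the j=1 and j=2
// entries sum exactly to the j=4 entry (330 + 621 == 951, ...,
// 21133 + 39716 == 60849), so the exact identity
// sin(Pi/9) + sin(2*Pi/9) == sin(4*Pi/9) survives integer rounding.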
1266 :
1267 0 : void eb_av1_fdct4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
1268 : const int8_t *stage_range) {
1269 0 : const int32_t size = 4;
1270 : const int32_t *cospi;
1271 :
1272 0 : int32_t stage = 0;
1273 : int32_t *bf0, *bf1;
1274 : int32_t step[4];
1275 :
1276 : // stage 0;
1277 0 : range_check(stage, input, input, size, stage_range[stage]);
1278 :
1279 : // stage 1;
1280 0 : stage++;
1281 0 : bf1 = output;
1282 0 : bf1[0] = input[0] + input[3];
1283 0 : bf1[1] = input[1] + input[2];
1284 0 : bf1[2] = -input[2] + input[1];
1285 0 : bf1[3] = -input[3] + input[0];
1286 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1287 :
1288 : // stage 2
1289 0 : stage++;
1290 0 : cospi = cospi_arr(cos_bit);
1291 0 : bf0 = output;
1292 0 : bf1 = step;
1293 0 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
1294 0 : bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
1295 0 : bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
1296 0 : bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
1297 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1298 :
1299 : // stage 3
1300 0 : stage++;
1301 0 : bf0 = step;
1302 0 : bf1 = output;
1303 0 : bf1[0] = bf0[0];
1304 0 : bf1[1] = bf0[2];
1305 0 : bf1[2] = bf0[1];
1306 0 : bf1[3] = bf0[3];
1307 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1308 0 : }
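// A floating-point cross-check sketch for the 4-point pipeline above (an
// illustration, not the project's test code): the integer stages implement an
// unnormalized DCT-II with cos() weights quantized to cos_bit bits, and the
// final stage permutes {0, 2, 1, 3} back into natural frequency order. Relies
// on the <math.h> pulled in for the cospi sketch earlier.
static void fdct4_ref_sketch(const double in[4], double out[4]) {
    for (int32_t k = 0; k < 4; ++k) {
        out[k] = 0.0;
        for (int32_t n = 0; n < 4; ++n)
            out[k] += in[n] * cos(M_PI * (2 * n + 1) * k / 8.0);
    }
    out[0] *= 0.70710678118654752; // the DC path carries an extra cospi[32] factor
}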
1309 :
1310 0 : void eb_av1_fdct8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
1311 : const int8_t *stage_range) {
1312 0 : const int32_t size = 8;
1313 : const int32_t *cospi;
1314 :
1315 0 : int32_t stage = 0;
1316 : int32_t *bf0, *bf1;
1317 : int32_t step[8];
1318 :
1319 : // stage 0;
1320 0 : range_check(stage, input, input, size, stage_range[stage]);
1321 :
1322 : // stage 1;
1323 0 : stage++;
1324 0 : bf1 = output;
1325 0 : bf1[0] = input[0] + input[7];
1326 0 : bf1[1] = input[1] + input[6];
1327 0 : bf1[2] = input[2] + input[5];
1328 0 : bf1[3] = input[3] + input[4];
1329 0 : bf1[4] = -input[4] + input[3];
1330 0 : bf1[5] = -input[5] + input[2];
1331 0 : bf1[6] = -input[6] + input[1];
1332 0 : bf1[7] = -input[7] + input[0];
1333 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1334 :
1335 : // stage 2
1336 0 : stage++;
1337 0 : cospi = cospi_arr(cos_bit);
1338 0 : bf0 = output;
1339 0 : bf1 = step;
1340 0 : bf1[0] = bf0[0] + bf0[3];
1341 0 : bf1[1] = bf0[1] + bf0[2];
1342 0 : bf1[2] = -bf0[2] + bf0[1];
1343 0 : bf1[3] = -bf0[3] + bf0[0];
1344 0 : bf1[4] = bf0[4];
1345 0 : bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
1346 0 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit);
1347 0 : bf1[7] = bf0[7];
1348 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1349 :
1350 : // stage 3
1351 0 : stage++;
1352 0 : cospi = cospi_arr(cos_bit);
1353 0 : bf0 = step;
1354 0 : bf1 = output;
1355 0 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
1356 0 : bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
1357 0 : bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
1358 0 : bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
1359 0 : bf1[4] = bf0[4] + bf0[5];
1360 0 : bf1[5] = -bf0[5] + bf0[4];
1361 0 : bf1[6] = -bf0[6] + bf0[7];
1362 0 : bf1[7] = bf0[7] + bf0[6];
1363 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1364 :
1365 : // stage 4
1366 0 : stage++;
1367 0 : cospi = cospi_arr(cos_bit);
1368 0 : bf0 = output;
1369 0 : bf1 = step;
1370 0 : bf1[0] = bf0[0];
1371 0 : bf1[1] = bf0[1];
1372 0 : bf1[2] = bf0[2];
1373 0 : bf1[3] = bf0[3];
1374 0 : bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit);
1375 0 : bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit);
1376 0 : bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit);
1377 0 : bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit);
1378 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1379 :
1380 : // stage 5
1381 0 : stage++;
1382 0 : bf0 = step;
1383 0 : bf1 = output;
1384 0 : bf1[0] = bf0[0];
1385 0 : bf1[1] = bf0[4];
1386 0 : bf1[2] = bf0[2];
1387 0 : bf1[3] = bf0[6];
1388 0 : bf1[4] = bf0[1];
1389 0 : bf1[5] = bf0[5];
1390 0 : bf1[6] = bf0[3];
1391 0 : bf1[7] = bf0[7];
1392 0 : range_check(stage, input, bf1, size, stage_range[stage]);
1393 0 : }
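// Note on the final stage above: the index pattern {0, 4, 2, 6, 1, 5, 3, 7} is
// the 3-bit bit-reversal permutation, mapping the butterfly network's outputs
// back into natural frequency order 0..7; eb_av1_fdct16_new below ends with
// the analogous 4-bit reversal.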
1394 :
1395 61226300 : void eb_av1_fdct16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
1396 : const int8_t *stage_range) {
1397 61226300 : const int32_t size = 16;
1398 : const int32_t *cospi;
1399 :
1400 61226300 : int32_t stage = 0;
1401 : int32_t *bf0, *bf1;
1402 : int32_t step[16];
1403 :
1404 : // stage 0;
1405 61226300 : range_check(stage, input, input, size, stage_range[stage]);
1406 :
1407 : // stage 1;
1408 61226300 : stage++;
1409 61226300 : bf1 = output;
1410 61226300 : bf1[0] = input[0] + input[15];
1411 61226300 : bf1[1] = input[1] + input[14];
1412 61226300 : bf1[2] = input[2] + input[13];
1413 61226300 : bf1[3] = input[3] + input[12];
1414 61226300 : bf1[4] = input[4] + input[11];
1415 61226300 : bf1[5] = input[5] + input[10];
1416 61226300 : bf1[6] = input[6] + input[9];
1417 61226300 : bf1[7] = input[7] + input[8];
1418 61226300 : bf1[8] = -input[8] + input[7];
1419 61226300 : bf1[9] = -input[9] + input[6];
1420 61226300 : bf1[10] = -input[10] + input[5];
1421 61226300 : bf1[11] = -input[11] + input[4];
1422 61226300 : bf1[12] = -input[12] + input[3];
1423 61226300 : bf1[13] = -input[13] + input[2];
1424 61226300 : bf1[14] = -input[14] + input[1];
1425 61226300 : bf1[15] = -input[15] + input[0];
1426 61226300 : range_check(stage, input, bf1, size, stage_range[stage]);
1427 :
1428 : // stage 2
1429 61226300 : stage++;
1430 61226300 : cospi = cospi_arr(cos_bit);
1431 61252600 : bf0 = output;
1432 61252600 : bf1 = step;
1433 61252600 : bf1[0] = bf0[0] + bf0[7];
1434 61252600 : bf1[1] = bf0[1] + bf0[6];
1435 61252600 : bf1[2] = bf0[2] + bf0[5];
1436 61252600 : bf1[3] = bf0[3] + bf0[4];
1437 61252600 : bf1[4] = -bf0[4] + bf0[3];
1438 61252600 : bf1[5] = -bf0[5] + bf0[2];
1439 61252600 : bf1[6] = -bf0[6] + bf0[1];
1440 61252600 : bf1[7] = -bf0[7] + bf0[0];
1441 61252600 : bf1[8] = bf0[8];
1442 61252600 : bf1[9] = bf0[9];
1443 61252600 : bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
1444 61244200 : bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
1445 61219000 : bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit);
1446 61218500 : bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit);
1447 61206300 : bf1[14] = bf0[14];
1448 61206300 : bf1[15] = bf0[15];
1449 61206300 : range_check(stage, input, bf1, size, stage_range[stage]);
1450 :
1451 : // stage 3
1452 61206300 : stage++;
1453 61206300 : cospi = cospi_arr(cos_bit);
1454 61207100 : bf0 = step;
1455 61207100 : bf1 = output;
1456 61207100 : bf1[0] = bf0[0] + bf0[3];
1457 61207100 : bf1[1] = bf0[1] + bf0[2];
1458 61207100 : bf1[2] = -bf0[2] + bf0[1];
1459 61207100 : bf1[3] = -bf0[3] + bf0[0];
1460 61207100 : bf1[4] = bf0[4];
1461 61207100 : bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
1462 61195400 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit);
1463 61188200 : bf1[7] = bf0[7];
1464 61188200 : bf1[8] = bf0[8] + bf0[11];
1465 61188200 : bf1[9] = bf0[9] + bf0[10];
1466 61188200 : bf1[10] = -bf0[10] + bf0[9];
1467 61188200 : bf1[11] = -bf0[11] + bf0[8];
1468 61188200 : bf1[12] = -bf0[12] + bf0[15];
1469 61188200 : bf1[13] = -bf0[13] + bf0[14];
1470 61188200 : bf1[14] = bf0[14] + bf0[13];
1471 61188200 : bf1[15] = bf0[15] + bf0[12];
1472 61188200 : range_check(stage, input, bf1, size, stage_range[stage]);
1473 :
1474 : // stage 4
1475 61188200 : stage++;
1476 61188200 : cospi = cospi_arr(cos_bit);
1477 61199900 : bf0 = output;
1478 61199900 : bf1 = step;
1479 61199900 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
1480 61197900 : bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
1481 61190000 : bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
1482 61183000 : bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
1483 61185100 : bf1[4] = bf0[4] + bf0[5];
1484 61185100 : bf1[5] = -bf0[5] + bf0[4];
1485 61185100 : bf1[6] = -bf0[6] + bf0[7];
1486 61185100 : bf1[7] = bf0[7] + bf0[6];
1487 61185100 : bf1[8] = bf0[8];
1488 61185100 : bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
1489 61194800 : bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
1490 61183800 : bf1[11] = bf0[11];
1491 61183800 : bf1[12] = bf0[12];
1492 61183800 : bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit);
1493 61191600 : bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit);
1494 61189300 : bf1[15] = bf0[15];
1495 61189300 : range_check(stage, input, bf1, size, stage_range[stage]);
1496 :
1497 : // stage 5
1498 61189300 : stage++;
1499 61189300 : cospi = cospi_arr(cos_bit);
1500 61193400 : bf0 = step;
1501 61193400 : bf1 = output;
1502 61193400 : bf1[0] = bf0[0];
1503 61193400 : bf1[1] = bf0[1];
1504 61193400 : bf1[2] = bf0[2];
1505 61193400 : bf1[3] = bf0[3];
1506 61193400 : bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit);
1507 61203900 : bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit);
1508 61202500 : bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit);
1509 61217100 : bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit);
1510 61209800 : bf1[8] = bf0[8] + bf0[9];
1511 61209800 : bf1[9] = -bf0[9] + bf0[8];
1512 61209800 : bf1[10] = -bf0[10] + bf0[11];
1513 61209800 : bf1[11] = bf0[11] + bf0[10];
1514 61209800 : bf1[12] = bf0[12] + bf0[13];
1515 61209800 : bf1[13] = -bf0[13] + bf0[12];
1516 61209800 : bf1[14] = -bf0[14] + bf0[15];
1517 61209800 : bf1[15] = bf0[15] + bf0[14];
1518 61209800 : range_check(stage, input, bf1, size, stage_range[stage]);
1519 :
1520 : // stage 6
1521 61209800 : stage++;
1522 61209800 : cospi = cospi_arr(cos_bit);
1523 61220600 : bf0 = output;
1524 61220600 : bf1 = step;
1525 61220600 : bf1[0] = bf0[0];
1526 61220600 : bf1[1] = bf0[1];
1527 61220600 : bf1[2] = bf0[2];
1528 61220600 : bf1[3] = bf0[3];
1529 61220600 : bf1[4] = bf0[4];
1530 61220600 : bf1[5] = bf0[5];
1531 61220600 : bf1[6] = bf0[6];
1532 61220600 : bf1[7] = bf0[7];
1533 61220600 : bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit);
1534 61215800 : bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit);
1535 61205400 : bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit);
1536 61208400 : bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit);
1537 61204400 : bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit);
1538 61204400 : bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit);
1539 61203800 : bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit);
1540 61207200 : bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit);
1541 61209900 : range_check(stage, input, bf1, size, stage_range[stage]);
1542 :
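// Stage 7 below is pure data movement: output index i receives the
// element at the 4-bit bit-reversal of i (0, 8, 4, 12, 2, 10, ...),
// which restores natural frequency order after the decimation above.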
1543 : // stage 7
1544 61209900 : stage++;
1545 61209900 : bf0 = step;
1546 61209900 : bf1 = output;
1547 61209900 : bf1[0] = bf0[0];
1548 61209900 : bf1[1] = bf0[8];
1549 61209900 : bf1[2] = bf0[4];
1550 61209900 : bf1[3] = bf0[12];
1551 61209900 : bf1[4] = bf0[2];
1552 61209900 : bf1[5] = bf0[10];
1553 61209900 : bf1[6] = bf0[6];
1554 61209900 : bf1[7] = bf0[14];
1555 61209900 : bf1[8] = bf0[1];
1556 61209900 : bf1[9] = bf0[9];
1557 61209900 : bf1[10] = bf0[5];
1558 61209900 : bf1[11] = bf0[13];
1559 61209900 : bf1[12] = bf0[3];
1560 61209900 : bf1[13] = bf0[11];
1561 61209900 : bf1[14] = bf0[7];
1562 61209900 : bf1[15] = bf0[15];
1563 61209900 : range_check(stage, input, bf1, size, stage_range[stage]);
1564 61209900 : }
1565 :
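/*
 * eb_av1_fdct32_new and eb_av1_fdct64_new follow the same recursive
 * scheme: an N-point forward DCT runs 2 * log2(N) - 1 butterfly stages
 * after the stage-0 range check (9 for N = 32, 11 for N = 64) and ends
 * with the corresponding 5- or 6-bit bit-reversal reordering.
 */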
1566 30621900 : void eb_av1_fdct32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
1567 : const int8_t *stage_range) {
1568 30621900 : const int32_t size = 32;
1569 : const int32_t *cospi;
1570 :
1571 30621900 : int32_t stage = 0;
1572 : int32_t *bf0, *bf1;
1573 : int32_t step[32];
1574 :
1575 : // stage 0
1576 30621900 : range_check(stage, input, input, size, stage_range[stage]);
1577 :
1578 : // stage 1
1579 30621900 : stage++;
1580 30621900 : bf1 = output;
1581 30621900 : bf1[0] = input[0] + input[31];
1582 30621900 : bf1[1] = input[1] + input[30];
1583 30621900 : bf1[2] = input[2] + input[29];
1584 30621900 : bf1[3] = input[3] + input[28];
1585 30621900 : bf1[4] = input[4] + input[27];
1586 30621900 : bf1[5] = input[5] + input[26];
1587 30621900 : bf1[6] = input[6] + input[25];
1588 30621900 : bf1[7] = input[7] + input[24];
1589 30621900 : bf1[8] = input[8] + input[23];
1590 30621900 : bf1[9] = input[9] + input[22];
1591 30621900 : bf1[10] = input[10] + input[21];
1592 30621900 : bf1[11] = input[11] + input[20];
1593 30621900 : bf1[12] = input[12] + input[19];
1594 30621900 : bf1[13] = input[13] + input[18];
1595 30621900 : bf1[14] = input[14] + input[17];
1596 30621900 : bf1[15] = input[15] + input[16];
1597 30621900 : bf1[16] = -input[16] + input[15];
1598 30621900 : bf1[17] = -input[17] + input[14];
1599 30621900 : bf1[18] = -input[18] + input[13];
1600 30621900 : bf1[19] = -input[19] + input[12];
1601 30621900 : bf1[20] = -input[20] + input[11];
1602 30621900 : bf1[21] = -input[21] + input[10];
1603 30621900 : bf1[22] = -input[22] + input[9];
1604 30621900 : bf1[23] = -input[23] + input[8];
1605 30621900 : bf1[24] = -input[24] + input[7];
1606 30621900 : bf1[25] = -input[25] + input[6];
1607 30621900 : bf1[26] = -input[26] + input[5];
1608 30621900 : bf1[27] = -input[27] + input[4];
1609 30621900 : bf1[28] = -input[28] + input[3];
1610 30621900 : bf1[29] = -input[29] + input[2];
1611 30621900 : bf1[30] = -input[30] + input[1];
1612 30621900 : bf1[31] = -input[31] + input[0];
1613 30621900 : range_check(stage, input, bf1, size, stage_range[stage]);
1614 :
1615 : // stage 2
1616 30621900 : stage++;
1617 30621900 : cospi = cospi_arr(cos_bit);
1618 30646000 : bf0 = output;
1619 30646000 : bf1 = step;
1620 30646000 : bf1[0] = bf0[0] + bf0[15];
1621 30646000 : bf1[1] = bf0[1] + bf0[14];
1622 30646000 : bf1[2] = bf0[2] + bf0[13];
1623 30646000 : bf1[3] = bf0[3] + bf0[12];
1624 30646000 : bf1[4] = bf0[4] + bf0[11];
1625 30646000 : bf1[5] = bf0[5] + bf0[10];
1626 30646000 : bf1[6] = bf0[6] + bf0[9];
1627 30646000 : bf1[7] = bf0[7] + bf0[8];
1628 30646000 : bf1[8] = -bf0[8] + bf0[7];
1629 30646000 : bf1[9] = -bf0[9] + bf0[6];
1630 30646000 : bf1[10] = -bf0[10] + bf0[5];
1631 30646000 : bf1[11] = -bf0[11] + bf0[4];
1632 30646000 : bf1[12] = -bf0[12] + bf0[3];
1633 30646000 : bf1[13] = -bf0[13] + bf0[2];
1634 30646000 : bf1[14] = -bf0[14] + bf0[1];
1635 30646000 : bf1[15] = -bf0[15] + bf0[0];
1636 30646000 : bf1[16] = bf0[16];
1637 30646000 : bf1[17] = bf0[17];
1638 30646000 : bf1[18] = bf0[18];
1639 30646000 : bf1[19] = bf0[19];
1640 30646000 : bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
1641 30642100 : bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
1642 30631900 : bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
1643 30619600 : bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
1644 30612500 : bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit);
1645 30609800 : bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit);
1646 30608100 : bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit);
1647 30607600 : bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit);
1648 30607400 : bf1[28] = bf0[28];
1649 30607400 : bf1[29] = bf0[29];
1650 30607400 : bf1[30] = bf0[30];
1651 30607400 : bf1[31] = bf0[31];
1652 30607400 : range_check(stage, input, bf1, size, stage_range[stage]);
1653 :
1654 : // stage 3
1655 30607400 : stage++;
1656 30607400 : cospi = cospi_arr(cos_bit);
1657 30612200 : bf0 = step;
1658 30612200 : bf1 = output;
1659 30612200 : bf1[0] = bf0[0] + bf0[7];
1660 30612200 : bf1[1] = bf0[1] + bf0[6];
1661 30612200 : bf1[2] = bf0[2] + bf0[5];
1662 30612200 : bf1[3] = bf0[3] + bf0[4];
1663 30612200 : bf1[4] = -bf0[4] + bf0[3];
1664 30612200 : bf1[5] = -bf0[5] + bf0[2];
1665 30612200 : bf1[6] = -bf0[6] + bf0[1];
1666 30612200 : bf1[7] = -bf0[7] + bf0[0];
1667 30612200 : bf1[8] = bf0[8];
1668 30612200 : bf1[9] = bf0[9];
1669 30612200 : bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
1670 30615600 : bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
1671 30609200 : bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit);
1672 30611800 : bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit);
1673 30609400 : bf1[14] = bf0[14];
1674 30609400 : bf1[15] = bf0[15];
1675 30609400 : bf1[16] = bf0[16] + bf0[23];
1676 30609400 : bf1[17] = bf0[17] + bf0[22];
1677 30609400 : bf1[18] = bf0[18] + bf0[21];
1678 30609400 : bf1[19] = bf0[19] + bf0[20];
1679 30609400 : bf1[20] = -bf0[20] + bf0[19];
1680 30609400 : bf1[21] = -bf0[21] + bf0[18];
1681 30609400 : bf1[22] = -bf0[22] + bf0[17];
1682 30609400 : bf1[23] = -bf0[23] + bf0[16];
1683 30609400 : bf1[24] = -bf0[24] + bf0[31];
1684 30609400 : bf1[25] = -bf0[25] + bf0[30];
1685 30609400 : bf1[26] = -bf0[26] + bf0[29];
1686 30609400 : bf1[27] = -bf0[27] + bf0[28];
1687 30609400 : bf1[28] = bf0[28] + bf0[27];
1688 30609400 : bf1[29] = bf0[29] + bf0[26];
1689 30609400 : bf1[30] = bf0[30] + bf0[25];
1690 30609400 : bf1[31] = bf0[31] + bf0[24];
1691 30609400 : range_check(stage, input, bf1, size, stage_range[stage]);
1692 :
1693 : // stage 4
1694 30609400 : stage++;
1695 30609400 : cospi = cospi_arr(cos_bit);
1696 30632300 : bf0 = output;
1697 30632300 : bf1 = step;
1698 30632300 : bf1[0] = bf0[0] + bf0[3];
1699 30632300 : bf1[1] = bf0[1] + bf0[2];
1700 30632300 : bf1[2] = -bf0[2] + bf0[1];
1701 30632300 : bf1[3] = -bf0[3] + bf0[0];
1702 30632300 : bf1[4] = bf0[4];
1703 30632300 : bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
1704 30627300 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit);
1705 30622800 : bf1[7] = bf0[7];
1706 30622800 : bf1[8] = bf0[8] + bf0[11];
1707 30622800 : bf1[9] = bf0[9] + bf0[10];
1708 30622800 : bf1[10] = -bf0[10] + bf0[9];
1709 30622800 : bf1[11] = -bf0[11] + bf0[8];
1710 30622800 : bf1[12] = -bf0[12] + bf0[15];
1711 30622800 : bf1[13] = -bf0[13] + bf0[14];
1712 30622800 : bf1[14] = bf0[14] + bf0[13];
1713 30622800 : bf1[15] = bf0[15] + bf0[12];
1714 30622800 : bf1[16] = bf0[16];
1715 30622800 : bf1[17] = bf0[17];
1716 30622800 : bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit);
1717 30619200 : bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit);
1718 30610400 : bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit);
1719 30609000 : bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit);
1720 30602500 : bf1[22] = bf0[22];
1721 30602500 : bf1[23] = bf0[23];
1722 30602500 : bf1[24] = bf0[24];
1723 30602500 : bf1[25] = bf0[25];
1724 30602500 : bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit);
1725 30602000 : bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit);
1726 30599600 : bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit);
1727 30604400 : bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit);
1728 30604100 : bf1[30] = bf0[30];
1729 30604100 : bf1[31] = bf0[31];
1730 30604100 : range_check(stage, input, bf1, size, stage_range[stage]);
1731 :
1732 : // stage 5
1733 30604100 : stage++;
1734 30604100 : cospi = cospi_arr(cos_bit);
1735 30609600 : bf0 = step;
1736 30609600 : bf1 = output;
1737 30609600 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
1738 30604800 : bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
1739 30604000 : bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
1740 30603200 : bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
1741 30606000 : bf1[4] = bf0[4] + bf0[5];
1742 30606000 : bf1[5] = -bf0[5] + bf0[4];
1743 30606000 : bf1[6] = -bf0[6] + bf0[7];
1744 30606000 : bf1[7] = bf0[7] + bf0[6];
1745 30606000 : bf1[8] = bf0[8];
1746 30606000 : bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
1747 30611500 : bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
1748 30611400 : bf1[11] = bf0[11];
1749 30611400 : bf1[12] = bf0[12];
1750 30611400 : bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit);
1751 30612900 : bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit);
1752 30608100 : bf1[15] = bf0[15];
1753 30608100 : bf1[16] = bf0[16] + bf0[19];
1754 30608100 : bf1[17] = bf0[17] + bf0[18];
1755 30608100 : bf1[18] = -bf0[18] + bf0[17];
1756 30608100 : bf1[19] = -bf0[19] + bf0[16];
1757 30608100 : bf1[20] = -bf0[20] + bf0[23];
1758 30608100 : bf1[21] = -bf0[21] + bf0[22];
1759 30608100 : bf1[22] = bf0[22] + bf0[21];
1760 30608100 : bf1[23] = bf0[23] + bf0[20];
1761 30608100 : bf1[24] = bf0[24] + bf0[27];
1762 30608100 : bf1[25] = bf0[25] + bf0[26];
1763 30608100 : bf1[26] = -bf0[26] + bf0[25];
1764 30608100 : bf1[27] = -bf0[27] + bf0[24];
1765 30608100 : bf1[28] = -bf0[28] + bf0[31];
1766 30608100 : bf1[29] = -bf0[29] + bf0[30];
1767 30608100 : bf1[30] = bf0[30] + bf0[29];
1768 30608100 : bf1[31] = bf0[31] + bf0[28];
1769 30608100 : range_check(stage, input, bf1, size, stage_range[stage]);
1770 :
1771 : // stage 6
1772 30608100 : stage++;
1773 30608100 : cospi = cospi_arr(cos_bit);
1774 30626900 : bf0 = output;
1775 30626900 : bf1 = step;
1776 30626900 : bf1[0] = bf0[0];
1777 30626900 : bf1[1] = bf0[1];
1778 30626900 : bf1[2] = bf0[2];
1779 30626900 : bf1[3] = bf0[3];
1780 30626900 : bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit);
1781 30617100 : bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit);
1782 30612000 : bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit);
1783 30609900 : bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit);
1784 30607700 : bf1[8] = bf0[8] + bf0[9];
1785 30607700 : bf1[9] = -bf0[9] + bf0[8];
1786 30607700 : bf1[10] = -bf0[10] + bf0[11];
1787 30607700 : bf1[11] = bf0[11] + bf0[10];
1788 30607700 : bf1[12] = bf0[12] + bf0[13];
1789 30607700 : bf1[13] = -bf0[13] + bf0[12];
1790 30607700 : bf1[14] = -bf0[14] + bf0[15];
1791 30607700 : bf1[15] = bf0[15] + bf0[14];
1792 30607700 : bf1[16] = bf0[16];
1793 30607700 : bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit);
1794 30616600 : bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit);
1795 30608800 : bf1[19] = bf0[19];
1796 30608800 : bf1[20] = bf0[20];
1797 30608800 : bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit);
1798 30608200 : bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit);
1799 30606500 : bf1[23] = bf0[23];
1800 30606500 : bf1[24] = bf0[24];
1801 30606500 : bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit);
1802 30606500 : bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit);
1803 30605800 : bf1[27] = bf0[27];
1804 30605800 : bf1[28] = bf0[28];
1805 30605800 : bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit);
1806 30606400 : bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit);
1807 30605400 : bf1[31] = bf0[31];
1808 30605400 : range_check(stage, input, bf1, size, stage_range[stage]);
1809 :
1810 : // stage 7
1811 30605400 : stage++;
1812 30605400 : cospi = cospi_arr(cos_bit);
1813 30609200 : bf0 = step;
1814 30609200 : bf1 = output;
1815 30609200 : bf1[0] = bf0[0];
1816 30609200 : bf1[1] = bf0[1];
1817 30609200 : bf1[2] = bf0[2];
1818 30609200 : bf1[3] = bf0[3];
1819 30609200 : bf1[4] = bf0[4];
1820 30609200 : bf1[5] = bf0[5];
1821 30609200 : bf1[6] = bf0[6];
1822 30609200 : bf1[7] = bf0[7];
1823 30609200 : bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit);
1824 30612100 : bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit);
1825 30600700 : bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit);
1826 30604600 : bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit);
1827 30600800 : bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit);
1828 30600700 : bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit);
1829 30600600 : bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit);
1830 30600700 : bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit);
1831 30599900 : bf1[16] = bf0[16] + bf0[17];
1832 30599900 : bf1[17] = -bf0[17] + bf0[16];
1833 30599900 : bf1[18] = -bf0[18] + bf0[19];
1834 30599900 : bf1[19] = bf0[19] + bf0[18];
1835 30599900 : bf1[20] = bf0[20] + bf0[21];
1836 30599900 : bf1[21] = -bf0[21] + bf0[20];
1837 30599900 : bf1[22] = -bf0[22] + bf0[23];
1838 30599900 : bf1[23] = bf0[23] + bf0[22];
1839 30599900 : bf1[24] = bf0[24] + bf0[25];
1840 30599900 : bf1[25] = -bf0[25] + bf0[24];
1841 30599900 : bf1[26] = -bf0[26] + bf0[27];
1842 30599900 : bf1[27] = bf0[27] + bf0[26];
1843 30599900 : bf1[28] = bf0[28] + bf0[29];
1844 30599900 : bf1[29] = -bf0[29] + bf0[28];
1845 30599900 : bf1[30] = -bf0[30] + bf0[31];
1846 30599900 : bf1[31] = bf0[31] + bf0[30];
1847 30599900 : range_check(stage, input, bf1, size, stage_range[stage]);
1848 :
1849 : // stage 8
1850 30599900 : stage++;
1851 30599900 : cospi = cospi_arr(cos_bit);
1852 30625700 : bf0 = output;
1853 30625700 : bf1 = step;
1854 30625700 : bf1[0] = bf0[0];
1855 30625700 : bf1[1] = bf0[1];
1856 30625700 : bf1[2] = bf0[2];
1857 30625700 : bf1[3] = bf0[3];
1858 30625700 : bf1[4] = bf0[4];
1859 30625700 : bf1[5] = bf0[5];
1860 30625700 : bf1[6] = bf0[6];
1861 30625700 : bf1[7] = bf0[7];
1862 30625700 : bf1[8] = bf0[8];
1863 30625700 : bf1[9] = bf0[9];
1864 30625700 : bf1[10] = bf0[10];
1865 30625700 : bf1[11] = bf0[11];
1866 30625700 : bf1[12] = bf0[12];
1867 30625700 : bf1[13] = bf0[13];
1868 30625700 : bf1[14] = bf0[14];
1869 30625700 : bf1[15] = bf0[15];
1870 30625700 : bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit);
1871 30635000 : bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit);
1872 30620800 : bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit);
1873 30612800 : bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit);
1874 30607900 : bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit);
1875 30605800 : bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit);
1876 30604800 : bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit);
1877 30607100 : bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit);
1878 30604000 : bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit);
1879 30600600 : bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit);
1880 30596700 : bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit);
1881 30595700 : bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit);
1882 30595600 : bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit);
1883 30595600 : bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit);
1884 30595400 : bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit);
1885 30595500 : bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit);
1886 30595700 : range_check(stage, input, bf1, size, stage_range[stage]);
1887 :
1888 : // stage 9
1889 30595700 : stage++;
1890 30595700 : bf0 = step;
1891 30595700 : bf1 = output;
1892 30595700 : bf1[0] = bf0[0];
1893 30595700 : bf1[1] = bf0[16];
1894 30595700 : bf1[2] = bf0[8];
1895 30595700 : bf1[3] = bf0[24];
1896 30595700 : bf1[4] = bf0[4];
1897 30595700 : bf1[5] = bf0[20];
1898 30595700 : bf1[6] = bf0[12];
1899 30595700 : bf1[7] = bf0[28];
1900 30595700 : bf1[8] = bf0[2];
1901 30595700 : bf1[9] = bf0[18];
1902 30595700 : bf1[10] = bf0[10];
1903 30595700 : bf1[11] = bf0[26];
1904 30595700 : bf1[12] = bf0[6];
1905 30595700 : bf1[13] = bf0[22];
1906 30595700 : bf1[14] = bf0[14];
1907 30595700 : bf1[15] = bf0[30];
1908 30595700 : bf1[16] = bf0[1];
1909 30595700 : bf1[17] = bf0[17];
1910 30595700 : bf1[18] = bf0[9];
1911 30595700 : bf1[19] = bf0[25];
1912 30595700 : bf1[20] = bf0[5];
1913 30595700 : bf1[21] = bf0[21];
1914 30595700 : bf1[22] = bf0[13];
1915 30595700 : bf1[23] = bf0[29];
1916 30595700 : bf1[24] = bf0[3];
1917 30595700 : bf1[25] = bf0[19];
1918 30595700 : bf1[26] = bf0[11];
1919 30595700 : bf1[27] = bf0[27];
1920 30595700 : bf1[28] = bf0[7];
1921 30595700 : bf1[29] = bf0[23];
1922 30595700 : bf1[30] = bf0[15];
1923 30595700 : bf1[31] = bf0[31];
1924 30595700 : range_check(stage, input, bf1, size, stage_range[stage]);
1925 30595700 : }
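/*
 * In the first multiply stages of the wider transforms the only weight
 * is cospi[32], i.e. cos(PI / 4); each half_btf(+/-cospi[32], a,
 * cospi[32], b, cos_bit) pair is therefore a fixed-point evaluation of
 * (b +/- a) / sqrt(2).
 */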
1926 0 : void eb_av1_fdct64_new(const int32_t *input, int32_t *output, int8_t cos_bit,
1927 : const int8_t *stage_range) {
1928 0 : const int32_t size = 64;
1929 : const int32_t *cospi;
1930 :
1931 0 : int32_t stage = 0;
1932 : int32_t *bf0, *bf1;
1933 : int32_t step[64];
1934 :
1935 : // stage 0
1936 0 : range_check(stage, input, input, size, stage_range[stage]);
1937 :
1938 : // stage 1
1939 0 : stage++;
1940 0 : bf1 = output;
1941 0 : bf1[0] = input[0] + input[63];
1942 0 : bf1[1] = input[1] + input[62];
1943 0 : bf1[2] = input[2] + input[61];
1944 0 : bf1[3] = input[3] + input[60];
1945 0 : bf1[4] = input[4] + input[59];
1946 0 : bf1[5] = input[5] + input[58];
1947 0 : bf1[6] = input[6] + input[57];
1948 0 : bf1[7] = input[7] + input[56];
1949 0 : bf1[8] = input[8] + input[55];
1950 0 : bf1[9] = input[9] + input[54];
1951 0 : bf1[10] = input[10] + input[53];
1952 0 : bf1[11] = input[11] + input[52];
1953 0 : bf1[12] = input[12] + input[51];
1954 0 : bf1[13] = input[13] + input[50];
1955 0 : bf1[14] = input[14] + input[49];
1956 0 : bf1[15] = input[15] + input[48];
1957 0 : bf1[16] = input[16] + input[47];
1958 0 : bf1[17] = input[17] + input[46];
1959 0 : bf1[18] = input[18] + input[45];
1960 0 : bf1[19] = input[19] + input[44];
1961 0 : bf1[20] = input[20] + input[43];
1962 0 : bf1[21] = input[21] + input[42];
1963 0 : bf1[22] = input[22] + input[41];
1964 0 : bf1[23] = input[23] + input[40];
1965 0 : bf1[24] = input[24] + input[39];
1966 0 : bf1[25] = input[25] + input[38];
1967 0 : bf1[26] = input[26] + input[37];
1968 0 : bf1[27] = input[27] + input[36];
1969 0 : bf1[28] = input[28] + input[35];
1970 0 : bf1[29] = input[29] + input[34];
1971 0 : bf1[30] = input[30] + input[33];
1972 0 : bf1[31] = input[31] + input[32];
1973 0 : bf1[32] = -input[32] + input[31];
1974 0 : bf1[33] = -input[33] + input[30];
1975 0 : bf1[34] = -input[34] + input[29];
1976 0 : bf1[35] = -input[35] + input[28];
1977 0 : bf1[36] = -input[36] + input[27];
1978 0 : bf1[37] = -input[37] + input[26];
1979 0 : bf1[38] = -input[38] + input[25];
1980 0 : bf1[39] = -input[39] + input[24];
1981 0 : bf1[40] = -input[40] + input[23];
1982 0 : bf1[41] = -input[41] + input[22];
1983 0 : bf1[42] = -input[42] + input[21];
1984 0 : bf1[43] = -input[43] + input[20];
1985 0 : bf1[44] = -input[44] + input[19];
1986 0 : bf1[45] = -input[45] + input[18];
1987 0 : bf1[46] = -input[46] + input[17];
1988 0 : bf1[47] = -input[47] + input[16];
1989 0 : bf1[48] = -input[48] + input[15];
1990 0 : bf1[49] = -input[49] + input[14];
1991 0 : bf1[50] = -input[50] + input[13];
1992 0 : bf1[51] = -input[51] + input[12];
1993 0 : bf1[52] = -input[52] + input[11];
1994 0 : bf1[53] = -input[53] + input[10];
1995 0 : bf1[54] = -input[54] + input[9];
1996 0 : bf1[55] = -input[55] + input[8];
1997 0 : bf1[56] = -input[56] + input[7];
1998 0 : bf1[57] = -input[57] + input[6];
1999 0 : bf1[58] = -input[58] + input[5];
2000 0 : bf1[59] = -input[59] + input[4];
2001 0 : bf1[60] = -input[60] + input[3];
2002 0 : bf1[61] = -input[61] + input[2];
2003 0 : bf1[62] = -input[62] + input[1];
2004 0 : bf1[63] = -input[63] + input[0];
2005 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2006 :
2007 : // stage 2
2008 0 : stage++;
2009 0 : cospi = cospi_arr(cos_bit);
2010 0 : bf0 = output;
2011 0 : bf1 = step;
2012 0 : bf1[0] = bf0[0] + bf0[31];
2013 0 : bf1[1] = bf0[1] + bf0[30];
2014 0 : bf1[2] = bf0[2] + bf0[29];
2015 0 : bf1[3] = bf0[3] + bf0[28];
2016 0 : bf1[4] = bf0[4] + bf0[27];
2017 0 : bf1[5] = bf0[5] + bf0[26];
2018 0 : bf1[6] = bf0[6] + bf0[25];
2019 0 : bf1[7] = bf0[7] + bf0[24];
2020 0 : bf1[8] = bf0[8] + bf0[23];
2021 0 : bf1[9] = bf0[9] + bf0[22];
2022 0 : bf1[10] = bf0[10] + bf0[21];
2023 0 : bf1[11] = bf0[11] + bf0[20];
2024 0 : bf1[12] = bf0[12] + bf0[19];
2025 0 : bf1[13] = bf0[13] + bf0[18];
2026 0 : bf1[14] = bf0[14] + bf0[17];
2027 0 : bf1[15] = bf0[15] + bf0[16];
2028 0 : bf1[16] = -bf0[16] + bf0[15];
2029 0 : bf1[17] = -bf0[17] + bf0[14];
2030 0 : bf1[18] = -bf0[18] + bf0[13];
2031 0 : bf1[19] = -bf0[19] + bf0[12];
2032 0 : bf1[20] = -bf0[20] + bf0[11];
2033 0 : bf1[21] = -bf0[21] + bf0[10];
2034 0 : bf1[22] = -bf0[22] + bf0[9];
2035 0 : bf1[23] = -bf0[23] + bf0[8];
2036 0 : bf1[24] = -bf0[24] + bf0[7];
2037 0 : bf1[25] = -bf0[25] + bf0[6];
2038 0 : bf1[26] = -bf0[26] + bf0[5];
2039 0 : bf1[27] = -bf0[27] + bf0[4];
2040 0 : bf1[28] = -bf0[28] + bf0[3];
2041 0 : bf1[29] = -bf0[29] + bf0[2];
2042 0 : bf1[30] = -bf0[30] + bf0[1];
2043 0 : bf1[31] = -bf0[31] + bf0[0];
2044 0 : bf1[32] = bf0[32];
2045 0 : bf1[33] = bf0[33];
2046 0 : bf1[34] = bf0[34];
2047 0 : bf1[35] = bf0[35];
2048 0 : bf1[36] = bf0[36];
2049 0 : bf1[37] = bf0[37];
2050 0 : bf1[38] = bf0[38];
2051 0 : bf1[39] = bf0[39];
2052 0 : bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit);
2053 0 : bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit);
2054 0 : bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit);
2055 0 : bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit);
2056 0 : bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit);
2057 0 : bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit);
2058 0 : bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit);
2059 0 : bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit);
2060 0 : bf1[48] = half_btf(cospi[32], bf0[48], cospi[32], bf0[47], cos_bit);
2061 0 : bf1[49] = half_btf(cospi[32], bf0[49], cospi[32], bf0[46], cos_bit);
2062 0 : bf1[50] = half_btf(cospi[32], bf0[50], cospi[32], bf0[45], cos_bit);
2063 0 : bf1[51] = half_btf(cospi[32], bf0[51], cospi[32], bf0[44], cos_bit);
2064 0 : bf1[52] = half_btf(cospi[32], bf0[52], cospi[32], bf0[43], cos_bit);
2065 0 : bf1[53] = half_btf(cospi[32], bf0[53], cospi[32], bf0[42], cos_bit);
2066 0 : bf1[54] = half_btf(cospi[32], bf0[54], cospi[32], bf0[41], cos_bit);
2067 0 : bf1[55] = half_btf(cospi[32], bf0[55], cospi[32], bf0[40], cos_bit);
2068 0 : bf1[56] = bf0[56];
2069 0 : bf1[57] = bf0[57];
2070 0 : bf1[58] = bf0[58];
2071 0 : bf1[59] = bf0[59];
2072 0 : bf1[60] = bf0[60];
2073 0 : bf1[61] = bf0[61];
2074 0 : bf1[62] = bf0[62];
2075 0 : bf1[63] = bf0[63];
2076 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2077 :
2078 : // stage 3
2079 0 : stage++;
2080 0 : cospi = cospi_arr(cos_bit);
2081 0 : bf0 = step;
2082 0 : bf1 = output;
2083 0 : bf1[0] = bf0[0] + bf0[15];
2084 0 : bf1[1] = bf0[1] + bf0[14];
2085 0 : bf1[2] = bf0[2] + bf0[13];
2086 0 : bf1[3] = bf0[3] + bf0[12];
2087 0 : bf1[4] = bf0[4] + bf0[11];
2088 0 : bf1[5] = bf0[5] + bf0[10];
2089 0 : bf1[6] = bf0[6] + bf0[9];
2090 0 : bf1[7] = bf0[7] + bf0[8];
2091 0 : bf1[8] = -bf0[8] + bf0[7];
2092 0 : bf1[9] = -bf0[9] + bf0[6];
2093 0 : bf1[10] = -bf0[10] + bf0[5];
2094 0 : bf1[11] = -bf0[11] + bf0[4];
2095 0 : bf1[12] = -bf0[12] + bf0[3];
2096 0 : bf1[13] = -bf0[13] + bf0[2];
2097 0 : bf1[14] = -bf0[14] + bf0[1];
2098 0 : bf1[15] = -bf0[15] + bf0[0];
2099 0 : bf1[16] = bf0[16];
2100 0 : bf1[17] = bf0[17];
2101 0 : bf1[18] = bf0[18];
2102 0 : bf1[19] = bf0[19];
2103 0 : bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
2104 0 : bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
2105 0 : bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
2106 0 : bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
2107 0 : bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit);
2108 0 : bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit);
2109 0 : bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit);
2110 0 : bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit);
2111 0 : bf1[28] = bf0[28];
2112 0 : bf1[29] = bf0[29];
2113 0 : bf1[30] = bf0[30];
2114 0 : bf1[31] = bf0[31];
2115 0 : bf1[32] = bf0[32] + bf0[47];
2116 0 : bf1[33] = bf0[33] + bf0[46];
2117 0 : bf1[34] = bf0[34] + bf0[45];
2118 0 : bf1[35] = bf0[35] + bf0[44];
2119 0 : bf1[36] = bf0[36] + bf0[43];
2120 0 : bf1[37] = bf0[37] + bf0[42];
2121 0 : bf1[38] = bf0[38] + bf0[41];
2122 0 : bf1[39] = bf0[39] + bf0[40];
2123 0 : bf1[40] = -bf0[40] + bf0[39];
2124 0 : bf1[41] = -bf0[41] + bf0[38];
2125 0 : bf1[42] = -bf0[42] + bf0[37];
2126 0 : bf1[43] = -bf0[43] + bf0[36];
2127 0 : bf1[44] = -bf0[44] + bf0[35];
2128 0 : bf1[45] = -bf0[45] + bf0[34];
2129 0 : bf1[46] = -bf0[46] + bf0[33];
2130 0 : bf1[47] = -bf0[47] + bf0[32];
2131 0 : bf1[48] = -bf0[48] + bf0[63];
2132 0 : bf1[49] = -bf0[49] + bf0[62];
2133 0 : bf1[50] = -bf0[50] + bf0[61];
2134 0 : bf1[51] = -bf0[51] + bf0[60];
2135 0 : bf1[52] = -bf0[52] + bf0[59];
2136 0 : bf1[53] = -bf0[53] + bf0[58];
2137 0 : bf1[54] = -bf0[54] + bf0[57];
2138 0 : bf1[55] = -bf0[55] + bf0[56];
2139 0 : bf1[56] = bf0[56] + bf0[55];
2140 0 : bf1[57] = bf0[57] + bf0[54];
2141 0 : bf1[58] = bf0[58] + bf0[53];
2142 0 : bf1[59] = bf0[59] + bf0[52];
2143 0 : bf1[60] = bf0[60] + bf0[51];
2144 0 : bf1[61] = bf0[61] + bf0[50];
2145 0 : bf1[62] = bf0[62] + bf0[49];
2146 0 : bf1[63] = bf0[63] + bf0[48];
2147 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2148 :
2149 : // stage 4
2150 0 : stage++;
2151 0 : cospi = cospi_arr(cos_bit);
2152 0 : bf0 = output;
2153 0 : bf1 = step;
2154 0 : bf1[0] = bf0[0] + bf0[7];
2155 0 : bf1[1] = bf0[1] + bf0[6];
2156 0 : bf1[2] = bf0[2] + bf0[5];
2157 0 : bf1[3] = bf0[3] + bf0[4];
2158 0 : bf1[4] = -bf0[4] + bf0[3];
2159 0 : bf1[5] = -bf0[5] + bf0[2];
2160 0 : bf1[6] = -bf0[6] + bf0[1];
2161 0 : bf1[7] = -bf0[7] + bf0[0];
2162 0 : bf1[8] = bf0[8];
2163 0 : bf1[9] = bf0[9];
2164 0 : bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
2165 0 : bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
2166 0 : bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit);
2167 0 : bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit);
2168 0 : bf1[14] = bf0[14];
2169 0 : bf1[15] = bf0[15];
2170 0 : bf1[16] = bf0[16] + bf0[23];
2171 0 : bf1[17] = bf0[17] + bf0[22];
2172 0 : bf1[18] = bf0[18] + bf0[21];
2173 0 : bf1[19] = bf0[19] + bf0[20];
2174 0 : bf1[20] = -bf0[20] + bf0[19];
2175 0 : bf1[21] = -bf0[21] + bf0[18];
2176 0 : bf1[22] = -bf0[22] + bf0[17];
2177 0 : bf1[23] = -bf0[23] + bf0[16];
2178 0 : bf1[24] = -bf0[24] + bf0[31];
2179 0 : bf1[25] = -bf0[25] + bf0[30];
2180 0 : bf1[26] = -bf0[26] + bf0[29];
2181 0 : bf1[27] = -bf0[27] + bf0[28];
2182 0 : bf1[28] = bf0[28] + bf0[27];
2183 0 : bf1[29] = bf0[29] + bf0[26];
2184 0 : bf1[30] = bf0[30] + bf0[25];
2185 0 : bf1[31] = bf0[31] + bf0[24];
2186 0 : bf1[32] = bf0[32];
2187 0 : bf1[33] = bf0[33];
2188 0 : bf1[34] = bf0[34];
2189 0 : bf1[35] = bf0[35];
2190 0 : bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit);
2191 0 : bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit);
2192 0 : bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit);
2193 0 : bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit);
2194 0 : bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit);
2195 0 : bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit);
2196 0 : bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit);
2197 0 : bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit);
2198 0 : bf1[44] = bf0[44];
2199 0 : bf1[45] = bf0[45];
2200 0 : bf1[46] = bf0[46];
2201 0 : bf1[47] = bf0[47];
2202 0 : bf1[48] = bf0[48];
2203 0 : bf1[49] = bf0[49];
2204 0 : bf1[50] = bf0[50];
2205 0 : bf1[51] = bf0[51];
2206 0 : bf1[52] = half_btf(cospi[48], bf0[52], -cospi[16], bf0[43], cos_bit);
2207 0 : bf1[53] = half_btf(cospi[48], bf0[53], -cospi[16], bf0[42], cos_bit);
2208 0 : bf1[54] = half_btf(cospi[48], bf0[54], -cospi[16], bf0[41], cos_bit);
2209 0 : bf1[55] = half_btf(cospi[48], bf0[55], -cospi[16], bf0[40], cos_bit);
2210 0 : bf1[56] = half_btf(cospi[16], bf0[56], cospi[48], bf0[39], cos_bit);
2211 0 : bf1[57] = half_btf(cospi[16], bf0[57], cospi[48], bf0[38], cos_bit);
2212 0 : bf1[58] = half_btf(cospi[16], bf0[58], cospi[48], bf0[37], cos_bit);
2213 0 : bf1[59] = half_btf(cospi[16], bf0[59], cospi[48], bf0[36], cos_bit);
2214 0 : bf1[60] = bf0[60];
2215 0 : bf1[61] = bf0[61];
2216 0 : bf1[62] = bf0[62];
2217 0 : bf1[63] = bf0[63];
2218 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2219 :
2220 : // stage 5
2221 0 : stage++;
2222 0 : cospi = cospi_arr(cos_bit);
2223 0 : bf0 = step;
2224 0 : bf1 = output;
2225 0 : bf1[0] = bf0[0] + bf0[3];
2226 0 : bf1[1] = bf0[1] + bf0[2];
2227 0 : bf1[2] = -bf0[2] + bf0[1];
2228 0 : bf1[3] = -bf0[3] + bf0[0];
2229 0 : bf1[4] = bf0[4];
2230 0 : bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
2231 0 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit);
2232 0 : bf1[7] = bf0[7];
2233 0 : bf1[8] = bf0[8] + bf0[11];
2234 0 : bf1[9] = bf0[9] + bf0[10];
2235 0 : bf1[10] = -bf0[10] + bf0[9];
2236 0 : bf1[11] = -bf0[11] + bf0[8];
2237 0 : bf1[12] = -bf0[12] + bf0[15];
2238 0 : bf1[13] = -bf0[13] + bf0[14];
2239 0 : bf1[14] = bf0[14] + bf0[13];
2240 0 : bf1[15] = bf0[15] + bf0[12];
2241 0 : bf1[16] = bf0[16];
2242 0 : bf1[17] = bf0[17];
2243 0 : bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit);
2244 0 : bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit);
2245 0 : bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit);
2246 0 : bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit);
2247 0 : bf1[22] = bf0[22];
2248 0 : bf1[23] = bf0[23];
2249 0 : bf1[24] = bf0[24];
2250 0 : bf1[25] = bf0[25];
2251 0 : bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit);
2252 0 : bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit);
2253 0 : bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit);
2254 0 : bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit);
2255 0 : bf1[30] = bf0[30];
2256 0 : bf1[31] = bf0[31];
2257 0 : bf1[32] = bf0[32] + bf0[39];
2258 0 : bf1[33] = bf0[33] + bf0[38];
2259 0 : bf1[34] = bf0[34] + bf0[37];
2260 0 : bf1[35] = bf0[35] + bf0[36];
2261 0 : bf1[36] = -bf0[36] + bf0[35];
2262 0 : bf1[37] = -bf0[37] + bf0[34];
2263 0 : bf1[38] = -bf0[38] + bf0[33];
2264 0 : bf1[39] = -bf0[39] + bf0[32];
2265 0 : bf1[40] = -bf0[40] + bf0[47];
2266 0 : bf1[41] = -bf0[41] + bf0[46];
2267 0 : bf1[42] = -bf0[42] + bf0[45];
2268 0 : bf1[43] = -bf0[43] + bf0[44];
2269 0 : bf1[44] = bf0[44] + bf0[43];
2270 0 : bf1[45] = bf0[45] + bf0[42];
2271 0 : bf1[46] = bf0[46] + bf0[41];
2272 0 : bf1[47] = bf0[47] + bf0[40];
2273 0 : bf1[48] = bf0[48] + bf0[55];
2274 0 : bf1[49] = bf0[49] + bf0[54];
2275 0 : bf1[50] = bf0[50] + bf0[53];
2276 0 : bf1[51] = bf0[51] + bf0[52];
2277 0 : bf1[52] = -bf0[52] + bf0[51];
2278 0 : bf1[53] = -bf0[53] + bf0[50];
2279 0 : bf1[54] = -bf0[54] + bf0[49];
2280 0 : bf1[55] = -bf0[55] + bf0[48];
2281 0 : bf1[56] = -bf0[56] + bf0[63];
2282 0 : bf1[57] = -bf0[57] + bf0[62];
2283 0 : bf1[58] = -bf0[58] + bf0[61];
2284 0 : bf1[59] = -bf0[59] + bf0[60];
2285 0 : bf1[60] = bf0[60] + bf0[59];
2286 0 : bf1[61] = bf0[61] + bf0[58];
2287 0 : bf1[62] = bf0[62] + bf0[57];
2288 0 : bf1[63] = bf0[63] + bf0[56];
2289 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2290 :
2291 : // stage 6
2292 0 : stage++;
2293 0 : cospi = cospi_arr(cos_bit);
2294 0 : bf0 = output;
2295 0 : bf1 = step;
2296 0 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
2297 0 : bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
2298 0 : bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
2299 0 : bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
2300 0 : bf1[4] = bf0[4] + bf0[5];
2301 0 : bf1[5] = -bf0[5] + bf0[4];
2302 0 : bf1[6] = -bf0[6] + bf0[7];
2303 0 : bf1[7] = bf0[7] + bf0[6];
2304 0 : bf1[8] = bf0[8];
2305 0 : bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
2306 0 : bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
2307 0 : bf1[11] = bf0[11];
2308 0 : bf1[12] = bf0[12];
2309 0 : bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit);
2310 0 : bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit);
2311 0 : bf1[15] = bf0[15];
2312 0 : bf1[16] = bf0[16] + bf0[19];
2313 0 : bf1[17] = bf0[17] + bf0[18];
2314 0 : bf1[18] = -bf0[18] + bf0[17];
2315 0 : bf1[19] = -bf0[19] + bf0[16];
2316 0 : bf1[20] = -bf0[20] + bf0[23];
2317 0 : bf1[21] = -bf0[21] + bf0[22];
2318 0 : bf1[22] = bf0[22] + bf0[21];
2319 0 : bf1[23] = bf0[23] + bf0[20];
2320 0 : bf1[24] = bf0[24] + bf0[27];
2321 0 : bf1[25] = bf0[25] + bf0[26];
2322 0 : bf1[26] = -bf0[26] + bf0[25];
2323 0 : bf1[27] = -bf0[27] + bf0[24];
2324 0 : bf1[28] = -bf0[28] + bf0[31];
2325 0 : bf1[29] = -bf0[29] + bf0[30];
2326 0 : bf1[30] = bf0[30] + bf0[29];
2327 0 : bf1[31] = bf0[31] + bf0[28];
2328 0 : bf1[32] = bf0[32];
2329 0 : bf1[33] = bf0[33];
2330 0 : bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit);
2331 0 : bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit);
2332 0 : bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit);
2333 0 : bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit);
2334 0 : bf1[38] = bf0[38];
2335 0 : bf1[39] = bf0[39];
2336 0 : bf1[40] = bf0[40];
2337 0 : bf1[41] = bf0[41];
2338 0 : bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit);
2339 0 : bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit);
2340 0 : bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit);
2341 0 : bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit);
2342 0 : bf1[46] = bf0[46];
2343 0 : bf1[47] = bf0[47];
2344 0 : bf1[48] = bf0[48];
2345 0 : bf1[49] = bf0[49];
2346 0 : bf1[50] = half_btf(cospi[24], bf0[50], -cospi[40], bf0[45], cos_bit);
2347 0 : bf1[51] = half_btf(cospi[24], bf0[51], -cospi[40], bf0[44], cos_bit);
2348 0 : bf1[52] = half_btf(cospi[40], bf0[52], cospi[24], bf0[43], cos_bit);
2349 0 : bf1[53] = half_btf(cospi[40], bf0[53], cospi[24], bf0[42], cos_bit);
2350 0 : bf1[54] = bf0[54];
2351 0 : bf1[55] = bf0[55];
2352 0 : bf1[56] = bf0[56];
2353 0 : bf1[57] = bf0[57];
2354 0 : bf1[58] = half_btf(cospi[56], bf0[58], -cospi[8], bf0[37], cos_bit);
2355 0 : bf1[59] = half_btf(cospi[56], bf0[59], -cospi[8], bf0[36], cos_bit);
2356 0 : bf1[60] = half_btf(cospi[8], bf0[60], cospi[56], bf0[35], cos_bit);
2357 0 : bf1[61] = half_btf(cospi[8], bf0[61], cospi[56], bf0[34], cos_bit);
2358 0 : bf1[62] = bf0[62];
2359 0 : bf1[63] = bf0[63];
2360 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2361 :
2362 : // stage 7
2363 0 : stage++;
2364 0 : cospi = cospi_arr(cos_bit);
2365 0 : bf0 = step;
2366 0 : bf1 = output;
2367 0 : bf1[0] = bf0[0];
2368 0 : bf1[1] = bf0[1];
2369 0 : bf1[2] = bf0[2];
2370 0 : bf1[3] = bf0[3];
2371 0 : bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit);
2372 0 : bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit);
2373 0 : bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit);
2374 0 : bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit);
2375 0 : bf1[8] = bf0[8] + bf0[9];
2376 0 : bf1[9] = -bf0[9] + bf0[8];
2377 0 : bf1[10] = -bf0[10] + bf0[11];
2378 0 : bf1[11] = bf0[11] + bf0[10];
2379 0 : bf1[12] = bf0[12] + bf0[13];
2380 0 : bf1[13] = -bf0[13] + bf0[12];
2381 0 : bf1[14] = -bf0[14] + bf0[15];
2382 0 : bf1[15] = bf0[15] + bf0[14];
2383 0 : bf1[16] = bf0[16];
2384 0 : bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit);
2385 0 : bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit);
2386 0 : bf1[19] = bf0[19];
2387 0 : bf1[20] = bf0[20];
2388 0 : bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit);
2389 0 : bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit);
2390 0 : bf1[23] = bf0[23];
2391 0 : bf1[24] = bf0[24];
2392 0 : bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit);
2393 0 : bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit);
2394 0 : bf1[27] = bf0[27];
2395 0 : bf1[28] = bf0[28];
2396 0 : bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit);
2397 0 : bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit);
2398 0 : bf1[31] = bf0[31];
2399 0 : bf1[32] = bf0[32] + bf0[35];
2400 0 : bf1[33] = bf0[33] + bf0[34];
2401 0 : bf1[34] = -bf0[34] + bf0[33];
2402 0 : bf1[35] = -bf0[35] + bf0[32];
2403 0 : bf1[36] = -bf0[36] + bf0[39];
2404 0 : bf1[37] = -bf0[37] + bf0[38];
2405 0 : bf1[38] = bf0[38] + bf0[37];
2406 0 : bf1[39] = bf0[39] + bf0[36];
2407 0 : bf1[40] = bf0[40] + bf0[43];
2408 0 : bf1[41] = bf0[41] + bf0[42];
2409 0 : bf1[42] = -bf0[42] + bf0[41];
2410 0 : bf1[43] = -bf0[43] + bf0[40];
2411 0 : bf1[44] = -bf0[44] + bf0[47];
2412 0 : bf1[45] = -bf0[45] + bf0[46];
2413 0 : bf1[46] = bf0[46] + bf0[45];
2414 0 : bf1[47] = bf0[47] + bf0[44];
2415 0 : bf1[48] = bf0[48] + bf0[51];
2416 0 : bf1[49] = bf0[49] + bf0[50];
2417 0 : bf1[50] = -bf0[50] + bf0[49];
2418 0 : bf1[51] = -bf0[51] + bf0[48];
2419 0 : bf1[52] = -bf0[52] + bf0[55];
2420 0 : bf1[53] = -bf0[53] + bf0[54];
2421 0 : bf1[54] = bf0[54] + bf0[53];
2422 0 : bf1[55] = bf0[55] + bf0[52];
2423 0 : bf1[56] = bf0[56] + bf0[59];
2424 0 : bf1[57] = bf0[57] + bf0[58];
2425 0 : bf1[58] = -bf0[58] + bf0[57];
2426 0 : bf1[59] = -bf0[59] + bf0[56];
2427 0 : bf1[60] = -bf0[60] + bf0[63];
2428 0 : bf1[61] = -bf0[61] + bf0[62];
2429 0 : bf1[62] = bf0[62] + bf0[61];
2430 0 : bf1[63] = bf0[63] + bf0[60];
2431 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2432 :
2433 : // stage 8
2434 0 : stage++;
2435 0 : cospi = cospi_arr(cos_bit);
2436 0 : bf0 = output;
2437 0 : bf1 = step;
2438 0 : bf1[0] = bf0[0];
2439 0 : bf1[1] = bf0[1];
2440 0 : bf1[2] = bf0[2];
2441 0 : bf1[3] = bf0[3];
2442 0 : bf1[4] = bf0[4];
2443 0 : bf1[5] = bf0[5];
2444 0 : bf1[6] = bf0[6];
2445 0 : bf1[7] = bf0[7];
2446 0 : bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit);
2447 0 : bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit);
2448 0 : bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit);
2449 0 : bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit);
2450 0 : bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit);
2451 0 : bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit);
2452 0 : bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit);
2453 0 : bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit);
2454 0 : bf1[16] = bf0[16] + bf0[17];
2455 0 : bf1[17] = -bf0[17] + bf0[16];
2456 0 : bf1[18] = -bf0[18] + bf0[19];
2457 0 : bf1[19] = bf0[19] + bf0[18];
2458 0 : bf1[20] = bf0[20] + bf0[21];
2459 0 : bf1[21] = -bf0[21] + bf0[20];
2460 0 : bf1[22] = -bf0[22] + bf0[23];
2461 0 : bf1[23] = bf0[23] + bf0[22];
2462 0 : bf1[24] = bf0[24] + bf0[25];
2463 0 : bf1[25] = -bf0[25] + bf0[24];
2464 0 : bf1[26] = -bf0[26] + bf0[27];
2465 0 : bf1[27] = bf0[27] + bf0[26];
2466 0 : bf1[28] = bf0[28] + bf0[29];
2467 0 : bf1[29] = -bf0[29] + bf0[28];
2468 0 : bf1[30] = -bf0[30] + bf0[31];
2469 0 : bf1[31] = bf0[31] + bf0[30];
2470 0 : bf1[32] = bf0[32];
2471 0 : bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit);
2472 0 : bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit);
2473 0 : bf1[35] = bf0[35];
2474 0 : bf1[36] = bf0[36];
2475 0 : bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit);
2476 0 : bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit);
2477 0 : bf1[39] = bf0[39];
2478 0 : bf1[40] = bf0[40];
2479 0 : bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit);
2480 0 : bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit);
2481 0 : bf1[43] = bf0[43];
2482 0 : bf1[44] = bf0[44];
2483 0 : bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit);
2484 0 : bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit);
2485 0 : bf1[47] = bf0[47];
2486 0 : bf1[48] = bf0[48];
2487 0 : bf1[49] = half_btf(cospi[12], bf0[49], -cospi[52], bf0[46], cos_bit);
2488 0 : bf1[50] = half_btf(cospi[52], bf0[50], cospi[12], bf0[45], cos_bit);
2489 0 : bf1[51] = bf0[51];
2490 0 : bf1[52] = bf0[52];
2491 0 : bf1[53] = half_btf(cospi[44], bf0[53], -cospi[20], bf0[42], cos_bit);
2492 0 : bf1[54] = half_btf(cospi[20], bf0[54], cospi[44], bf0[41], cos_bit);
2493 0 : bf1[55] = bf0[55];
2494 0 : bf1[56] = bf0[56];
2495 0 : bf1[57] = half_btf(cospi[28], bf0[57], -cospi[36], bf0[38], cos_bit);
2496 0 : bf1[58] = half_btf(cospi[36], bf0[58], cospi[28], bf0[37], cos_bit);
2497 0 : bf1[59] = bf0[59];
2498 0 : bf1[60] = bf0[60];
2499 0 : bf1[61] = half_btf(cospi[60], bf0[61], -cospi[4], bf0[34], cos_bit);
2500 0 : bf1[62] = half_btf(cospi[4], bf0[62], cospi[60], bf0[33], cos_bit);
2501 0 : bf1[63] = bf0[63];
2502 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2503 :
2504 : // stage 9
2505 0 : stage++;
2506 0 : cospi = cospi_arr(cos_bit);
2507 0 : bf0 = step;
2508 0 : bf1 = output;
2509 0 : bf1[0] = bf0[0];
2510 0 : bf1[1] = bf0[1];
2511 0 : bf1[2] = bf0[2];
2512 0 : bf1[3] = bf0[3];
2513 0 : bf1[4] = bf0[4];
2514 0 : bf1[5] = bf0[5];
2515 0 : bf1[6] = bf0[6];
2516 0 : bf1[7] = bf0[7];
2517 0 : bf1[8] = bf0[8];
2518 0 : bf1[9] = bf0[9];
2519 0 : bf1[10] = bf0[10];
2520 0 : bf1[11] = bf0[11];
2521 0 : bf1[12] = bf0[12];
2522 0 : bf1[13] = bf0[13];
2523 0 : bf1[14] = bf0[14];
2524 0 : bf1[15] = bf0[15];
2525 0 : bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit);
2526 0 : bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit);
2527 0 : bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit);
2528 0 : bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit);
2529 0 : bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit);
2530 0 : bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit);
2531 0 : bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit);
2532 0 : bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit);
2533 0 : bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit);
2534 0 : bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit);
2535 0 : bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit);
2536 0 : bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit);
2537 0 : bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit);
2538 0 : bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit);
2539 0 : bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit);
2540 0 : bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit);
2541 0 : bf1[32] = bf0[32] + bf0[33];
2542 0 : bf1[33] = -bf0[33] + bf0[32];
2543 0 : bf1[34] = -bf0[34] + bf0[35];
2544 0 : bf1[35] = bf0[35] + bf0[34];
2545 0 : bf1[36] = bf0[36] + bf0[37];
2546 0 : bf1[37] = -bf0[37] + bf0[36];
2547 0 : bf1[38] = -bf0[38] + bf0[39];
2548 0 : bf1[39] = bf0[39] + bf0[38];
2549 0 : bf1[40] = bf0[40] + bf0[41];
2550 0 : bf1[41] = -bf0[41] + bf0[40];
2551 0 : bf1[42] = -bf0[42] + bf0[43];
2552 0 : bf1[43] = bf0[43] + bf0[42];
2553 0 : bf1[44] = bf0[44] + bf0[45];
2554 0 : bf1[45] = -bf0[45] + bf0[44];
2555 0 : bf1[46] = -bf0[46] + bf0[47];
2556 0 : bf1[47] = bf0[47] + bf0[46];
2557 0 : bf1[48] = bf0[48] + bf0[49];
2558 0 : bf1[49] = -bf0[49] + bf0[48];
2559 0 : bf1[50] = -bf0[50] + bf0[51];
2560 0 : bf1[51] = bf0[51] + bf0[50];
2561 0 : bf1[52] = bf0[52] + bf0[53];
2562 0 : bf1[53] = -bf0[53] + bf0[52];
2563 0 : bf1[54] = -bf0[54] + bf0[55];
2564 0 : bf1[55] = bf0[55] + bf0[54];
2565 0 : bf1[56] = bf0[56] + bf0[57];
2566 0 : bf1[57] = -bf0[57] + bf0[56];
2567 0 : bf1[58] = -bf0[58] + bf0[59];
2568 0 : bf1[59] = bf0[59] + bf0[58];
2569 0 : bf1[60] = bf0[60] + bf0[61];
2570 0 : bf1[61] = -bf0[61] + bf0[60];
2571 0 : bf1[62] = -bf0[62] + bf0[63];
2572 0 : bf1[63] = bf0[63] + bf0[62];
2573 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2574 :
2575 : // stage 10
2576 0 : stage++;
2577 0 : cospi = cospi_arr(cos_bit);
2578 0 : bf0 = output;
2579 0 : bf1 = step;
2580 0 : bf1[0] = bf0[0];
2581 0 : bf1[1] = bf0[1];
2582 0 : bf1[2] = bf0[2];
2583 0 : bf1[3] = bf0[3];
2584 0 : bf1[4] = bf0[4];
2585 0 : bf1[5] = bf0[5];
2586 0 : bf1[6] = bf0[6];
2587 0 : bf1[7] = bf0[7];
2588 0 : bf1[8] = bf0[8];
2589 0 : bf1[9] = bf0[9];
2590 0 : bf1[10] = bf0[10];
2591 0 : bf1[11] = bf0[11];
2592 0 : bf1[12] = bf0[12];
2593 0 : bf1[13] = bf0[13];
2594 0 : bf1[14] = bf0[14];
2595 0 : bf1[15] = bf0[15];
2596 0 : bf1[16] = bf0[16];
2597 0 : bf1[17] = bf0[17];
2598 0 : bf1[18] = bf0[18];
2599 0 : bf1[19] = bf0[19];
2600 0 : bf1[20] = bf0[20];
2601 0 : bf1[21] = bf0[21];
2602 0 : bf1[22] = bf0[22];
2603 0 : bf1[23] = bf0[23];
2604 0 : bf1[24] = bf0[24];
2605 0 : bf1[25] = bf0[25];
2606 0 : bf1[26] = bf0[26];
2607 0 : bf1[27] = bf0[27];
2608 0 : bf1[28] = bf0[28];
2609 0 : bf1[29] = bf0[29];
2610 0 : bf1[30] = bf0[30];
2611 0 : bf1[31] = bf0[31];
2612 0 : bf1[32] = half_btf(cospi[63], bf0[32], cospi[1], bf0[63], cos_bit);
2613 0 : bf1[33] = half_btf(cospi[31], bf0[33], cospi[33], bf0[62], cos_bit);
2614 0 : bf1[34] = half_btf(cospi[47], bf0[34], cospi[17], bf0[61], cos_bit);
2615 0 : bf1[35] = half_btf(cospi[15], bf0[35], cospi[49], bf0[60], cos_bit);
2616 0 : bf1[36] = half_btf(cospi[55], bf0[36], cospi[9], bf0[59], cos_bit);
2617 0 : bf1[37] = half_btf(cospi[23], bf0[37], cospi[41], bf0[58], cos_bit);
2618 0 : bf1[38] = half_btf(cospi[39], bf0[38], cospi[25], bf0[57], cos_bit);
2619 0 : bf1[39] = half_btf(cospi[7], bf0[39], cospi[57], bf0[56], cos_bit);
2620 0 : bf1[40] = half_btf(cospi[59], bf0[40], cospi[5], bf0[55], cos_bit);
2621 0 : bf1[41] = half_btf(cospi[27], bf0[41], cospi[37], bf0[54], cos_bit);
2622 0 : bf1[42] = half_btf(cospi[43], bf0[42], cospi[21], bf0[53], cos_bit);
2623 0 : bf1[43] = half_btf(cospi[11], bf0[43], cospi[53], bf0[52], cos_bit);
2624 0 : bf1[44] = half_btf(cospi[51], bf0[44], cospi[13], bf0[51], cos_bit);
2625 0 : bf1[45] = half_btf(cospi[19], bf0[45], cospi[45], bf0[50], cos_bit);
2626 0 : bf1[46] = half_btf(cospi[35], bf0[46], cospi[29], bf0[49], cos_bit);
2627 0 : bf1[47] = half_btf(cospi[3], bf0[47], cospi[61], bf0[48], cos_bit);
2628 0 : bf1[48] = half_btf(cospi[3], bf0[48], -cospi[61], bf0[47], cos_bit);
2629 0 : bf1[49] = half_btf(cospi[35], bf0[49], -cospi[29], bf0[46], cos_bit);
2630 0 : bf1[50] = half_btf(cospi[19], bf0[50], -cospi[45], bf0[45], cos_bit);
2631 0 : bf1[51] = half_btf(cospi[51], bf0[51], -cospi[13], bf0[44], cos_bit);
2632 0 : bf1[52] = half_btf(cospi[11], bf0[52], -cospi[53], bf0[43], cos_bit);
2633 0 : bf1[53] = half_btf(cospi[43], bf0[53], -cospi[21], bf0[42], cos_bit);
2634 0 : bf1[54] = half_btf(cospi[27], bf0[54], -cospi[37], bf0[41], cos_bit);
2635 0 : bf1[55] = half_btf(cospi[59], bf0[55], -cospi[5], bf0[40], cos_bit);
2636 0 : bf1[56] = half_btf(cospi[7], bf0[56], -cospi[57], bf0[39], cos_bit);
2637 0 : bf1[57] = half_btf(cospi[39], bf0[57], -cospi[25], bf0[38], cos_bit);
2638 0 : bf1[58] = half_btf(cospi[23], bf0[58], -cospi[41], bf0[37], cos_bit);
2639 0 : bf1[59] = half_btf(cospi[55], bf0[59], -cospi[9], bf0[36], cos_bit);
2640 0 : bf1[60] = half_btf(cospi[15], bf0[60], -cospi[49], bf0[35], cos_bit);
2641 0 : bf1[61] = half_btf(cospi[47], bf0[61], -cospi[17], bf0[34], cos_bit);
2642 0 : bf1[62] = half_btf(cospi[31], bf0[62], -cospi[33], bf0[33], cos_bit);
2643 0 : bf1[63] = half_btf(cospi[63], bf0[63], -cospi[1], bf0[32], cos_bit);
2644 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2645 :
2646 : // stage 11
2647 0 : stage++;
2648 0 : bf0 = step;
2649 0 : bf1 = output;
2650 0 : bf1[0] = bf0[0];
2651 0 : bf1[1] = bf0[32];
2652 0 : bf1[2] = bf0[16];
2653 0 : bf1[3] = bf0[48];
2654 0 : bf1[4] = bf0[8];
2655 0 : bf1[5] = bf0[40];
2656 0 : bf1[6] = bf0[24];
2657 0 : bf1[7] = bf0[56];
2658 0 : bf1[8] = bf0[4];
2659 0 : bf1[9] = bf0[36];
2660 0 : bf1[10] = bf0[20];
2661 0 : bf1[11] = bf0[52];
2662 0 : bf1[12] = bf0[12];
2663 0 : bf1[13] = bf0[44];
2664 0 : bf1[14] = bf0[28];
2665 0 : bf1[15] = bf0[60];
2666 0 : bf1[16] = bf0[2];
2667 0 : bf1[17] = bf0[34];
2668 0 : bf1[18] = bf0[18];
2669 0 : bf1[19] = bf0[50];
2670 0 : bf1[20] = bf0[10];
2671 0 : bf1[21] = bf0[42];
2672 0 : bf1[22] = bf0[26];
2673 0 : bf1[23] = bf0[58];
2674 0 : bf1[24] = bf0[6];
2675 0 : bf1[25] = bf0[38];
2676 0 : bf1[26] = bf0[22];
2677 0 : bf1[27] = bf0[54];
2678 0 : bf1[28] = bf0[14];
2679 0 : bf1[29] = bf0[46];
2680 0 : bf1[30] = bf0[30];
2681 0 : bf1[31] = bf0[62];
2682 0 : bf1[32] = bf0[1];
2683 0 : bf1[33] = bf0[33];
2684 0 : bf1[34] = bf0[17];
2685 0 : bf1[35] = bf0[49];
2686 0 : bf1[36] = bf0[9];
2687 0 : bf1[37] = bf0[41];
2688 0 : bf1[38] = bf0[25];
2689 0 : bf1[39] = bf0[57];
2690 0 : bf1[40] = bf0[5];
2691 0 : bf1[41] = bf0[37];
2692 0 : bf1[42] = bf0[21];
2693 0 : bf1[43] = bf0[53];
2694 0 : bf1[44] = bf0[13];
2695 0 : bf1[45] = bf0[45];
2696 0 : bf1[46] = bf0[29];
2697 0 : bf1[47] = bf0[61];
2698 0 : bf1[48] = bf0[3];
2699 0 : bf1[49] = bf0[35];
2700 0 : bf1[50] = bf0[19];
2701 0 : bf1[51] = bf0[51];
2702 0 : bf1[52] = bf0[11];
2703 0 : bf1[53] = bf0[43];
2704 0 : bf1[54] = bf0[27];
2705 0 : bf1[55] = bf0[59];
2706 0 : bf1[56] = bf0[7];
2707 0 : bf1[57] = bf0[39];
2708 0 : bf1[58] = bf0[23];
2709 0 : bf1[59] = bf0[55];
2710 0 : bf1[60] = bf0[15];
2711 0 : bf1[61] = bf0[47];
2712 0 : bf1[62] = bf0[31];
2713 0 : bf1[63] = bf0[63];
2714 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2715 0 : }
2716 :
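/*
 * The 4-point fADST is evaluated from closed-form expressions rather
 * than butterflies. Assuming the libaom table convention, sinpi[j] ~=
 * round(sqrt(2) * sin(j * PI / 9) * (2 / 3) * (1 << cos_bit)), so each
 * output below is a short weighted sum of the four inputs, scaled back
 * by round_shift(., cos_bit).
 */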
2717 0 : void eb_av1_fadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
2718 : const int8_t *stage_range) {
2719 0 : int32_t bit = cos_bit;
2720 0 : const int32_t *sinpi = sinpi_arr(bit);
2721 : int32_t x0, x1, x2, x3;
2722 : int32_t s0, s1, s2, s3, s4, s5, s6, s7;
2723 :
2724 : // stage 0
2725 : range_check(0, input, input, 4, stage_range[0]);
2726 0 : x0 = input[0];
2727 0 : x1 = input[1];
2728 0 : x2 = input[2];
2729 0 : x3 = input[3];
2730 :
2731 0 : if (!(x0 | x1 | x2 | x3)) {
2732 0 : output[0] = output[1] = output[2] = output[3] = 0;
2733 0 : return;
2734 : }
2735 :
2768 : // stage 1
2769 0 : s0 = sinpi[1] * x0;
2770 0 : s1 = sinpi[4] * x0;
2771 0 : s2 = sinpi[2] * x1;
2772 0 : s3 = sinpi[1] * x1;
2773 0 : s4 = sinpi[3] * x2;
2774 0 : s5 = sinpi[4] * x3;
2775 0 : s6 = sinpi[2] * x3;
2776 0 : s7 = x0 + x1;
2777 :
2778 : // stage 2
2779 0 : s7 = s7 - x3;
2780 :
2781 : // stage 3
2782 0 : x0 = s0 + s2;
2783 0 : x1 = sinpi[3] * s7;
2784 0 : x2 = s1 - s3;
2785 0 : x3 = s4;
2786 :
2787 : // stage 4
2788 0 : x0 = x0 + s5;
2789 0 : x2 = x2 + s6;
2790 :
2791 : // stage 5
2792 0 : s0 = x0 + x3;
2793 0 : s1 = x1;
2794 0 : s2 = x2 - x3;
2795 0 : s3 = x2 - x0;
2796 :
2797 : // stage 6
2798 0 : s3 = s3 + x3;
2799 :
2800 : // 1-D transform scaling factor is sqrt(2).
2801 0 : output[0] = round_shift(s0, bit);
2802 0 : output[1] = round_shift(s1, bit);
2803 0 : output[2] = round_shift(s2, bit);
2804 0 : output[3] = round_shift(s3, bit);
2805 0 : range_check(6, input, output, 4, stage_range[6]);
2806 : }
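/*
 * Expanding the stages above gives the closed form of the 4-point ADST.
 * Using the exact identity sinpi[1] + sinpi[2] == sinpi[4] (from
 * sin(pi/9) + sin(2*pi/9) == sin(4*pi/9)), the outputs reduce to:
 *
 *   output[0] = round_shift( sinpi[1]*x0 + sinpi[2]*x1 + sinpi[3]*x2 + sinpi[4]*x3, bit)
 *   output[1] = round_shift( sinpi[3]*(x0 + x1 - x3),                               bit)
 *   output[2] = round_shift( sinpi[4]*x0 - sinpi[1]*x1 - sinpi[3]*x2 + sinpi[2]*x3, bit)
 *   output[3] = round_shift( sinpi[2]*x0 - sinpi[4]*x1 + sinpi[3]*x2 - sinpi[1]*x3, bit)
 */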
2807 :
2808 0 : void eb_av1_fadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
2809 : const int8_t *stage_range) {
2810 0 : const int32_t size = 8;
2811 : const int32_t *cospi;
2812 :
2813 0 : int32_t stage = 0;
2814 : int32_t *bf0, *bf1;
2815 : int32_t step[8];
2816 :
2817 : // stage 0;
2818 0 : range_check(stage, input, input, size, stage_range[stage]);
2819 :
2820 : // stage 1;
2821 0 : stage++;
2822 0 : assert(output != input);
2823 0 : bf1 = output;
2824 0 : bf1[0] = input[0];
2825 0 : bf1[1] = -input[7];
2826 0 : bf1[2] = -input[3];
2827 0 : bf1[3] = input[4];
2828 0 : bf1[4] = -input[1];
2829 0 : bf1[5] = input[6];
2830 0 : bf1[6] = input[2];
2831 0 : bf1[7] = -input[5];
2832 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2833 :
2834 : // stage 2
2835 0 : stage++;
2836 0 : cospi = cospi_arr(cos_bit);
2837 0 : bf0 = output;
2838 0 : bf1 = step;
2839 0 : bf1[0] = bf0[0];
2840 0 : bf1[1] = bf0[1];
2841 0 : bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
2842 0 : bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
2843 0 : bf1[4] = bf0[4];
2844 0 : bf1[5] = bf0[5];
2845 0 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
2846 0 : bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
2847 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2848 :
2849 : // stage 3
2850 0 : stage++;
2851 0 : bf0 = step;
2852 0 : bf1 = output;
2853 0 : bf1[0] = bf0[0] + bf0[2];
2854 0 : bf1[1] = bf0[1] + bf0[3];
2855 0 : bf1[2] = bf0[0] - bf0[2];
2856 0 : bf1[3] = bf0[1] - bf0[3];
2857 0 : bf1[4] = bf0[4] + bf0[6];
2858 0 : bf1[5] = bf0[5] + bf0[7];
2859 0 : bf1[6] = bf0[4] - bf0[6];
2860 0 : bf1[7] = bf0[5] - bf0[7];
2861 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2862 :
2863 : // stage 4
2864 0 : stage++;
2865 0 : cospi = cospi_arr(cos_bit);
2866 0 : bf0 = output;
2867 0 : bf1 = step;
2868 0 : bf1[0] = bf0[0];
2869 0 : bf1[1] = bf0[1];
2870 0 : bf1[2] = bf0[2];
2871 0 : bf1[3] = bf0[3];
2872 0 : bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
2873 0 : bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
2874 0 : bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
2875 0 : bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
2876 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2877 :
2878 : // stage 5
2879 0 : stage++;
2880 0 : bf0 = step;
2881 0 : bf1 = output;
2882 0 : bf1[0] = bf0[0] + bf0[4];
2883 0 : bf1[1] = bf0[1] + bf0[5];
2884 0 : bf1[2] = bf0[2] + bf0[6];
2885 0 : bf1[3] = bf0[3] + bf0[7];
2886 0 : bf1[4] = bf0[0] - bf0[4];
2887 0 : bf1[5] = bf0[1] - bf0[5];
2888 0 : bf1[6] = bf0[2] - bf0[6];
2889 0 : bf1[7] = bf0[3] - bf0[7];
2890 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2891 :
2892 : // stage 6
2893 0 : stage++;
2894 0 : cospi = cospi_arr(cos_bit);
2895 0 : bf0 = output;
2896 0 : bf1 = step;
2897 0 : bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit);
2898 0 : bf1[1] = half_btf(cospi[60], bf0[0], -cospi[4], bf0[1], cos_bit);
2899 0 : bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit);
2900 0 : bf1[3] = half_btf(cospi[44], bf0[2], -cospi[20], bf0[3], cos_bit);
2901 0 : bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit);
2902 0 : bf1[5] = half_btf(cospi[28], bf0[4], -cospi[36], bf0[5], cos_bit);
2903 0 : bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit);
2904 0 : bf1[7] = half_btf(cospi[12], bf0[6], -cospi[52], bf0[7], cos_bit);
2905 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2906 :
2907 : // stage 7
2908 0 : stage++;
2909 0 : bf0 = step;
2910 0 : bf1 = output;
2911 0 : bf1[0] = bf0[1];
2912 0 : bf1[1] = bf0[6];
2913 0 : bf1[2] = bf0[3];
2914 0 : bf1[3] = bf0[4];
2915 0 : bf1[4] = bf0[5];
2916 0 : bf1[5] = bf0[2];
2917 0 : bf1[6] = bf0[7];
2918 0 : bf1[7] = bf0[0];
2919 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2920 0 : }
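/*
 * Every rotation stage above runs through half_btf(), a rounded "half
 * butterfly". Paraphrased sketch of the helper as defined in the AV1
 * reference code (the exact definition lives in the transform headers):
 *
 *   static INLINE int32_t half_btf(int32_t w0, int32_t in0,
 *                                  int32_t w1, int32_t in1, int32_t bit) {
 *       const int64_t result_64 = (int64_t)w0 * in0 + (int64_t)w1 * in1;
 *       return (int32_t)round_shift(result_64, bit);  // round to nearest
 *   }
 */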
2921 :
2922 0 : void eb_av1_fadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
2923 : const int8_t *stage_range) {
2924 0 : const int32_t size = 16;
2925 : const int32_t *cospi;
2926 :
2927 0 : int32_t stage = 0;
2928 : int32_t *bf0, *bf1;
2929 : int32_t step[16];
2930 :
2931 : // stage 0;
2932 0 : range_check(stage, input, input, size, stage_range[stage]);
2933 :
2934 : // stage 1;
2935 0 : stage++;
2936 0 : assert(output != input);
2937 0 : bf1 = output;
2938 0 : bf1[0] = input[0];
2939 0 : bf1[1] = -input[15];
2940 0 : bf1[2] = -input[7];
2941 0 : bf1[3] = input[8];
2942 0 : bf1[4] = -input[3];
2943 0 : bf1[5] = input[12];
2944 0 : bf1[6] = input[4];
2945 0 : bf1[7] = -input[11];
2946 0 : bf1[8] = -input[1];
2947 0 : bf1[9] = input[14];
2948 0 : bf1[10] = input[6];
2949 0 : bf1[11] = -input[9];
2950 0 : bf1[12] = input[2];
2951 0 : bf1[13] = -input[13];
2952 0 : bf1[14] = -input[5];
2953 0 : bf1[15] = input[10];
2954 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2955 :
2956 : // stage 2
2957 0 : stage++;
2958 0 : cospi = cospi_arr(cos_bit);
2959 0 : bf0 = output;
2960 0 : bf1 = step;
2961 0 : bf1[0] = bf0[0];
2962 0 : bf1[1] = bf0[1];
2963 0 : bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
2964 0 : bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
2965 0 : bf1[4] = bf0[4];
2966 0 : bf1[5] = bf0[5];
2967 0 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
2968 0 : bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
2969 0 : bf1[8] = bf0[8];
2970 0 : bf1[9] = bf0[9];
2971 0 : bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit);
2972 0 : bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit);
2973 0 : bf1[12] = bf0[12];
2974 0 : bf1[13] = bf0[13];
2975 0 : bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit);
2976 0 : bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit);
2977 0 : range_check(stage, input, bf1, size, stage_range[stage]);
2978 :
2979 : // stage 3
2980 0 : stage++;
2981 0 : bf0 = step;
2982 0 : bf1 = output;
2983 0 : bf1[0] = bf0[0] + bf0[2];
2984 0 : bf1[1] = bf0[1] + bf0[3];
2985 0 : bf1[2] = bf0[0] - bf0[2];
2986 0 : bf1[3] = bf0[1] - bf0[3];
2987 0 : bf1[4] = bf0[4] + bf0[6];
2988 0 : bf1[5] = bf0[5] + bf0[7];
2989 0 : bf1[6] = bf0[4] - bf0[6];
2990 0 : bf1[7] = bf0[5] - bf0[7];
2991 0 : bf1[8] = bf0[8] + bf0[10];
2992 0 : bf1[9] = bf0[9] + bf0[11];
2993 0 : bf1[10] = bf0[8] - bf0[10];
2994 0 : bf1[11] = bf0[9] - bf0[11];
2995 0 : bf1[12] = bf0[12] + bf0[14];
2996 0 : bf1[13] = bf0[13] + bf0[15];
2997 0 : bf1[14] = bf0[12] - bf0[14];
2998 0 : bf1[15] = bf0[13] - bf0[15];
2999 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3000 :
3001 : // stage 4
3002 0 : stage++;
3003 0 : cospi = cospi_arr(cos_bit);
3004 0 : bf0 = output;
3005 0 : bf1 = step;
3006 0 : bf1[0] = bf0[0];
3007 0 : bf1[1] = bf0[1];
3008 0 : bf1[2] = bf0[2];
3009 0 : bf1[3] = bf0[3];
3010 0 : bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
3011 0 : bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
3012 0 : bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
3013 0 : bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
3014 0 : bf1[8] = bf0[8];
3015 0 : bf1[9] = bf0[9];
3016 0 : bf1[10] = bf0[10];
3017 0 : bf1[11] = bf0[11];
3018 0 : bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit);
3019 0 : bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit);
3020 0 : bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit);
3021 0 : bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit);
3022 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3023 :
3024 : // stage 5
3025 0 : stage++;
3026 0 : bf0 = step;
3027 0 : bf1 = output;
3028 0 : bf1[0] = bf0[0] + bf0[4];
3029 0 : bf1[1] = bf0[1] + bf0[5];
3030 0 : bf1[2] = bf0[2] + bf0[6];
3031 0 : bf1[3] = bf0[3] + bf0[7];
3032 0 : bf1[4] = bf0[0] - bf0[4];
3033 0 : bf1[5] = bf0[1] - bf0[5];
3034 0 : bf1[6] = bf0[2] - bf0[6];
3035 0 : bf1[7] = bf0[3] - bf0[7];
3036 0 : bf1[8] = bf0[8] + bf0[12];
3037 0 : bf1[9] = bf0[9] + bf0[13];
3038 0 : bf1[10] = bf0[10] + bf0[14];
3039 0 : bf1[11] = bf0[11] + bf0[15];
3040 0 : bf1[12] = bf0[8] - bf0[12];
3041 0 : bf1[13] = bf0[9] - bf0[13];
3042 0 : bf1[14] = bf0[10] - bf0[14];
3043 0 : bf1[15] = bf0[11] - bf0[15];
3044 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3045 :
3046 : // stage 6
3047 0 : stage++;
3048 0 : cospi = cospi_arr(cos_bit);
3049 0 : bf0 = output;
3050 0 : bf1 = step;
3051 0 : bf1[0] = bf0[0];
3052 0 : bf1[1] = bf0[1];
3053 0 : bf1[2] = bf0[2];
3054 0 : bf1[3] = bf0[3];
3055 0 : bf1[4] = bf0[4];
3056 0 : bf1[5] = bf0[5];
3057 0 : bf1[6] = bf0[6];
3058 0 : bf1[7] = bf0[7];
3059 0 : bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit);
3060 0 : bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit);
3061 0 : bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit);
3062 0 : bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit);
3063 0 : bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit);
3064 0 : bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit);
3065 0 : bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit);
3066 0 : bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit);
3067 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3068 :
3069 : // stage 7
3070 0 : stage++;
3071 0 : bf0 = step;
3072 0 : bf1 = output;
3073 0 : bf1[0] = bf0[0] + bf0[8];
3074 0 : bf1[1] = bf0[1] + bf0[9];
3075 0 : bf1[2] = bf0[2] + bf0[10];
3076 0 : bf1[3] = bf0[3] + bf0[11];
3077 0 : bf1[4] = bf0[4] + bf0[12];
3078 0 : bf1[5] = bf0[5] + bf0[13];
3079 0 : bf1[6] = bf0[6] + bf0[14];
3080 0 : bf1[7] = bf0[7] + bf0[15];
3081 0 : bf1[8] = bf0[0] - bf0[8];
3082 0 : bf1[9] = bf0[1] - bf0[9];
3083 0 : bf1[10] = bf0[2] - bf0[10];
3084 0 : bf1[11] = bf0[3] - bf0[11];
3085 0 : bf1[12] = bf0[4] - bf0[12];
3086 0 : bf1[13] = bf0[5] - bf0[13];
3087 0 : bf1[14] = bf0[6] - bf0[14];
3088 0 : bf1[15] = bf0[7] - bf0[15];
3089 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3090 :
3091 : // stage 8
3092 0 : stage++;
3093 0 : cospi = cospi_arr(cos_bit);
3094 0 : bf0 = output;
3095 0 : bf1 = step;
3096 0 : bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit);
3097 0 : bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit);
3098 0 : bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit);
3099 0 : bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit);
3100 0 : bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit);
3101 0 : bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit);
3102 0 : bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit);
3103 0 : bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit);
3104 0 : bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit);
3105 0 : bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit);
3106 0 : bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit);
3107 0 : bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit);
3108 0 : bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit);
3109 0 : bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit);
3110 0 : bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit);
3111 0 : bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit);
3112 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3113 :
3114 : // stage 9
3115 0 : stage++;
3116 0 : bf0 = step;
3117 0 : bf1 = output;
3118 0 : bf1[0] = bf0[1];
3119 0 : bf1[1] = bf0[14];
3120 0 : bf1[2] = bf0[3];
3121 0 : bf1[3] = bf0[12];
3122 0 : bf1[4] = bf0[5];
3123 0 : bf1[5] = bf0[10];
3124 0 : bf1[6] = bf0[7];
3125 0 : bf1[7] = bf0[8];
3126 0 : bf1[8] = bf0[9];
3127 0 : bf1[9] = bf0[6];
3128 0 : bf1[10] = bf0[11];
3129 0 : bf1[11] = bf0[4];
3130 0 : bf1[12] = bf0[13];
3131 0 : bf1[13] = bf0[2];
3132 0 : bf1[14] = bf0[15];
3133 0 : bf1[15] = bf0[0];
3134 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3135 0 : }
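/*
 * The final stage of eb_av1_fadst8_new and eb_av1_fadst16_new is a fixed
 * shuffle with a simple closed form (check it against stages 7/9 above):
 * for transform length N,
 *
 *   output[k] = step[k + 1]      for even k
 *   output[k] = step[N - 1 - k]  for odd  k
 */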
3136 :
3137 0 : void av1_fadst32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
3138 : const int8_t *stage_range) {
3139 0 : const int32_t size = 32;
3140 : const int32_t *cospi;
3141 :
3142 0 : int32_t stage = 0;
3143 : int32_t *bf0, *bf1;
3144 : int32_t step[32];
3145 :
3146 : // stage 0;
3147 0 : range_check(stage, input, input, size, stage_range[stage]);
3148 :
3149 : // stage 1;
3150 0 : stage++;
3151 0 : assert(output != input);
3151 0 : bf1 = output;
3152 0 : bf1[0] = input[31];
3153 0 : bf1[1] = input[0];
3154 0 : bf1[2] = input[29];
3155 0 : bf1[3] = input[2];
3156 0 : bf1[4] = input[27];
3157 0 : bf1[5] = input[4];
3158 0 : bf1[6] = input[25];
3159 0 : bf1[7] = input[6];
3160 0 : bf1[8] = input[23];
3161 0 : bf1[9] = input[8];
3162 0 : bf1[10] = input[21];
3163 0 : bf1[11] = input[10];
3164 0 : bf1[12] = input[19];
3165 0 : bf1[13] = input[12];
3166 0 : bf1[14] = input[17];
3167 0 : bf1[15] = input[14];
3168 0 : bf1[16] = input[15];
3169 0 : bf1[17] = input[16];
3170 0 : bf1[18] = input[13];
3171 0 : bf1[19] = input[18];
3172 0 : bf1[20] = input[11];
3173 0 : bf1[21] = input[20];
3174 0 : bf1[22] = input[9];
3175 0 : bf1[23] = input[22];
3176 0 : bf1[24] = input[7];
3177 0 : bf1[25] = input[24];
3178 0 : bf1[26] = input[5];
3179 0 : bf1[27] = input[26];
3180 0 : bf1[28] = input[3];
3181 0 : bf1[29] = input[28];
3182 0 : bf1[30] = input[1];
3183 0 : bf1[31] = input[30];
3184 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3185 :
3186 : // stage 2
3187 0 : stage++;
3188 0 : cospi = cospi_arr(cos_bit);
3189 0 : bf0 = output;
3190 0 : bf1 = step;
3191 0 : bf1[0] = half_btf(cospi[1], bf0[0], cospi[63], bf0[1], cos_bit);
3192 0 : bf1[1] = half_btf(-cospi[1], bf0[1], cospi[63], bf0[0], cos_bit);
3193 0 : bf1[2] = half_btf(cospi[5], bf0[2], cospi[59], bf0[3], cos_bit);
3194 0 : bf1[3] = half_btf(-cospi[5], bf0[3], cospi[59], bf0[2], cos_bit);
3195 0 : bf1[4] = half_btf(cospi[9], bf0[4], cospi[55], bf0[5], cos_bit);
3196 0 : bf1[5] = half_btf(-cospi[9], bf0[5], cospi[55], bf0[4], cos_bit);
3197 0 : bf1[6] = half_btf(cospi[13], bf0[6], cospi[51], bf0[7], cos_bit);
3198 0 : bf1[7] = half_btf(-cospi[13], bf0[7], cospi[51], bf0[6], cos_bit);
3199 0 : bf1[8] = half_btf(cospi[17], bf0[8], cospi[47], bf0[9], cos_bit);
3200 0 : bf1[9] = half_btf(-cospi[17], bf0[9], cospi[47], bf0[8], cos_bit);
3201 0 : bf1[10] = half_btf(cospi[21], bf0[10], cospi[43], bf0[11], cos_bit);
3202 0 : bf1[11] = half_btf(-cospi[21], bf0[11], cospi[43], bf0[10], cos_bit);
3203 0 : bf1[12] = half_btf(cospi[25], bf0[12], cospi[39], bf0[13], cos_bit);
3204 0 : bf1[13] = half_btf(-cospi[25], bf0[13], cospi[39], bf0[12], cos_bit);
3205 0 : bf1[14] = half_btf(cospi[29], bf0[14], cospi[35], bf0[15], cos_bit);
3206 0 : bf1[15] = half_btf(-cospi[29], bf0[15], cospi[35], bf0[14], cos_bit);
3207 0 : bf1[16] = half_btf(cospi[33], bf0[16], cospi[31], bf0[17], cos_bit);
3208 0 : bf1[17] = half_btf(-cospi[33], bf0[17], cospi[31], bf0[16], cos_bit);
3209 0 : bf1[18] = half_btf(cospi[37], bf0[18], cospi[27], bf0[19], cos_bit);
3210 0 : bf1[19] = half_btf(-cospi[37], bf0[19], cospi[27], bf0[18], cos_bit);
3211 0 : bf1[20] = half_btf(cospi[41], bf0[20], cospi[23], bf0[21], cos_bit);
3212 0 : bf1[21] = half_btf(-cospi[41], bf0[21], cospi[23], bf0[20], cos_bit);
3213 0 : bf1[22] = half_btf(cospi[45], bf0[22], cospi[19], bf0[23], cos_bit);
3214 0 : bf1[23] = half_btf(-cospi[45], bf0[23], cospi[19], bf0[22], cos_bit);
3215 0 : bf1[24] = half_btf(cospi[49], bf0[24], cospi[15], bf0[25], cos_bit);
3216 0 : bf1[25] = half_btf(-cospi[49], bf0[25], cospi[15], bf0[24], cos_bit);
3217 0 : bf1[26] = half_btf(cospi[53], bf0[26], cospi[11], bf0[27], cos_bit);
3218 0 : bf1[27] = half_btf(-cospi[53], bf0[27], cospi[11], bf0[26], cos_bit);
3219 0 : bf1[28] = half_btf(cospi[57], bf0[28], cospi[7], bf0[29], cos_bit);
3220 0 : bf1[29] = half_btf(-cospi[57], bf0[29], cospi[7], bf0[28], cos_bit);
3221 0 : bf1[30] = half_btf(cospi[61], bf0[30], cospi[3], bf0[31], cos_bit);
3222 0 : bf1[31] = half_btf(-cospi[61], bf0[31], cospi[3], bf0[30], cos_bit);
3223 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3224 :
3225 : // stage 3
3226 0 : stage++;
3227 0 : bf0 = step;
3228 0 : bf1 = output;
3229 0 : bf1[0] = bf0[0] + bf0[16];
3230 0 : bf1[1] = bf0[1] + bf0[17];
3231 0 : bf1[2] = bf0[2] + bf0[18];
3232 0 : bf1[3] = bf0[3] + bf0[19];
3233 0 : bf1[4] = bf0[4] + bf0[20];
3234 0 : bf1[5] = bf0[5] + bf0[21];
3235 0 : bf1[6] = bf0[6] + bf0[22];
3236 0 : bf1[7] = bf0[7] + bf0[23];
3237 0 : bf1[8] = bf0[8] + bf0[24];
3238 0 : bf1[9] = bf0[9] + bf0[25];
3239 0 : bf1[10] = bf0[10] + bf0[26];
3240 0 : bf1[11] = bf0[11] + bf0[27];
3241 0 : bf1[12] = bf0[12] + bf0[28];
3242 0 : bf1[13] = bf0[13] + bf0[29];
3243 0 : bf1[14] = bf0[14] + bf0[30];
3244 0 : bf1[15] = bf0[15] + bf0[31];
3245 0 : bf1[16] = -bf0[16] + bf0[0];
3246 0 : bf1[17] = -bf0[17] + bf0[1];
3247 0 : bf1[18] = -bf0[18] + bf0[2];
3248 0 : bf1[19] = -bf0[19] + bf0[3];
3249 0 : bf1[20] = -bf0[20] + bf0[4];
3250 0 : bf1[21] = -bf0[21] + bf0[5];
3251 0 : bf1[22] = -bf0[22] + bf0[6];
3252 0 : bf1[23] = -bf0[23] + bf0[7];
3253 0 : bf1[24] = -bf0[24] + bf0[8];
3254 0 : bf1[25] = -bf0[25] + bf0[9];
3255 0 : bf1[26] = -bf0[26] + bf0[10];
3256 0 : bf1[27] = -bf0[27] + bf0[11];
3257 0 : bf1[28] = -bf0[28] + bf0[12];
3258 0 : bf1[29] = -bf0[29] + bf0[13];
3259 0 : bf1[30] = -bf0[30] + bf0[14];
3260 0 : bf1[31] = -bf0[31] + bf0[15];
3261 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3262 :
3263 : // stage 4
3264 0 : stage++;
3265 0 : cospi = cospi_arr(cos_bit);
3266 0 : bf0 = output;
3267 0 : bf1 = step;
3268 0 : bf1[0] = bf0[0];
3269 0 : bf1[1] = bf0[1];
3270 0 : bf1[2] = bf0[2];
3271 0 : bf1[3] = bf0[3];
3272 0 : bf1[4] = bf0[4];
3273 0 : bf1[5] = bf0[5];
3274 0 : bf1[6] = bf0[6];
3275 0 : bf1[7] = bf0[7];
3276 0 : bf1[8] = bf0[8];
3277 0 : bf1[9] = bf0[9];
3278 0 : bf1[10] = bf0[10];
3279 0 : bf1[11] = bf0[11];
3280 0 : bf1[12] = bf0[12];
3281 0 : bf1[13] = bf0[13];
3282 0 : bf1[14] = bf0[14];
3283 0 : bf1[15] = bf0[15];
3284 0 : bf1[16] = half_btf(cospi[4], bf0[16], cospi[60], bf0[17], cos_bit);
3285 0 : bf1[17] = half_btf(-cospi[4], bf0[17], cospi[60], bf0[16], cos_bit);
3286 0 : bf1[18] = half_btf(cospi[20], bf0[18], cospi[44], bf0[19], cos_bit);
3287 0 : bf1[19] = half_btf(-cospi[20], bf0[19], cospi[44], bf0[18], cos_bit);
3288 0 : bf1[20] = half_btf(cospi[36], bf0[20], cospi[28], bf0[21], cos_bit);
3289 0 : bf1[21] = half_btf(-cospi[36], bf0[21], cospi[28], bf0[20], cos_bit);
3290 0 : bf1[22] = half_btf(cospi[52], bf0[22], cospi[12], bf0[23], cos_bit);
3291 0 : bf1[23] = half_btf(-cospi[52], bf0[23], cospi[12], bf0[22], cos_bit);
3292 0 : bf1[24] = half_btf(-cospi[60], bf0[24], cospi[4], bf0[25], cos_bit);
3293 0 : bf1[25] = half_btf(cospi[60], bf0[25], cospi[4], bf0[24], cos_bit);
3294 0 : bf1[26] = half_btf(-cospi[44], bf0[26], cospi[20], bf0[27], cos_bit);
3295 0 : bf1[27] = half_btf(cospi[44], bf0[27], cospi[20], bf0[26], cos_bit);
3296 0 : bf1[28] = half_btf(-cospi[28], bf0[28], cospi[36], bf0[29], cos_bit);
3297 0 : bf1[29] = half_btf(cospi[28], bf0[29], cospi[36], bf0[28], cos_bit);
3298 0 : bf1[30] = half_btf(-cospi[12], bf0[30], cospi[52], bf0[31], cos_bit);
3299 0 : bf1[31] = half_btf(cospi[12], bf0[31], cospi[52], bf0[30], cos_bit);
3300 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3301 :
3302 : // stage 5
3303 0 : stage++;
3304 0 : bf0 = step;
3305 0 : bf1 = output;
3306 0 : bf1[0] = bf0[0] + bf0[8];
3307 0 : bf1[1] = bf0[1] + bf0[9];
3308 0 : bf1[2] = bf0[2] + bf0[10];
3309 0 : bf1[3] = bf0[3] + bf0[11];
3310 0 : bf1[4] = bf0[4] + bf0[12];
3311 0 : bf1[5] = bf0[5] + bf0[13];
3312 0 : bf1[6] = bf0[6] + bf0[14];
3313 0 : bf1[7] = bf0[7] + bf0[15];
3314 0 : bf1[8] = -bf0[8] + bf0[0];
3315 0 : bf1[9] = -bf0[9] + bf0[1];
3316 0 : bf1[10] = -bf0[10] + bf0[2];
3317 0 : bf1[11] = -bf0[11] + bf0[3];
3318 0 : bf1[12] = -bf0[12] + bf0[4];
3319 0 : bf1[13] = -bf0[13] + bf0[5];
3320 0 : bf1[14] = -bf0[14] + bf0[6];
3321 0 : bf1[15] = -bf0[15] + bf0[7];
3322 0 : bf1[16] = bf0[16] + bf0[24];
3323 0 : bf1[17] = bf0[17] + bf0[25];
3324 0 : bf1[18] = bf0[18] + bf0[26];
3325 0 : bf1[19] = bf0[19] + bf0[27];
3326 0 : bf1[20] = bf0[20] + bf0[28];
3327 0 : bf1[21] = bf0[21] + bf0[29];
3328 0 : bf1[22] = bf0[22] + bf0[30];
3329 0 : bf1[23] = bf0[23] + bf0[31];
3330 0 : bf1[24] = -bf0[24] + bf0[16];
3331 0 : bf1[25] = -bf0[25] + bf0[17];
3332 0 : bf1[26] = -bf0[26] + bf0[18];
3333 0 : bf1[27] = -bf0[27] + bf0[19];
3334 0 : bf1[28] = -bf0[28] + bf0[20];
3335 0 : bf1[29] = -bf0[29] + bf0[21];
3336 0 : bf1[30] = -bf0[30] + bf0[22];
3337 0 : bf1[31] = -bf0[31] + bf0[23];
3338 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3339 :
3340 : // stage 6
3341 0 : stage++;
3342 0 : cospi = cospi_arr(cos_bit);
3343 0 : bf0 = output;
3344 0 : bf1 = step;
3345 0 : bf1[0] = bf0[0];
3346 0 : bf1[1] = bf0[1];
3347 0 : bf1[2] = bf0[2];
3348 0 : bf1[3] = bf0[3];
3349 0 : bf1[4] = bf0[4];
3350 0 : bf1[5] = bf0[5];
3351 0 : bf1[6] = bf0[6];
3352 0 : bf1[7] = bf0[7];
3353 0 : bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit);
3354 0 : bf1[9] = half_btf(-cospi[8], bf0[9], cospi[56], bf0[8], cos_bit);
3355 0 : bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit);
3356 0 : bf1[11] = half_btf(-cospi[40], bf0[11], cospi[24], bf0[10], cos_bit);
3357 0 : bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit);
3358 0 : bf1[13] = half_btf(cospi[56], bf0[13], cospi[8], bf0[12], cos_bit);
3359 0 : bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit);
3360 0 : bf1[15] = half_btf(cospi[24], bf0[15], cospi[40], bf0[14], cos_bit);
3361 0 : bf1[16] = bf0[16];
3362 0 : bf1[17] = bf0[17];
3363 0 : bf1[18] = bf0[18];
3364 0 : bf1[19] = bf0[19];
3365 0 : bf1[20] = bf0[20];
3366 0 : bf1[21] = bf0[21];
3367 0 : bf1[22] = bf0[22];
3368 0 : bf1[23] = bf0[23];
3369 0 : bf1[24] = half_btf(cospi[8], bf0[24], cospi[56], bf0[25], cos_bit);
3370 0 : bf1[25] = half_btf(-cospi[8], bf0[25], cospi[56], bf0[24], cos_bit);
3371 0 : bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[27], cos_bit);
3372 0 : bf1[27] = half_btf(-cospi[40], bf0[27], cospi[24], bf0[26], cos_bit);
3373 0 : bf1[28] = half_btf(-cospi[56], bf0[28], cospi[8], bf0[29], cos_bit);
3374 0 : bf1[29] = half_btf(cospi[56], bf0[29], cospi[8], bf0[28], cos_bit);
3375 0 : bf1[30] = half_btf(-cospi[24], bf0[30], cospi[40], bf0[31], cos_bit);
3376 0 : bf1[31] = half_btf(cospi[24], bf0[31], cospi[40], bf0[30], cos_bit);
3377 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3378 :
3379 : // stage 7
3380 0 : stage++;
3381 0 : bf0 = step;
3382 0 : bf1 = output;
3383 0 : bf1[0] = bf0[0] + bf0[4];
3384 0 : bf1[1] = bf0[1] + bf0[5];
3385 0 : bf1[2] = bf0[2] + bf0[6];
3386 0 : bf1[3] = bf0[3] + bf0[7];
3387 0 : bf1[4] = -bf0[4] + bf0[0];
3388 0 : bf1[5] = -bf0[5] + bf0[1];
3389 0 : bf1[6] = -bf0[6] + bf0[2];
3390 0 : bf1[7] = -bf0[7] + bf0[3];
3391 0 : bf1[8] = bf0[8] + bf0[12];
3392 0 : bf1[9] = bf0[9] + bf0[13];
3393 0 : bf1[10] = bf0[10] + bf0[14];
3394 0 : bf1[11] = bf0[11] + bf0[15];
3395 0 : bf1[12] = -bf0[12] + bf0[8];
3396 0 : bf1[13] = -bf0[13] + bf0[9];
3397 0 : bf1[14] = -bf0[14] + bf0[10];
3398 0 : bf1[15] = -bf0[15] + bf0[11];
3399 0 : bf1[16] = bf0[16] + bf0[20];
3400 0 : bf1[17] = bf0[17] + bf0[21];
3401 0 : bf1[18] = bf0[18] + bf0[22];
3402 0 : bf1[19] = bf0[19] + bf0[23];
3403 0 : bf1[20] = -bf0[20] + bf0[16];
3404 0 : bf1[21] = -bf0[21] + bf0[17];
3405 0 : bf1[22] = -bf0[22] + bf0[18];
3406 0 : bf1[23] = -bf0[23] + bf0[19];
3407 0 : bf1[24] = bf0[24] + bf0[28];
3408 0 : bf1[25] = bf0[25] + bf0[29];
3409 0 : bf1[26] = bf0[26] + bf0[30];
3410 0 : bf1[27] = bf0[27] + bf0[31];
3411 0 : bf1[28] = -bf0[28] + bf0[24];
3412 0 : bf1[29] = -bf0[29] + bf0[25];
3413 0 : bf1[30] = -bf0[30] + bf0[26];
3414 0 : bf1[31] = -bf0[31] + bf0[27];
3415 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3416 :
3417 : // stage 8
3418 0 : stage++;
3419 0 : cospi = cospi_arr(cos_bit);
3420 0 : bf0 = output;
3421 0 : bf1 = step;
3422 0 : bf1[0] = bf0[0];
3423 0 : bf1[1] = bf0[1];
3424 0 : bf1[2] = bf0[2];
3425 0 : bf1[3] = bf0[3];
3426 0 : bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
3427 0 : bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit);
3428 0 : bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
3429 0 : bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit);
3430 0 : bf1[8] = bf0[8];
3431 0 : bf1[9] = bf0[9];
3432 0 : bf1[10] = bf0[10];
3433 0 : bf1[11] = bf0[11];
3434 0 : bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit);
3435 0 : bf1[13] = half_btf(-cospi[16], bf0[13], cospi[48], bf0[12], cos_bit);
3436 0 : bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit);
3437 0 : bf1[15] = half_btf(cospi[48], bf0[15], cospi[16], bf0[14], cos_bit);
3438 0 : bf1[16] = bf0[16];
3439 0 : bf1[17] = bf0[17];
3440 0 : bf1[18] = bf0[18];
3441 0 : bf1[19] = bf0[19];
3442 0 : bf1[20] = half_btf(cospi[16], bf0[20], cospi[48], bf0[21], cos_bit);
3443 0 : bf1[21] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[20], cos_bit);
3444 0 : bf1[22] = half_btf(-cospi[48], bf0[22], cospi[16], bf0[23], cos_bit);
3445 0 : bf1[23] = half_btf(cospi[48], bf0[23], cospi[16], bf0[22], cos_bit);
3446 0 : bf1[24] = bf0[24];
3447 0 : bf1[25] = bf0[25];
3448 0 : bf1[26] = bf0[26];
3449 0 : bf1[27] = bf0[27];
3450 0 : bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[29], cos_bit);
3451 0 : bf1[29] = half_btf(-cospi[16], bf0[29], cospi[48], bf0[28], cos_bit);
3452 0 : bf1[30] = half_btf(-cospi[48], bf0[30], cospi[16], bf0[31], cos_bit);
3453 0 : bf1[31] = half_btf(cospi[48], bf0[31], cospi[16], bf0[30], cos_bit);
3454 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3455 :
3456 : // stage 9
3457 0 : stage++;
3458 0 : bf0 = step;
3459 0 : bf1 = output;
3460 0 : bf1[0] = bf0[0] + bf0[2];
3461 0 : bf1[1] = bf0[1] + bf0[3];
3462 0 : bf1[2] = -bf0[2] + bf0[0];
3463 0 : bf1[3] = -bf0[3] + bf0[1];
3464 0 : bf1[4] = bf0[4] + bf0[6];
3465 0 : bf1[5] = bf0[5] + bf0[7];
3466 0 : bf1[6] = -bf0[6] + bf0[4];
3467 0 : bf1[7] = -bf0[7] + bf0[5];
3468 0 : bf1[8] = bf0[8] + bf0[10];
3469 0 : bf1[9] = bf0[9] + bf0[11];
3470 0 : bf1[10] = -bf0[10] + bf0[8];
3471 0 : bf1[11] = -bf0[11] + bf0[9];
3472 0 : bf1[12] = bf0[12] + bf0[14];
3473 0 : bf1[13] = bf0[13] + bf0[15];
3474 0 : bf1[14] = -bf0[14] + bf0[12];
3475 0 : bf1[15] = -bf0[15] + bf0[13];
3476 0 : bf1[16] = bf0[16] + bf0[18];
3477 0 : bf1[17] = bf0[17] + bf0[19];
3478 0 : bf1[18] = -bf0[18] + bf0[16];
3479 0 : bf1[19] = -bf0[19] + bf0[17];
3480 0 : bf1[20] = bf0[20] + bf0[22];
3481 0 : bf1[21] = bf0[21] + bf0[23];
3482 0 : bf1[22] = -bf0[22] + bf0[20];
3483 0 : bf1[23] = -bf0[23] + bf0[21];
3484 0 : bf1[24] = bf0[24] + bf0[26];
3485 0 : bf1[25] = bf0[25] + bf0[27];
3486 0 : bf1[26] = -bf0[26] + bf0[24];
3487 0 : bf1[27] = -bf0[27] + bf0[25];
3488 0 : bf1[28] = bf0[28] + bf0[30];
3489 0 : bf1[29] = bf0[29] + bf0[31];
3490 0 : bf1[30] = -bf0[30] + bf0[28];
3491 0 : bf1[31] = -bf0[31] + bf0[29];
3492 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3493 :
3494 : // stage 10
3495 0 : stage++;
3496 0 : cospi = cospi_arr(cos_bit);
3497 0 : bf0 = output;
3498 0 : bf1 = step;
3499 0 : bf1[0] = bf0[0];
3500 0 : bf1[1] = bf0[1];
3501 0 : bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
3502 0 : bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit);
3503 0 : bf1[4] = bf0[4];
3504 0 : bf1[5] = bf0[5];
3505 0 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
3506 0 : bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit);
3507 0 : bf1[8] = bf0[8];
3508 0 : bf1[9] = bf0[9];
3509 0 : bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit);
3510 0 : bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[10], cos_bit);
3511 0 : bf1[12] = bf0[12];
3512 0 : bf1[13] = bf0[13];
3513 0 : bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit);
3514 0 : bf1[15] = half_btf(-cospi[32], bf0[15], cospi[32], bf0[14], cos_bit);
3515 0 : bf1[16] = bf0[16];
3516 0 : bf1[17] = bf0[17];
3517 0 : bf1[18] = half_btf(cospi[32], bf0[18], cospi[32], bf0[19], cos_bit);
3518 0 : bf1[19] = half_btf(-cospi[32], bf0[19], cospi[32], bf0[18], cos_bit);
3519 0 : bf1[20] = bf0[20];
3520 0 : bf1[21] = bf0[21];
3521 0 : bf1[22] = half_btf(cospi[32], bf0[22], cospi[32], bf0[23], cos_bit);
3522 0 : bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[22], cos_bit);
3523 0 : bf1[24] = bf0[24];
3524 0 : bf1[25] = bf0[25];
3525 0 : bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[27], cos_bit);
3526 0 : bf1[27] = half_btf(-cospi[32], bf0[27], cospi[32], bf0[26], cos_bit);
3527 0 : bf1[28] = bf0[28];
3528 0 : bf1[29] = bf0[29];
3529 0 : bf1[30] = half_btf(cospi[32], bf0[30], cospi[32], bf0[31], cos_bit);
3530 0 : bf1[31] = half_btf(-cospi[32], bf0[31], cospi[32], bf0[30], cos_bit);
3531 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3532 :
3533 : // stage 11
3534 0 : stage++;
3535 0 : bf0 = step;
3536 0 : bf1 = output;
3537 0 : bf1[0] = bf0[0];
3538 0 : bf1[1] = -bf0[16];
3539 0 : bf1[2] = bf0[24];
3540 0 : bf1[3] = -bf0[8];
3541 0 : bf1[4] = bf0[12];
3542 0 : bf1[5] = -bf0[28];
3543 0 : bf1[6] = bf0[20];
3544 0 : bf1[7] = -bf0[4];
3545 0 : bf1[8] = bf0[6];
3546 0 : bf1[9] = -bf0[22];
3547 0 : bf1[10] = bf0[30];
3548 0 : bf1[11] = -bf0[14];
3549 0 : bf1[12] = bf0[10];
3550 0 : bf1[13] = -bf0[26];
3551 0 : bf1[14] = bf0[18];
3552 0 : bf1[15] = -bf0[2];
3553 0 : bf1[16] = bf0[3];
3554 0 : bf1[17] = -bf0[19];
3555 0 : bf1[18] = bf0[27];
3556 0 : bf1[19] = -bf0[11];
3557 0 : bf1[20] = bf0[15];
3558 0 : bf1[21] = -bf0[31];
3559 0 : bf1[22] = bf0[23];
3560 0 : bf1[23] = -bf0[7];
3561 0 : bf1[24] = bf0[5];
3562 0 : bf1[25] = -bf0[21];
3563 0 : bf1[26] = bf0[29];
3564 0 : bf1[27] = -bf0[13];
3565 0 : bf1[28] = bf0[9];
3566 0 : bf1[29] = -bf0[25];
3567 0 : bf1[30] = bf0[17];
3568 0 : bf1[31] = -bf0[1];
3569 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3570 0 : }
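/*
 * Unlike the smaller ADST kernels, av1_fadst32_new ends with stage 11
 * applying alternating signs on top of the shuffle: output[k] takes
 * +step[p[k]] for even k and -step[p[k]] for odd k, as the assignments
 * above show directly.
 */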
3571 :
3572 0 : void eb_av1_fidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
3573 : const int8_t *stage_range) {
3574 : (void)cos_bit;
3575 0 : for (int32_t i = 0; i < 4; ++i)
3576 0 : output[i] = round_shift((int64_t)input[i] * NewSqrt2, NewSqrt2Bits);
3577 0 : assert(stage_range[0] + NewSqrt2Bits <= 32);
3578 : range_check(0, input, output, 4, stage_range[0]);
3579 0 : }
3580 :
3581 0 : void eb_av1_fidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
3582 : const int8_t *stage_range) {
3583 : (void)cos_bit;
3584 0 : for (int32_t i = 0; i < 8; ++i) output[i] = input[i] * 2;
3585 : range_check(0, input, output, 8, stage_range[0]);
3586 0 : }
3587 :
3588 0 : void eb_av1_fidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit,
3589 : const int8_t *stage_range) {
3590 : (void)cos_bit;
3591 0 : for (int32_t i = 0; i < 16; ++i)
3592 0 : output[i] = round_shift((int64_t)input[i] * 2 * NewSqrt2, NewSqrt2Bits);
3593 0 : assert(stage_range[0] + NewSqrt2Bits <= 32);
3594 : range_check(0, input, output, 16, stage_range[0]);
3595 0 : }
3596 :
3597 0 : void eb_av1_fidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit,
3598 : const int8_t *stage_range) {
3599 : (void)cos_bit;
3600 0 : for (int32_t i = 0; i < 32; ++i) output[i] = input[i] * 4;
3601 : range_check(0, input, output, 32, stage_range[0]);
3602 0 : }
3603 :
3604 0 : void av1_fidentity64_c(const int32_t *input, int32_t *output, int8_t cos_bit,
3605 : const int8_t *stage_range) {
3606 : (void)cos_bit;
3607 0 : for (int32_t i = 0; i < 64; ++i)
3608 0 : output[i] = round_shift((int64_t)input[i] * 4 * NewSqrt2, NewSqrt2Bits);
3609 0 : assert(stage_range[0] + NewSqrt2Bits <= 32);
3610 : range_check(0, input, output, 64, stage_range[0]);
3611 0 : }
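/*
 * Net scaling of the forward identity family above is sqrt(N/2) for
 * length N: ID4 -> sqrt(2), ID8 -> 2, ID16 -> 2*sqrt(2), ID32 -> 4,
 * ID64 -> 4*sqrt(2). The irrational factors are applied with the
 * fixed-point constant NewSqrt2 / 2^NewSqrt2Bits (5793 / 4096 in the
 * reference code, i.e. ~sqrt(2)).
 */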
3612 :
3613 3833360 : static INLINE TxfmFunc fwd_txfm_type_to_func(TxfmType TxfmType) {
3614 3833360 : switch (TxfmType) {
3615 0 : case TXFM_TYPE_DCT4: return eb_av1_fdct4_new;
3616 0 : case TXFM_TYPE_DCT8: return eb_av1_fdct8_new;
3617 1916700 : case TXFM_TYPE_DCT16: return eb_av1_fdct16_new;
3618 1916710 : case TXFM_TYPE_DCT32: return eb_av1_fdct32_new;
3619 0 : case TXFM_TYPE_DCT64: return eb_av1_fdct64_new;
3620 0 : case TXFM_TYPE_ADST4: return eb_av1_fadst4_new;
3621 0 : case TXFM_TYPE_ADST8: return eb_av1_fadst8_new;
3622 0 : case TXFM_TYPE_ADST16: return eb_av1_fadst16_new;
3623 0 : case TXFM_TYPE_ADST32: return av1_fadst32_new;
3624 0 : case TXFM_TYPE_IDENTITY4: return eb_av1_fidentity4_c;
3625 0 : case TXFM_TYPE_IDENTITY8: return eb_av1_fidentity8_c;
3626 0 : case TXFM_TYPE_IDENTITY16: return eb_av1_fidentity16_c;
3627 0 : case TXFM_TYPE_IDENTITY32: return eb_av1_fidentity32_c;
3628 0 : case TXFM_TYPE_IDENTITY64: return av1_fidentity64_c;
3629 0 : default: assert(0); return NULL;
3630 : }
3631 : }
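/*
 * Minimal usage sketch of the dispatch above (the real caller is
 * Av1TranformTwoDCore_c below; names here follow that function):
 *
 *   const TxfmFunc col_txfm = fwd_txfm_type_to_func(cfg->txfm_type_col);
 *   col_txfm(temp_in, temp_out, cfg->cos_bit_col, stage_range_col);
 *
 * Per the hit counts, only the DCT16 and DCT32 branches were exercised
 * in this coverage run.
 */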
3632 :
3633 152882000 : void eb_av1_round_shift_array_c(int32_t *arr, int32_t size, int32_t bit) {
3634 : int32_t i;
3635 152882000 : if (bit == 0)
3636 30633500 : return;
3637 : else {
3638 122248000 : if (bit > 0) {
3639 1037510000 : for (i = 0; i < size; i++)
3640 976355000 : arr[i] = round_shift(arr[i], bit);
3641 : }
3642 : else {
3643 1040080000 : for (i = 0; i < size; i++)
3644 979090000 : arr[i] = arr[i] * (1 << (-bit));
3645 : }
3646 : }
3647 : }
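/*
 * round_shift() is the round-to-nearest right shift from the AV1
 * reference code (sketch, assuming bit >= 1):
 *
 *   static INLINE int64_t round_shift(int64_t value, int32_t bit) {
 *       return (value + (1LL << (bit - 1))) >> bit;
 *   }
 *
 * So eb_av1_round_shift_array_c() rounds the array down by `bit` when
 * bit > 0 and scales it up by 2^|bit| when bit < 0.
 */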
3648 : // based on libaom's fwd_txfm2d_c
3649 1916680 : static INLINE void Av1TranformTwoDCore_c(
3650 : int16_t *input,
3651 : uint32_t input_stride,
3652 : int32_t *output,
3653 : const Txfm2DFlipCfg *cfg,
3654 : int32_t *buf,
3655 : uint8_t bit_depth)
3656 : {
3657 : int32_t c, r;
3658 : // Note when assigning txfm_size_col, we use the txfm_size from the
3659 : // row configuration and vice versa. This is intentionally done to
3660 : // accurately perform rectangular transforms. When the transform is
3661 : // rectangular, the number of columns will be the same as the
3662 : // txfm_size stored in the row cfg struct. It will make no difference
3663 : // for square transforms.
3664 1916680 : const int32_t txfm_size_col = tx_size_wide[cfg->tx_size];
3665 1916680 : const int32_t txfm_size_row = tx_size_high[cfg->tx_size];
3666 : // Take the shift from the larger dimension in the rectangular case.
3667 1916680 : const int8_t *shift = cfg->shift;
3668 1916680 : const int32_t rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
3669 : int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
3670 : int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
3671 1916680 : assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM);
3672 1916680 : assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM);
3673 1916680 : eb_av1_gen_fwd_stage_range(stage_range_col, stage_range_row, cfg, bit_depth);
3674 :
3675 1916700 : const int8_t cos_bit_col = cfg->cos_bit_col;
3676 1916700 : const int8_t cos_bit_row = cfg->cos_bit_row;
3677 1916700 : const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col);
3678 1916700 : const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->txfm_type_row);
3679 : ASSERT(txfm_func_col != NULL);
3680 : ASSERT(txfm_func_row != NULL);
3681 : // use output buffer as temp buffer
3682 1977540 : int32_t *temp_in = output;
3683 1977540 : int32_t *temp_out = output + txfm_size_row;
3684 :
3685 : // Columns
3686 63146700 : for (c = 0; c < txfm_size_col; ++c) {
3687 61230000 : if (cfg->ud_flip == 0)
3688 1039890000 : for (r = 0; r < txfm_size_row; ++r) temp_in[r] = input[r * input_stride + c];
3689 : else {
3690 0 : for (r = 0; r < txfm_size_row; ++r)
3691 : // flip upside down
3692 0 : temp_in[r] = input[(txfm_size_row - r - 1) * input_stride + c];
3693 : }
3694 61230000 : eb_av1_round_shift_array_c(temp_in, txfm_size_row, -shift[0]);
3695 61218600 : txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
3696 61246600 : eb_av1_round_shift_array_c(temp_out, txfm_size_row, -shift[1]);
3697 61169100 : if (cfg->lr_flip == 0) {
3698 1039700000 : for (r = 0; r < txfm_size_row; ++r)
3699 978529000 : buf[r * txfm_size_col + c] = temp_out[r];
3700 : }
3701 : else {
3702 0 : for (r = 0; r < txfm_size_row; ++r)
3703 : // flip from left to right
3704 0 : buf[r * txfm_size_col + (txfm_size_col - c - 1)] = temp_out[r];
3705 : }
3706 : }
3707 :
3708 : // Rows
3709 32526000 : for (r = 0; r < txfm_size_row; ++r) {
3710 30615200 : txfm_func_row(buf + r * txfm_size_col,
3711 30615200 : output + r * txfm_size_col,
3712 : cos_bit_row,
3713 : stage_range_row);
3714 30633600 : eb_av1_round_shift_array_c(output + r * txfm_size_col, txfm_size_col, -shift[2]);
3715 :
3716 30702000 : if (abs(rect_type) == 1) {
3717 : // Multiply everything by Sqrt2 if the transform is rectangular and the
3718 : // size difference is a factor of 2.
3719 1004890000 : for (c = 0; c < txfm_size_col; ++c) {
3720 974261000 : output[r * txfm_size_col + c] = round_shift(
3721 974354000 : (int64_t)output[r * txfm_size_col + c] * NewSqrt2, NewSqrt2Bits);
3722 : }
3723 : }
3724 : }
3725 1910800 : }
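/*
 * Shape of the 2-D pass above: scale by 2^shift[0], column transforms,
 * scale by 2^shift[1], row transforms, scale by 2^shift[2] (negative
 * entries shift down with rounding). The NewSqrt2 multiply compensates
 * rectangular blocks with a 2:1 aspect ratio (|rect_type| == 1): each
 * 1-D transform carries a gain proportional to sqrt(N), so such blocks
 * would otherwise be scaled by an odd power of sqrt(2); one extra
 * sqrt(2) makes the total gain a power of two, which the shift[] stages
 * normalize.
 */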
3726 :
3727 0 : void av1_round_shift_array_pf_c(int32_t *arr_in, int32_t *arr_out, int32_t size, int32_t bit) {
3728 : int32_t i;
3729 0 : if (bit == 0) {
3730 0 : for (i = 0; i < size; i++)
3731 0 : arr_out[i] = arr_in[i];
3732 : }
3733 : else {
3734 0 : if (bit > 0) {
3735 0 : for (i = 0; i < size; i++)
3736 0 : arr_out[i] = round_shift(arr_in[i], bit);
3737 : }
3738 : else {
3739 0 : for (i = 0; i < size; i++)
3740 0 : arr_out[i] = arr_in[i] * (1 << (-bit));
3741 : }
3742 : }
3743 0 : }
3744 0 : void av1_fdct32_pf_new(const int32_t *input, int32_t *output, int8_t cos_bit,
3745 : const int8_t *stage_range) {
3746 0 : const int32_t size = 32;
3747 : const int32_t *cospi;
3748 :
3749 0 : int32_t stage = 0;
3750 : int32_t *bf0, *bf1;
3751 : int32_t step[32];
3752 :
3753 : // stage 0;
3754 0 : range_check(stage, input, input, size, stage_range[stage]);
3755 :
3756 : // stage 1;
3757 0 : stage++;
3758 0 : bf1 = output;
3759 0 : bf1[0] = input[0] + input[31];
3760 0 : bf1[1] = input[1] + input[30];
3761 0 : bf1[2] = input[2] + input[29];
3762 0 : bf1[3] = input[3] + input[28];
3763 0 : bf1[4] = input[4] + input[27];
3764 0 : bf1[5] = input[5] + input[26];
3765 0 : bf1[6] = input[6] + input[25];
3766 0 : bf1[7] = input[7] + input[24];
3767 0 : bf1[8] = input[8] + input[23];
3768 0 : bf1[9] = input[9] + input[22];
3769 0 : bf1[10] = input[10] + input[21];
3770 0 : bf1[11] = input[11] + input[20];
3771 0 : bf1[12] = input[12] + input[19];
3772 0 : bf1[13] = input[13] + input[18];
3773 0 : bf1[14] = input[14] + input[17];
3774 0 : bf1[15] = input[15] + input[16];
3775 0 : bf1[16] = -input[16] + input[15];
3776 0 : bf1[17] = -input[17] + input[14];
3777 0 : bf1[18] = -input[18] + input[13];
3778 0 : bf1[19] = -input[19] + input[12];
3779 0 : bf1[20] = -input[20] + input[11];
3780 0 : bf1[21] = -input[21] + input[10];
3781 0 : bf1[22] = -input[22] + input[9];
3782 0 : bf1[23] = -input[23] + input[8];
3783 0 : bf1[24] = -input[24] + input[7];
3784 0 : bf1[25] = -input[25] + input[6];
3785 0 : bf1[26] = -input[26] + input[5];
3786 0 : bf1[27] = -input[27] + input[4];
3787 0 : bf1[28] = -input[28] + input[3];
3788 0 : bf1[29] = -input[29] + input[2];
3789 0 : bf1[30] = -input[30] + input[1];
3790 0 : bf1[31] = -input[31] + input[0];
3791 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3792 :
3793 : // stage 2
3794 0 : stage++;
3795 0 : cospi = cospi_arr(cos_bit);
3796 0 : bf0 = output;
3797 0 : bf1 = step;
3798 0 : bf1[0] = bf0[0] + bf0[15];
3799 0 : bf1[1] = bf0[1] + bf0[14];
3800 0 : bf1[2] = bf0[2] + bf0[13];
3801 0 : bf1[3] = bf0[3] + bf0[12];
3802 0 : bf1[4] = bf0[4] + bf0[11];
3803 0 : bf1[5] = bf0[5] + bf0[10];
3804 0 : bf1[6] = bf0[6] + bf0[9];
3805 0 : bf1[7] = bf0[7] + bf0[8];
3806 0 : bf1[8] = -bf0[8] + bf0[7];
3807 0 : bf1[9] = -bf0[9] + bf0[6];
3808 0 : bf1[10] = -bf0[10] + bf0[5];
3809 0 : bf1[11] = -bf0[11] + bf0[4];
3810 0 : bf1[12] = -bf0[12] + bf0[3];
3811 0 : bf1[13] = -bf0[13] + bf0[2];
3812 0 : bf1[14] = -bf0[14] + bf0[1];
3813 0 : bf1[15] = -bf0[15] + bf0[0];
3814 0 : bf1[16] = bf0[16];
3815 0 : bf1[17] = bf0[17];
3816 0 : bf1[18] = bf0[18];
3817 0 : bf1[19] = bf0[19];
3818 0 : bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
3819 0 : bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
3820 0 : bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
3821 0 : bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
3822 0 : bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit);
3823 0 : bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit);
3824 0 : bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit);
3825 0 : bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit);
3826 0 : bf1[28] = bf0[28];
3827 0 : bf1[29] = bf0[29];
3828 0 : bf1[30] = bf0[30];
3829 0 : bf1[31] = bf0[31];
3830 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3831 :
3832 : // stage 3
3833 0 : stage++;
3834 0 : cospi = cospi_arr(cos_bit);
3835 0 : bf0 = step;
3836 0 : bf1 = output;
3837 0 : bf1[0] = bf0[0] + bf0[7];
3838 0 : bf1[1] = bf0[1] + bf0[6];
3839 0 : bf1[2] = bf0[2] + bf0[5];
3840 0 : bf1[3] = bf0[3] + bf0[4];
3841 0 : bf1[4] = -bf0[4] + bf0[3];
3842 0 : bf1[5] = -bf0[5] + bf0[2];
3843 0 : bf1[6] = -bf0[6] + bf0[1];
3844 0 : bf1[7] = -bf0[7] + bf0[0];
3845 0 : bf1[8] = bf0[8];
3846 0 : bf1[9] = bf0[9];
3847 0 : bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
3848 0 : bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
3849 0 : bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit);
3850 0 : bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit);
3851 0 : bf1[14] = bf0[14];
3852 0 : bf1[15] = bf0[15];
3853 0 : bf1[16] = bf0[16] + bf0[23];
3854 0 : bf1[17] = bf0[17] + bf0[22];
3855 0 : bf1[18] = bf0[18] + bf0[21];
3856 0 : bf1[19] = bf0[19] + bf0[20];
3857 0 : bf1[20] = -bf0[20] + bf0[19];
3858 0 : bf1[21] = -bf0[21] + bf0[18];
3859 0 : bf1[22] = -bf0[22] + bf0[17];
3860 0 : bf1[23] = -bf0[23] + bf0[16];
3861 0 : bf1[24] = -bf0[24] + bf0[31];
3862 0 : bf1[25] = -bf0[25] + bf0[30];
3863 0 : bf1[26] = -bf0[26] + bf0[29];
3864 0 : bf1[27] = -bf0[27] + bf0[28];
3865 0 : bf1[28] = bf0[28] + bf0[27];
3866 0 : bf1[29] = bf0[29] + bf0[26];
3867 0 : bf1[30] = bf0[30] + bf0[25];
3868 0 : bf1[31] = bf0[31] + bf0[24];
3869 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3870 :
3871 : // stage 4
3872 0 : stage++;
3873 0 : cospi = cospi_arr(cos_bit);
3874 0 : bf0 = output;
3875 0 : bf1 = step;
3876 0 : bf1[0] = bf0[0] + bf0[3];
3877 0 : bf1[1] = bf0[1] + bf0[2];
3878 0 : bf1[2] = -bf0[2] + bf0[1];
3879 0 : bf1[3] = -bf0[3] + bf0[0];
3880 0 : bf1[4] = bf0[4];
3881 0 : bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
3882 0 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit);
3883 0 : bf1[7] = bf0[7];
3884 0 : bf1[8] = bf0[8] + bf0[11];
3885 0 : bf1[9] = bf0[9] + bf0[10];
3886 0 : bf1[10] = -bf0[10] + bf0[9];
3887 0 : bf1[11] = -bf0[11] + bf0[8];
3888 0 : bf1[12] = -bf0[12] + bf0[15];
3889 0 : bf1[13] = -bf0[13] + bf0[14];
3890 0 : bf1[14] = bf0[14] + bf0[13];
3891 0 : bf1[15] = bf0[15] + bf0[12];
3892 0 : bf1[16] = bf0[16];
3893 0 : bf1[17] = bf0[17];
3894 0 : bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit);
3895 0 : bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit);
3896 0 : bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit);
3897 0 : bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit);
3898 0 : bf1[22] = bf0[22];
3899 0 : bf1[23] = bf0[23];
3900 0 : bf1[24] = bf0[24];
3901 0 : bf1[25] = bf0[25];
3902 0 : bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit);
3903 0 : bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit);
3904 0 : bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit);
3905 0 : bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit);
3906 0 : bf1[30] = bf0[30];
3907 0 : bf1[31] = bf0[31];
3908 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3909 :
3910 : // stage 5
3911 0 : stage++;
3912 0 : cospi = cospi_arr(cos_bit);
3913 0 : bf0 = step;
3914 0 : bf1 = output;
3915 0 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
3916 : //bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
3917 0 : bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
3918 : //bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
3919 0 : bf1[4] = bf0[4] + bf0[5];
3920 0 : bf1[5] = -bf0[5] + bf0[4];
3921 0 : bf1[6] = -bf0[6] + bf0[7];
3922 0 : bf1[7] = bf0[7] + bf0[6];
3923 0 : bf1[8] = bf0[8];
3924 0 : bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
3925 0 : bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
3926 0 : bf1[11] = bf0[11];
3927 0 : bf1[12] = bf0[12];
3928 0 : bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit);
3929 0 : bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit);
3930 0 : bf1[15] = bf0[15];
3931 0 : bf1[16] = bf0[16] + bf0[19];
3932 0 : bf1[17] = bf0[17] + bf0[18];
3933 0 : bf1[18] = -bf0[18] + bf0[17];
3934 0 : bf1[19] = -bf0[19] + bf0[16];
3935 0 : bf1[20] = -bf0[20] + bf0[23];
3936 0 : bf1[21] = -bf0[21] + bf0[22];
3937 0 : bf1[22] = bf0[22] + bf0[21];
3938 0 : bf1[23] = bf0[23] + bf0[20];
3939 0 : bf1[24] = bf0[24] + bf0[27];
3940 0 : bf1[25] = bf0[25] + bf0[26];
3941 0 : bf1[26] = -bf0[26] + bf0[25];
3942 0 : bf1[27] = -bf0[27] + bf0[24];
3943 0 : bf1[28] = -bf0[28] + bf0[31];
3944 0 : bf1[29] = -bf0[29] + bf0[30];
3945 0 : bf1[30] = bf0[30] + bf0[29];
3946 0 : bf1[31] = bf0[31] + bf0[28];
3947 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3948 :
3949 : // stage 6
3950 0 : stage++;
3951 0 : cospi = cospi_arr(cos_bit);
3952 0 : bf0 = output;
3953 0 : bf1 = step;
3954 0 : bf1[0] = bf0[0];
3955 : //bf1[1] = bf0[1];
3956 0 : bf1[2] = bf0[2];
3957 : //bf1[3] = bf0[3];
3958 0 : bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit);
3959 : //bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit);
3960 0 : bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit);
3961 : //bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit);
3962 0 : bf1[8] = bf0[8] + bf0[9];
3963 0 : bf1[9] = -bf0[9] + bf0[8];
3964 0 : bf1[10] = -bf0[10] + bf0[11];
3965 0 : bf1[11] = bf0[11] + bf0[10];
3966 0 : bf1[12] = bf0[12] + bf0[13];
3967 0 : bf1[13] = -bf0[13] + bf0[12];
3968 0 : bf1[14] = -bf0[14] + bf0[15];
3969 0 : bf1[15] = bf0[15] + bf0[14];
3970 0 : bf1[16] = bf0[16];
3971 0 : bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit);
3972 0 : bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit);
3973 0 : bf1[19] = bf0[19];
3974 0 : bf1[20] = bf0[20];
3975 0 : bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit);
3976 0 : bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit);
3977 0 : bf1[23] = bf0[23];
3978 0 : bf1[24] = bf0[24];
3979 0 : bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit);
3980 0 : bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit);
3981 0 : bf1[27] = bf0[27];
3982 0 : bf1[28] = bf0[28];
3983 0 : bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit);
3984 0 : bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit);
3985 0 : bf1[31] = bf0[31];
3986 0 : range_check(stage, input, bf1, size, stage_range[stage]);
3987 :
3988 : // stage 7
3989 0 : stage++;
3990 0 : cospi = cospi_arr(cos_bit);
3991 0 : bf0 = step;
3992 0 : bf1 = output;
3993 0 : bf1[0] = bf0[0];
3994 : //bf1[1] = bf0[1];
3995 0 : bf1[2] = bf0[2];
3996 : //bf1[3] = bf0[3];
3997 0 : bf1[4] = bf0[4];
3998 : //bf1[5] = bf0[5];
3999 0 : bf1[6] = bf0[6];
4000 : //bf1[7] = bf0[7];
4001 0 : bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit);
4002 : //bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit);
4003 0 : bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit);
4004 : //bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit);
4005 0 : bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit);
4006 : //bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit);
4007 0 : bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit);
4008 : //bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit);
4009 0 : bf1[16] = bf0[16] + bf0[17];
4010 0 : bf1[17] = -bf0[17] + bf0[16];
4011 0 : bf1[18] = -bf0[18] + bf0[19];
4012 0 : bf1[19] = bf0[19] + bf0[18];
4013 0 : bf1[20] = bf0[20] + bf0[21];
4014 0 : bf1[21] = -bf0[21] + bf0[20];
4015 0 : bf1[22] = -bf0[22] + bf0[23];
4016 0 : bf1[23] = bf0[23] + bf0[22];
4017 0 : bf1[24] = bf0[24] + bf0[25];
4018 0 : bf1[25] = -bf0[25] + bf0[24];
4019 0 : bf1[26] = -bf0[26] + bf0[27];
4020 0 : bf1[27] = bf0[27] + bf0[26];
4021 0 : bf1[28] = bf0[28] + bf0[29];
4022 0 : bf1[29] = -bf0[29] + bf0[28];
4023 0 : bf1[30] = -bf0[30] + bf0[31];
4024 0 : bf1[31] = bf0[31] + bf0[30];
4025 0 : range_check(stage, input, bf1, size, stage_range[stage]);
4026 :
4027 : // stage 8
4028 0 : stage++;
4029 0 : cospi = cospi_arr(cos_bit);
4030 0 : bf0 = output;
4031 0 : bf1 = step;
4032 0 : bf1[0] = bf0[0];
4033 : //bf1[1] = bf0[1];
4034 0 : bf1[2] = bf0[2];
4035 : //bf1[3] = bf0[3];
4036 0 : bf1[4] = bf0[4];
4037 : //bf1[5] = bf0[5];
4038 0 : bf1[6] = bf0[6];
4039 : //bf1[7] = bf0[7];
4040 0 : bf1[8] = bf0[8];
4041 : //bf1[9] = bf0[9];
4042 0 : bf1[10] = bf0[10];
4043 : //bf1[11] = bf0[11];
4044 0 : bf1[12] = bf0[12];
4045 : //bf1[13] = bf0[13];
4046 0 : bf1[14] = bf0[14];
4047 : //bf1[15] = bf0[15];
4048 0 : bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit);
4049 : //bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit);
4050 0 : bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit);
4051 : //bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit);
4052 0 : bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit);
4053 : //bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit);
4054 0 : bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit);
4055 : //bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit);
4056 0 : bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit);
4057 : //bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit);
4058 0 : bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit);
4059 : //bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit);
4060 0 : bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit);
4061 : //bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit);
4062 0 : bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit);
4063 : //bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit);
4064 0 : range_check(stage, input, bf1, size, stage_range[stage]);
4065 :
4066 : // stage 9
4067 0 : stage++;
4068 0 : bf0 = step;
4069 0 : bf1 = output;
4070 0 : bf1[0] = bf0[0];
4071 0 : bf1[1] = bf0[16];
4072 0 : bf1[2] = bf0[8];
4073 0 : bf1[3] = bf0[24];
4074 0 : bf1[4] = bf0[4];
4075 0 : bf1[5] = bf0[20];
4076 0 : bf1[6] = bf0[12];
4077 0 : bf1[7] = bf0[28];
4078 0 : bf1[8] = bf0[2];
4079 0 : bf1[9] = bf0[18];
4080 0 : bf1[10] = bf0[10];
4081 0 : bf1[11] = bf0[26];
4082 0 : bf1[12] = bf0[6];
4083 0 : bf1[13] = bf0[22];
4084 0 : bf1[14] = bf0[14];
4085 0 : bf1[15] = bf0[30];
4086 : /* bf1[16] = bf0[1];
4087 : bf1[17] = bf0[17];
4088 : bf1[18] = bf0[9];
4089 : bf1[19] = bf0[25];
4090 : bf1[20] = bf0[5];
4091 : bf1[21] = bf0[21];
4092 : bf1[22] = bf0[13];
4093 : bf1[23] = bf0[29];
4094 : bf1[24] = bf0[3];
4095 : bf1[25] = bf0[19];
4096 : bf1[26] = bf0[11];
4097 : bf1[27] = bf0[27];
4098 : bf1[28] = bf0[7];
4099 : bf1[29] = bf0[23];
4100 : bf1[30] = bf0[15];
4101 : bf1[31] = bf0[31];*/
4102 0 : range_check(stage, input, bf1, size, stage_range[stage]);
4103 0 : }
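/*
 * "pf" = partial frequency: av1_fdct32_pf_new materializes only the
 * even-frequency half of the 32-point DCT. The odd-output butterflies
 * are commented out stage by stage, and the final stage writes just
 * output[0..15], leaving output[16..31] untouched by design.
 */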
4104 0 : static INLINE TxfmFunc fwd_txfm_pf_type_to_func(TxfmType TxfmType) {
4105 0 : switch (TxfmType) {
4106 0 : case TXFM_TYPE_DCT4: return eb_av1_fdct4_new;
4107 0 : case TXFM_TYPE_DCT8: return eb_av1_fdct8_new;
4108 0 : case TXFM_TYPE_DCT16: return eb_av1_fdct16_new;
4109 0 : case TXFM_TYPE_DCT32: return av1_fdct32_pf_new;
4110 0 : case TXFM_TYPE_DCT64: return eb_av1_fdct64_new;
4111 0 : case TXFM_TYPE_ADST4: return eb_av1_fadst4_new;
4112 0 : case TXFM_TYPE_ADST8: return eb_av1_fadst8_new;
4113 0 : case TXFM_TYPE_ADST16: return eb_av1_fadst16_new;
4114 0 : case TXFM_TYPE_ADST32: return av1_fadst32_new;
4115 0 : case TXFM_TYPE_IDENTITY4: return eb_av1_fidentity4_c;
4116 0 : case TXFM_TYPE_IDENTITY8: return eb_av1_fidentity8_c;
4117 0 : case TXFM_TYPE_IDENTITY16: return eb_av1_fidentity16_c;
4118 0 : case TXFM_TYPE_IDENTITY32: return eb_av1_fidentity32_c;
4119 0 : case TXFM_TYPE_IDENTITY64: return av1_fidentity64_c;
4120 0 : default: assert(0); return NULL;
4121 : }
4122 : }
4123 0 : static INLINE void Av1TranformTwoDCore_pf_c(
4124 : int16_t *input,
4125 : uint32_t inputStride,
4126 : int32_t *output,
4127 : const Txfm2DFlipCfg *cfg,
4128 : int32_t *buf,
4129 : uint8_t bit_depth)
4130 : {
4131 : int32_t c, r;
4132 : // Note when assigning txfm_size_col, we use the txfm_size from the
4133 : // row configuration and vice versa. This is intentionally done to
4134 : // accurately perform rectangular transforms. When the transform is
4135 : // rectangular, the number of columns will be the same as the
4136 : // txfm_size stored in the row cfg struct. It will make no difference
4137 : // for square transforms.
4138 0 : const int32_t txfm_size_col = tx_size_wide[cfg->tx_size];
4139 0 : const int32_t txfm_size_row = tx_size_high[cfg->tx_size];
4140 : // Take the shift from the larger dimension in the rectangular case.
4141 0 : const int8_t *shift = cfg->shift;
4142 0 : const int32_t rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
4143 : int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
4144 : int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
4145 0 : assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM);
4146 0 : assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM);
4147 0 : eb_av1_gen_fwd_stage_range(stage_range_col, stage_range_row, cfg, bit_depth);
4148 :
4149 0 : const int8_t cos_bit_col = cfg->cos_bit_col;
4150 0 : const int8_t cos_bit_row = cfg->cos_bit_row;
4151 0 : const TxfmFunc txfm_func_col = fwd_txfm_pf_type_to_func(cfg->txfm_type_col);
4152 0 : const TxfmFunc txfm_func_row = fwd_txfm_pf_type_to_func(cfg->txfm_type_row);
4153 : ASSERT(txfm_func_col != NULL);
4154 : ASSERT(txfm_func_row != NULL);
4155 : // use output buffer as temp buffer
4156 : /* int32_t *temp_in = output;
4157 : int32_t *temp_out = output + txfm_size_row;*/
4158 : int32_t temp_in[32];
4159 : int32_t temp_out[32];
4160 :
4161 : // Columns
4162 0 : for (c = 0; c < txfm_size_col; ++c) {
4163 0 : if (cfg->ud_flip == 0)
4164 0 : for (r = 0; r < txfm_size_row; ++r) temp_in[r] = input[r * inputStride + c];
4165 : else {
4166 0 : for (r = 0; r < txfm_size_row; ++r)
4167 : // flip upside down
4168 0 : temp_in[r] = input[(txfm_size_row - r - 1) * inputStride + c];
4169 : }
4170 0 : eb_av1_round_shift_array_c(temp_in, txfm_size_row, -shift[0]); // NM eb_av1_round_shift_array_c
4171 0 : txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
4172 0 : eb_av1_round_shift_array_c(temp_out, 16/*txfm_size_row*/, -shift[1]); // NM eb_av1_round_shift_array_c
4173 0 : if (cfg->lr_flip == 0) {
4174 0 : for (r = 0; r < txfm_size_row; ++r)
4175 0 : buf[r * txfm_size_col + c] = temp_out[r];
4176 : }
4177 : else {
4178 0 : for (r = 0; r < txfm_size_row; ++r)
4179 : // flip from left to right
4180 0 : buf[r * txfm_size_col + (txfm_size_col - c - 1)] = temp_out[r];
4181 : }
4182 : }
4183 :
4184 : // Rows
4185 0 : for (r = 0; r < 16/*txfm_size_row*/; ++r) {
4186 0 : txfm_func_row(buf + r * txfm_size_col,
4187 : temp_out, //output + r * txfm_size_col,//
4188 : cos_bit_row,
4189 : stage_range_row);
4190 0 : av1_round_shift_array_pf_c(temp_out, output + r * txfm_size_col, 16/*txfm_size_col*/, -shift[2]);
4191 :
4192 0 : if (abs(rect_type) == 1) {
4193 : // Multiply everything by Sqrt2 if the transform is rectangular and the
4194 : // size difference is a factor of 2.
4195 0 : for (c = 0; c < txfm_size_col; ++c) {
4196 0 : output[r * txfm_size_col + c] = round_shift(
4197 0 : (int64_t)output[r * txfm_size_col + c] * NewSqrt2, NewSqrt2Bits);
4198 : }
4199 : }
4200 : }
4201 0 : }
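/*
 * [Editorial note, not in the original source] The pf core above hardcodes
 * 16 in the second round-shift and in the row loop because, on the 32x32
 * partial-frequency path, only the 16 low-frequency rows and columns
 * survive. The separable column-then-row structure is otherwise the same as
 * the full core. A minimal self-contained sketch of that data flow follows;
 * `Txfm1D` and `demo_fwd_txfm2d_flow` are hypothetical illustration-only
 * names, the cos_bit/stage_range plumbing is omitted, and the input is
 * assumed packed with stride w.
 */
typedef void (*Txfm1D)(const int32_t *in, int32_t *out, int32_t n);

static void demo_fwd_txfm2d_flow(const int32_t *in, int32_t *out,
                                 int32_t *buf, int32_t w, int32_t h,
                                 Txfm1D col_txfm, Txfm1D row_txfm) {
    int32_t tmp_in[64], tmp_out[64];  /* 64 = largest 1-D size in this file */
    for (int32_t c = 0; c < w; ++c) {            /* one 1-D pass per column */
        for (int32_t r = 0; r < h; ++r) tmp_in[r] = in[r * w + c];
        col_txfm(tmp_in, tmp_out, h);
        for (int32_t r = 0; r < h; ++r) buf[r * w + c] = tmp_out[r];
    }
    for (int32_t r = 0; r < h; ++r)              /* then one pass per row */
        row_txfm(buf + r * w, out + r * w, w);
}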
4202 :
4203 3630020 : static INLINE void set_flip_cfg(TxType tx_type, Txfm2DFlipCfg *cfg) {
4204 3630020 : get_flip_cfg(tx_type, &cfg->ud_flip, &cfg->lr_flip);
4205 3630020 : }
4206 3630020 : static INLINE void set_fwd_txfm_non_scale_range(Txfm2DFlipCfg *cfg) {
4207 3630020 : const int32_t txh_idx = get_txh_idx(cfg->tx_size);
4208 3630030 : av1_zero(cfg->stage_range_col);
4209 3630030 : av1_zero(cfg->stage_range_row);
4210 3630030 : assert(cfg->txfm_type_col < TXFM_TYPES);
4211 3630030 : if (cfg->txfm_type_col != TXFM_TYPE_INVALID) {
4212 3630080 : int32_t stage_num_col = cfg->stage_num_col;
4213 3630080 : const int8_t *range_mult2_col =
4214 3630080 : fwd_txfm_range_mult2_list[cfg->txfm_type_col];
4215 35277400 : for (int32_t i = 0; i < stage_num_col; ++i)
4216 31647300 : cfg->stage_range_col[i] = (range_mult2_col[i] + 1) >> 1;
4217 : }
4218 :
4219 3630030 : if (cfg->txfm_type_row != TXFM_TYPE_INVALID) {
4220 3630130 : int32_t stage_num_row = cfg->stage_num_row;
4221 3630130 : assert(cfg->txfm_type_row < TXFM_TYPES);
4222 3630130 : const int8_t *range_mult2_row =
4223 3630130 : fwd_txfm_range_mult2_list[cfg->txfm_type_row];
4224 39110500 : for (int32_t i = 0; i < stage_num_row; ++i)
4225 35480300 : cfg->stage_range_row[i] =
4226 35480300 : (max_fwd_range_mult2_col[txh_idx] + range_mult2_row[i] + 1) >> 1;
4227 : }
4228 3630030 : }
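/*
 * [Editorial note, not in the original source] A worked example of the range
 * bookkeeping above, since the half-bit encoding is easy to misread: the
 * *_range_mult2 tables store twice the cumulative dynamic-range growth in
 * bits, so (x + 1) >> 1 computes ceil(x / 2). Row stages add
 * max_fwd_range_mult2_col[txh_idx] first because every row pass operates on
 * column-pass outputs. With an illustrative column total of 7 half-bits and
 * a row-stage entry of 4 half-bits (not values taken from the tables):
 *
 *   stage_range_row[i] = (7 + 4 + 1) >> 1 = 6 bits of headroom.
 */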
4229 :
4230 3630040 : void Av1TransformConfig(
4231 : TxType tx_type,
4232 : TxSize tx_size,
4233 : Txfm2DFlipCfg *cfg)
4234 : {
4235 3630040 : assert(cfg != NULL);
4236 3630040 : cfg->tx_size = tx_size;
4237 3630040 : set_flip_cfg(tx_type, cfg);
4238 3630040 : const TxType1D tx_type_1d_col = vtx_tab[tx_type];
4239 3630040 : const TxType1D tx_type_1d_row = htx_tab[tx_type];
4240 3630040 : const int32_t txw_idx = tx_size_wide_log2[tx_size] - tx_size_wide_log2[0];
4241 3630040 : const int32_t txh_idx = tx_size_high_log2[tx_size] - tx_size_high_log2[0];
4242 3630040 : cfg->shift = fwd_txfm_shift_ls[tx_size];
4243 3630040 : cfg->cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
4244 3630040 : cfg->cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
4245 3630040 : cfg->txfm_type_col = av1_txfm_type_ls[txh_idx][tx_type_1d_col];
4246 3630040 : cfg->txfm_type_row = av1_txfm_type_ls[txw_idx][tx_type_1d_row];
4247 3630040 : cfg->stage_num_col = av1_txfm_stage_num_list[cfg->txfm_type_col];
4248 3630040 : cfg->stage_num_row = av1_txfm_stage_num_list[cfg->txfm_type_row];
4249 3630040 : set_fwd_txfm_non_scale_range(cfg);
4250 3630110 : }
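/*
 * [Editorial sketch, not in the original source] Every transform wrapper
 * below follows the same configure-then-run pattern; collected here as one
 * self-contained example using this file's own API. The 16x16 size and
 * 8-bit depth are illustrative choices only, and the function is not
 * referenced anywhere.
 */
static void demo_configure_and_run(int16_t *residual, int32_t *coeffs,
                                   uint32_t stride, TxType tx_type) {
    int32_t intermediate[16 * 16];               /* scratch sized to block */
    Txfm2DFlipCfg cfg;
    Av1TransformConfig(tx_type, TX_16X16, &cfg); /* shifts, kernels, flips */
    Av1TranformTwoDCore_c(residual, stride, coeffs, &cfg,
                          intermediate, 8 /* bit depth */);
}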
4251 :
4252 0 : static uint64_t EnergyComputation(
4253 : int32_t *coeff,
4254 : uint32_t coeff_stride,
4255 : uint32_t area_width,
4256 : uint32_t area_height)
4257 : {
4258 : uint32_t columnIndex;
4259 0 : uint32_t row_index = 0;
4260 0 : uint64_t predictionDistortion = 0;
4261 :
4262 0 : while (row_index < area_height) {
4263 0 : columnIndex = 0;
4264 0 : while (columnIndex < area_width) {
4265 0 : predictionDistortion += (int64_t)SQR((int64_t)(coeff[columnIndex]));
4266 0 : ++columnIndex;
4267 : }
4268 :
4269 0 : coeff += coeff_stride;
4270 0 : ++row_index;
4271 : }
4272 :
4273 0 : return predictionDistortion;
4274 : }
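/*
 * [Editorial note, not in the original source] In other words,
 * EnergyComputation returns the squared L2 norm of a rectangular area:
 *
 *   energy = sum over r < area_height, c < area_width of
 *            coeff[r * coeff_stride + c]^2
 *
 * The HandleTransform* helpers below use it to capture the energy of the
 * high-frequency regions they are about to zero, reported to the caller as
 * three_quad_energy.
 */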
4275 :
4276 0 : uint64_t HandleTransform64x64_c(int32_t *output) {
4277 : uint64_t three_quad_energy;
4278 :
4279 : // Top-right 32x32 area.

4280 0 : three_quad_energy = EnergyComputation(output + 32, 64, 32, 32);
4281 : // Bottom 64x32 area.
4282 0 : three_quad_energy += EnergyComputation(output + 32 * 64, 64, 64, 32);
4283 :
4284 : // Zero out top-right 32x32 area.
4285 0 : for (int32_t row = 0; row < 32; ++row)
4286 0 : memset(output + row * 64 + 32, 0, 32 * sizeof(*output));
4287 :
4288 : // Zero out the bottom 64x32 area.
4289 0 : memset(output + 32 * 64, 0, 32 * 64 * sizeof(*output));
4290 :
4291 : // Re-pack non-zero coeffs in the first 32x32 indices.
4292 0 : for (int32_t row = 1; row < 32; ++row)
4293 0 : memcpy(output + row * 32, output + row * 64, 32 * sizeof(*output));
4294 :
4295 0 : return three_quad_energy;
4296 : }
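/*
 * [Editorial note, not in the original source] On the in-place re-pack
 * above, since overlapping copies are a common pitfall: for row r, memcpy
 * reads output[r*64 .. r*64+31] and writes output[r*32 .. r*32+31]. The two
 * ranges are disjoint for every r >= 1 (the write ends at 32r + 32 <= 64r),
 * and because rows are processed in ascending order no source row is
 * overwritten before it is read, so plain memcpy is safe here.
 */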
4297 :
4298 0 : void Av1TransformTwoD_64x64_c(
4299 : int16_t *input,
4300 : int32_t *output,
4301 : uint32_t input_stride,
4302 : TxType transform_type,
4303 : uint8_t bit_depth)
4304 : {
4305 : int32_t intermediateTransformBuffer[64 * 64];
4306 : Txfm2DFlipCfg cfg;
4307 : //av1_get_fwd_txfm_cfg
4308 0 : Av1TransformConfig(
4309 : transform_type,
4310 : TX_64X64,
4311 : &cfg);
4312 : //fwd_txfm2d_c
4313 0 : Av1TranformTwoDCore_c(
4314 : input,
4315 : input_stride,
4316 : output,
4317 : &cfg,
4318 : intermediateTransformBuffer,
4319 : bit_depth);
4320 0 : }
4321 :
4322 0 : void Av1TransformTwoD_32x32_c(
4323 : int16_t *input,
4324 : int32_t *output,
4325 : uint32_t input_stride,
4326 : TxType transform_type,
4327 : uint8_t bit_depth)
4328 : {
4329 : int32_t intermediateTransformBuffer[32 * 32];
4330 : Txfm2DFlipCfg cfg;
4331 :
4332 0 : Av1TransformConfig(
4333 : transform_type,
4334 : TX_32X32,
4335 : &cfg);
4336 :
4337 0 : Av1TranformTwoDCore_c(
4338 : input,
4339 : input_stride,
4340 : output,
4341 : &cfg,
4342 : intermediateTransformBuffer,
4343 : bit_depth);
4344 0 : }
4345 0 : void av1_fwd_txfm2d_pf_32x32_c(
4346 : int16_t *input,
4347 : int32_t *output,
4348 : uint32_t inputStride,
4349 : TxType transform_type,
4350 : uint8_t bit_depth)
4351 : {
4352 : int32_t intermediateTransformBuffer[32 * 32];
4353 : Txfm2DFlipCfg cfg;
4354 :
4355 0 : memset(output, 0, 1024 * sizeof(int32_t));
4356 :
4357 0 : Av1TransformConfig(
4358 : transform_type,
4359 : TX_32X32,
4360 : &cfg);
4361 :
4362 0 : Av1TranformTwoDCore_pf_c(
4363 : input,
4364 : inputStride,
4365 : output,
4366 : &cfg,
4367 : intermediateTransformBuffer,
4368 : bit_depth);
4369 0 : }
4370 0 : void Av1TransformTwoD_16x16_c(
4371 : int16_t *input,
4372 : int32_t *output,
4373 : uint32_t input_stride,
4374 : TxType transform_type,
4375 : uint8_t bit_depth)
4376 : {
4377 : int32_t intermediateTransformBuffer[16 * 16];
4378 : Txfm2DFlipCfg cfg;
4379 :
4380 0 : Av1TransformConfig(
4381 : transform_type,
4382 : TX_16X16,
4383 : &cfg);
4384 :
4385 0 : Av1TranformTwoDCore_c(
4386 : input,
4387 : input_stride,
4388 : output,
4389 : &cfg,
4390 : intermediateTransformBuffer,
4391 : bit_depth);
4392 0 : }
4393 :
4394 0 : void Av1TransformTwoD_8x8_c(
4395 : int16_t *input,
4396 : int32_t *output,
4397 : uint32_t input_stride,
4398 : TxType transform_type,
4399 : uint8_t bit_depth)
4400 : {
4401 : int32_t intermediateTransformBuffer[8 * 8];
4402 : Txfm2DFlipCfg cfg;
4403 :
4404 0 : Av1TransformConfig(
4405 : transform_type,
4406 : TX_8X8,
4407 : &cfg);
4408 :
4409 0 : Av1TranformTwoDCore_c(
4410 : input,
4411 : input_stride,
4412 : output,
4413 : &cfg,
4414 : intermediateTransformBuffer,
4415 : bit_depth);
4416 0 : }
4417 :
4418 0 : void Av1TransformTwoD_4x4_c(
4419 : int16_t *input,
4420 : int32_t *output,
4421 : uint32_t input_stride,
4422 : TxType transform_type,
4423 : uint8_t bit_depth)
4424 : {
4425 : int32_t intermediateTransformBuffer[4 * 4];
4426 : Txfm2DFlipCfg cfg;
4427 :
4428 0 : Av1TransformConfig(
4429 : transform_type,
4430 : TX_4X4,
4431 : &cfg);
4432 :
4433 0 : Av1TranformTwoDCore_c(
4434 : input,
4435 : input_stride,
4436 : output,
4437 : &cfg,
4438 : intermediateTransformBuffer,
4439 : bit_depth);
4440 0 : }
4441 :
4442 : /*********************************************************************
4443 : * Rectangular forward transforms and high-frequency coefficient handling
4444 : *********************************************************************/
4445 0 : void eb_av1_fwd_txfm2d_64x32_c(
4446 : int16_t *input,
4447 : int32_t *output,
4448 : uint32_t input_stride,
4449 : TxType transform_type,
4450 : uint8_t bit_depth) {
4451 : int32_t intermediateTransformBuffer[64 * 32];
4452 : Txfm2DFlipCfg cfg;
4453 : /*av1_get_fwd_txfm_cfg*/
4454 0 : Av1TransformConfig(transform_type, TX_64X32, &cfg);
4456 0 : /*fwd_txfm2d_c*/Av1TranformTwoDCore_c(
4457 : input,
4458 : input_stride,
4459 : output,
4460 : &cfg,
4461 : intermediateTransformBuffer,
4462 : bit_depth);
4463 0 : }
4464 :
4465 0 : uint64_t HandleTransform64x32_c(int32_t *output) {
4466 : // Top-right 32x32 area.
4467 : const uint64_t three_quad_energy =
4468 0 : EnergyComputation(output + 32, 64, 32, 32);
4469 :
4470 : // Zero out right 32x32 area.
4471 0 : for (int32_t row = 0; row < 32; ++row)
4472 0 : memset(output + row * 64 + 32, 0, 32 * sizeof(*output));
4473 :
4474 : // Re-pack non-zero coeffs in the first 32x32 indices.
4475 0 : for (int32_t row = 1; row < 32; ++row)
4476 0 : memcpy(output + row * 32, output + row * 64, 32 * sizeof(*output));
4477 :
4478 0 : return three_quad_energy;
4479 : }
4480 :
4481 0 : void eb_av1_fwd_txfm2d_32x64_c(
4482 : int16_t *input,
4483 : int32_t *output,
4484 : uint32_t input_stride,
4485 : TxType transform_type,
4486 : uint8_t bit_depth) {
4487 : int32_t intermediateTransformBuffer[32 * 64];
4488 :
4489 : Txfm2DFlipCfg cfg;
4490 : /*av1_get_fwd_txfm_cfg*/
4491 0 : Av1TransformConfig(transform_type, TX_32X64, &cfg);
4492 : /*fwd_txfm2d_c*/
4493 0 : Av1TranformTwoDCore_c(
4494 : input,
4495 : input_stride,
4496 : output,
4497 : &cfg,
4498 : intermediateTransformBuffer,
4499 : bit_depth);
4500 0 : }
4501 :
4502 0 : uint64_t HandleTransform32x64_c(int32_t *output) {
4503 : // Bottom 32x32 area.
4504 : const uint64_t three_quad_energy =
4505 0 : EnergyComputation(output + 32 * 32, 32, 32, 32);
4506 :
4507 : // Zero out the bottom 32x32 area.
4508 0 : memset(output + 32 * 32, 0, 32 * 32 * sizeof(*output));
4509 :
4510 0 : return three_quad_energy;
4511 : }
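/*
 * [Editorial note, not in the original source] Unlike the 64-wide cases, no
 * re-pack loop is needed here: the kept top 32x32 block already sits at the
 * start of the buffer with stride 32, so zeroing the bottom half is
 * sufficient. The same reasoning applies to HandleTransform16x64_c below.
 */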
4512 :
4513 0 : void eb_av1_fwd_txfm2d_64x16_c(
4514 : int16_t *input,
4515 : int32_t *output,
4516 : uint32_t input_stride,
4517 : TxType transform_type,
4518 : uint8_t bit_depth) {
4519 : int32_t intermediateTransformBuffer[64 * 16];
4520 : Txfm2DFlipCfg cfg;
4521 : /*av1_get_fwd_txfm_cfg*/
4522 0 : Av1TransformConfig(transform_type, TX_64X16, &cfg);
4524 0 : /*fwd_txfm2d_c*/Av1TranformTwoDCore_c(
4525 : input,
4526 : input_stride,
4527 : output,
4528 : &cfg,
4529 : intermediateTransformBuffer,
4530 : bit_depth);
4531 0 : }
4532 :
4533 0 : uint64_t HandleTransform64x16_c(int32_t *output) {
4534 : // Top-right 32x16 area.
4535 : const uint64_t three_quad_energy =
4536 0 : EnergyComputation(output + 32, 64, 32, 16);
4537 :
4538 : // Zero out right 32x16 area.
4539 0 : for (int32_t row = 0; row < 16; ++row)
4540 0 : memset(output + row * 64 + 32, 0, 32 * sizeof(*output));
4541 :
4542 : // Re-pack non-zero coeffs in the first 32x16 indices.
4543 0 : for (int32_t row = 1; row < 16; ++row)
4544 0 : memcpy(output + row * 32, output + row * 64, 32 * sizeof(*output));
4545 :
4546 0 : return three_quad_energy;
4547 : }
4548 :
4549 0 : void eb_av1_fwd_txfm2d_16x64_c(
4550 : int16_t *input,
4551 : int32_t *output,
4552 : uint32_t input_stride,
4553 : TxType transform_type,
4554 : uint8_t bit_depth) {
4555 : int32_t intermediateTransformBuffer[16 * 64];
4556 :
4557 : Txfm2DFlipCfg cfg;
4558 : /*av1_get_fwd_txfm_cfg*/
4559 0 : Av1TransformConfig(transform_type, TX_16X64, &cfg);
4560 : /*fwd_txfm2d_c*/
4561 0 : Av1TranformTwoDCore_c(
4562 : input,
4563 : input_stride,
4564 : output,
4565 : &cfg,
4566 : intermediateTransformBuffer,
4567 : bit_depth);
4568 0 : }
4569 :
4570 0 : uint64_t HandleTransform16x64_c(int32_t *output) {
4571 : // Bottom 16x32 area.
4572 : const uint64_t three_quad_energy =
4573 0 : EnergyComputation(output + 16 * 32, 16, 16, 32);
4574 :
4575 : // Zero out the bottom 16x32 area.
4576 0 : memset(output + 16 * 32, 0, 16 * 32 * sizeof(*output));
4577 :
4578 0 : return three_quad_energy;
4579 : }
4580 :
4581 1916670 : void eb_av1_fwd_txfm2d_32x16_c(
4582 : int16_t *input,
4583 : int32_t *output,
4584 : uint32_t input_stride,
4585 : TxType transform_type,
4586 : uint8_t bit_depth) {
4587 : int32_t intermediateTransformBuffer[32 * 16];
4588 : Txfm2DFlipCfg cfg;
4589 1916670 : /*av1_get_fwd_txfm_cfg*/Av1TransformConfig(transform_type, TX_32X16, &cfg);
4590 1916690 : /*fwd_txfm2d_c*/Av1TranformTwoDCore_c(
4591 : input,
4592 : input_stride,
4593 : output,
4594 : &cfg,
4595 : intermediateTransformBuffer,
4596 : bit_depth);
4597 1916540 : }
4598 :
4599 0 : void eb_av1_fwd_txfm2d_16x32_c(
4600 : int16_t *input,
4601 : int32_t *output,
4602 : uint32_t input_stride,
4603 : TxType transform_type,
4604 : uint8_t bit_depth) {
4605 : int32_t intermediateTransformBuffer[16 * 32];
4606 : Txfm2DFlipCfg cfg;
4607 0 : /*av1_get_fwd_txfm_cfg*/Av1TransformConfig(transform_type, TX_16X32, &cfg);
4608 0 : /*fwd_txfm2d_c*/Av1TranformTwoDCore_c(
4609 : input,
4610 : input_stride,
4611 : output,
4612 : &cfg,
4613 : intermediateTransformBuffer,
4614 : bit_depth);
4615 0 : }
4616 :
4617 0 : void eb_av1_fwd_txfm2d_16x8_c(
4618 : int16_t *input,
4619 : int32_t *output,
4620 : uint32_t input_stride,
4621 : TxType transform_type,
4622 : uint8_t bit_depth) {
4623 : int32_t intermediateTransformBuffer[16 * 8];
4624 : Txfm2DFlipCfg cfg;
4625 0 : /*av1_get_fwd_txfm_cfg*/Av1TransformConfig(transform_type, TX_16X8, &cfg);
4626 0 : /*fwd_txfm2d_c*/Av1TranformTwoDCore_c(
4627 : input,
4628 : input_stride,
4629 : output,
4630 : &cfg,
4631 : intermediateTransformBuffer,
4632 : bit_depth);
4633 0 : }
4634 :
4635 0 : void eb_av1_fwd_txfm2d_8x16_c(
4636 : int16_t *input,
4637 : int32_t *output,
4638 : uint32_t input_stride,
4639 : TxType transform_type,
4640 : uint8_t bit_depth) {
4641 : int32_t intermediateTransformBuffer[8 * 16];
4642 : Txfm2DFlipCfg cfg;
4643 0 : /*av1_get_fwd_txfm_cfg*/Av1TransformConfig(transform_type, TX_8X16, &cfg);
4644 0 : /*fwd_txfm2d_c*/Av1TranformTwoDCore_c(
4645 : input,
4646 : input_stride,
4647 : output,
4648 : &cfg,
4649 : intermediateTransformBuffer,
4650 : bit_depth);
4651 0 : }
4652 :
4653 0 : void eb_av1_fwd_txfm2d_32x8_c(
4654 : int16_t *input,
4655 : int32_t *output,
4656 : uint32_t input_stride,
4657 : TxType transform_type,
4658 : uint8_t bit_depth) {
4659 : int32_t intermediateTransformBuffer[32 * 8];
4660 : Txfm2DFlipCfg cfg;
4661 0 : /*av1_get_fwd_txfm_cfg*/Av1TransformConfig(transform_type, TX_32X8, &cfg);
4662 0 : /*fwd_txfm2d_c*/Av1TranformTwoDCore_c(
4663 : input,
4664 : input_stride,
4665 : output,
4666 : &cfg,
4667 : intermediateTransformBuffer,
4668 : bit_depth);
4669 0 : }
4670 :
4671 0 : void eb_av1_fwd_txfm2d_8x32_c(
4672 : int16_t *input,
4673 : int32_t *output,
4674 : uint32_t input_stride,
4675 : TxType transform_type,
4676 : uint8_t bit_depth) {
4677 : int32_t intermediateTransformBuffer[8 * 32];
4678 : Txfm2DFlipCfg cfg;
4679 0 : /*av1_get_fwd_txfm_cfg*/Av1TransformConfig(transform_type, TX_8X32, &cfg);
4680 0 : /*fwd_txfm2d_c*/Av1TranformTwoDCore_c(
4681 : input,
4682 : input_stride,
4683 : output,
4684 : &cfg,
4685 : intermediateTransformBuffer,
4686 : bit_depth);
4687 0 : }
4688 :
4689 0 : void eb_av1_fwd_txfm2d_16x4_c(
4690 : int16_t *input,
4691 : int32_t *output,
4692 : uint32_t input_stride,
4693 : TxType transform_type,
4694 : uint8_t bit_depth) {
4695 : int32_t intermediateTransformBuffer[16 * 4];
4696 : Txfm2DFlipCfg cfg;
4697 0 : /*av1_get_fwd_txfm_cfg*/Av1TransformConfig(transform_type, TX_16X4, &cfg);
4698 0 : /*fwd_txfm2d_c*/Av1TranformTwoDCore_c(
4699 : input,
4700 : input_stride,
4701 : output,
4702 : &cfg,
4703 : intermediateTransformBuffer,
4704 : bit_depth);
4705 0 : }
4706 :
4707 0 : void eb_av1_fwd_txfm2d_4x16_c(
4708 : int16_t *input,
4709 : int32_t *output,
4710 : uint32_t input_stride,
4711 : TxType transform_type,
4712 : uint8_t bit_depth) {
4713 : int32_t intermediateTransformBuffer[4 * 16];
4714 : Txfm2DFlipCfg cfg;
4715 0 : /*av1_get_fwd_txfm_cfg*/Av1TransformConfig(transform_type, TX_4X16, &cfg);
4716 0 : /*fwd_txfm2d_c*/Av1TranformTwoDCore_c(
4717 : input,
4718 : input_stride,
4719 : output,
4720 : &cfg,
4721 : intermediateTransformBuffer,
4722 : bit_depth);
4723 0 : }
4724 :
4725 0 : void eb_av1_fwd_txfm2d_8x4_c(
4726 : int16_t *input,
4727 : int32_t *output,
4728 : uint32_t input_stride,
4729 : TxType transform_type,
4730 : uint8_t bit_depth) {
4731 : int32_t intermediateTransformBuffer[8 * 4];
4732 : Txfm2DFlipCfg cfg;
4733 0 : /*av1_get_fwd_txfm_cfg*/Av1TransformConfig(transform_type, TX_8X4, &cfg);
4734 0 : /*fwd_txfm2d_c*/Av1TranformTwoDCore_c(
4735 : input,
4736 : input_stride,
4737 : output,
4738 : &cfg,
4739 : intermediateTransformBuffer,
4740 : bit_depth);
4741 0 : }
4742 :
4743 0 : void eb_av1_fwd_txfm2d_4x8_c(
4744 : int16_t *input,
4745 : int32_t *output,
4746 : uint32_t input_stride,
4747 : TxType transform_type,
4748 : uint8_t bit_depth) {
4749 : int32_t intermediateTransformBuffer[4 * 8];
4750 : Txfm2DFlipCfg cfg;
4751 0 : /*av1_get_fwd_txfm_cfg*/Av1TransformConfig(transform_type, TX_4X8, &cfg);
4752 0 : /*fwd_txfm2d_c*/Av1TranformTwoDCore_c(
4753 : input,
4754 : input_stride,
4755 : output,
4756 : &cfg,
4757 : intermediateTransformBuffer,
4758 : bit_depth);
4759 0 : }
4760 :
4761 : /*********************************************************************
4762 : * Transform
4763 : * Note there is an implicit assumption that TU Size <= PU Size,
4764 : * which differs from the HEVC requirements.
4765 : *********************************************************************/
4766 105601000 : EbErrorType av1_estimate_transform(
4767 : int16_t *residual_buffer,
4768 : uint32_t residual_stride,
4769 : int32_t *coeff_buffer,
4770 : uint32_t coeff_stride,
4771 : TxSize transform_size,
4772 : uint64_t *three_quad_energy,
4773 : int16_t *transform_inner_array_ptr,
4774 : uint32_t bit_increment,
4775 : TxType transform_type,
4776 : PlaneType component_type,
4777 : EB_TRANS_COEFF_SHAPE trans_coeff_shape)
4778 :
4779 : {
4780 : (void)trans_coeff_shape;
4781 105601000 : EbErrorType return_error = EB_ErrorNone;
4782 :
4783 : (void)transform_inner_array_ptr;
4784 : (void)coeff_stride;
4785 : (void)component_type;
4786 105601000 : uint8_t bit_depth = bit_increment ? 10 : 8; // NM - 10-bit when bit_increment is set, 8-bit otherwise
4787 :
4788 105601000 : switch (transform_size) {
4789 513582 : case TX_64X32:
4790 513582 : if (transform_type == DCT_DCT)
4791 513582 : eb_av1_fwd_txfm2d_64x32(
4792 : residual_buffer,
4793 : coeff_buffer,
4794 : residual_stride,
4795 : transform_type,
4796 : bit_depth);
4797 : else
4798 0 : eb_av1_fwd_txfm2d_64x32_c(
4799 : residual_buffer,
4800 : coeff_buffer,
4801 : residual_stride,
4802 : transform_type,
4803 : bit_depth);
4804 :
4805 513589 : *three_quad_energy = HandleTransform64x32(coeff_buffer);
4806 :
4807 513583 : break;
4808 :
4809 557016 : case TX_32X64:
4810 557016 : if (transform_type == DCT_DCT)
4811 557019 : eb_av1_fwd_txfm2d_32x64(
4812 : residual_buffer,
4813 : coeff_buffer,
4814 : residual_stride,
4815 : transform_type,
4816 : bit_depth);
4817 : else
4818 0 : eb_av1_fwd_txfm2d_32x64_c(
4819 : residual_buffer,
4820 : coeff_buffer,
4821 : residual_stride,
4822 : transform_type,
4823 : bit_depth);
4824 :
4825 557014 : *three_quad_energy = HandleTransform32x64(coeff_buffer);
4826 :
4827 557012 : break;
4828 :
4829 790603 : case TX_64X16:
4830 790603 : if (transform_type == DCT_DCT)
4831 790603 : eb_av1_fwd_txfm2d_64x16(
4832 : residual_buffer,
4833 : coeff_buffer,
4834 : residual_stride,
4835 : transform_type,
4836 : bit_depth);
4837 : else
4838 0 : eb_av1_fwd_txfm2d_64x16_c(
4839 : residual_buffer,
4840 : coeff_buffer,
4841 : residual_stride,
4842 : transform_type,
4843 : bit_depth);
4844 :
4845 790609 : *three_quad_energy = HandleTransform64x16(coeff_buffer);
4846 :
4847 790601 : break;
4848 :
4849 855372 : case TX_16X64:
4850 855372 : if (transform_type == DCT_DCT)
4851 855372 : eb_av1_fwd_txfm2d_16x64(
4852 : residual_buffer,
4853 : coeff_buffer,
4854 : residual_stride,
4855 : transform_type,
4856 : bit_depth);
4857 : else
4858 0 : eb_av1_fwd_txfm2d_16x64_c(
4859 : residual_buffer,
4860 : coeff_buffer,
4861 : residual_stride,
4862 : transform_type,
4863 : bit_depth);
4864 :
4865 855372 : *three_quad_energy = HandleTransform16x64(coeff_buffer);
4866 :
4867 855376 : break;
4868 :
4869 2003820 : case TX_32X16:
4870 : // TTK
4871 2003820 : if (transform_type == IDTX)
4872 87137 : eb_av1_fwd_txfm2d_32x16(
4873 : residual_buffer,
4874 : coeff_buffer,
4875 : residual_stride,
4876 : transform_type,
4877 : bit_depth);
4878 : else
4879 1916680 : eb_av1_fwd_txfm2d_32x16_c(
4880 : residual_buffer,
4881 : coeff_buffer,
4882 : residual_stride,
4883 : transform_type,
4884 : bit_depth);
4885 2003680 : break;
4886 :
4887 2006860 : case TX_16X32:
4888 2006860 : if ((transform_type == DCT_DCT) || (transform_type == IDTX))
4889 2006860 : eb_av1_fwd_txfm2d_16x32(
4890 : residual_buffer,
4891 : coeff_buffer,
4892 : residual_stride,
4893 : transform_type,
4894 : bit_depth);
4895 : else
4896 0 : eb_av1_fwd_txfm2d_16x32_c(
4897 : residual_buffer,
4898 : coeff_buffer,
4899 : residual_stride,
4900 : transform_type,
4901 : bit_depth);
4902 2006880 : break;
4903 :
4904 7281400 : case TX_16X8:
4905 7281400 : eb_av1_fwd_txfm2d_16x8(
4906 : residual_buffer,
4907 : coeff_buffer,
4908 : residual_stride,
4909 : transform_type,
4910 : bit_depth);
4911 7282350 : break;
4912 :
4913 8828210 : case TX_8X16:
4914 8828210 : eb_av1_fwd_txfm2d_8x16(
4915 : residual_buffer,
4916 : coeff_buffer,
4917 : residual_stride,
4918 : transform_type,
4919 : bit_depth);
4920 8829530 : break;
4921 :
4922 2533920 : case TX_32X8:
4923 2533920 : if ((transform_type == DCT_DCT) || (transform_type == IDTX))
4924 2533920 : eb_av1_fwd_txfm2d_32x8(
4925 : residual_buffer,
4926 : coeff_buffer,
4927 : residual_stride,
4928 : transform_type,
4929 : bit_depth);
4930 : else
4931 0 : eb_av1_fwd_txfm2d_32x8_c(
4932 : residual_buffer,
4933 : coeff_buffer,
4934 : residual_stride,
4935 : transform_type,
4936 : bit_depth);
4937 2534090 : break;
4938 :
4939 2537710 : case TX_8X32:
4940 2537710 : if ((transform_type == DCT_DCT) || (transform_type == IDTX))
4941 2537710 : eb_av1_fwd_txfm2d_8x32(
4942 : residual_buffer,
4943 : coeff_buffer,
4944 : residual_stride,
4945 : transform_type,
4946 : bit_depth);
4947 : else
4948 0 : eb_av1_fwd_txfm2d_8x32_c(
4949 : residual_buffer,
4950 : coeff_buffer,
4951 : residual_stride,
4952 : transform_type,
4953 : bit_depth);
4954 2537860 : break;
4955 5777420 : case TX_16X4:
4956 5777420 : eb_av1_fwd_txfm2d_16x4(
4957 : residual_buffer,
4958 : coeff_buffer,
4959 : residual_stride,
4960 : transform_type,
4961 : bit_depth);
4962 5777920 : break;
4963 5564350 : case TX_4X16:
4964 5564350 : eb_av1_fwd_txfm2d_4x16(
4965 : residual_buffer,
4966 : coeff_buffer,
4967 : residual_stride,
4968 : transform_type,
4969 : bit_depth);
4970 5564820 : break;
4971 7357360 : case TX_8X4:
4972 :
4973 7357360 : eb_av1_fwd_txfm2d_8x4(
4974 : residual_buffer,
4975 : coeff_buffer,
4976 : residual_stride,
4977 : transform_type,
4978 : bit_depth);
4979 :
4980 7358330 : break;
4981 7683930 : case TX_4X8:
4982 :
4983 7683930 : eb_av1_fwd_txfm2d_4x8(
4984 : residual_buffer,
4985 : coeff_buffer,
4986 : residual_stride,
4987 : transform_type,
4988 : bit_depth);
4989 :
4990 7684920 : break;
4991 :
4992 314333 : case TX_64X64:
4993 :
4994 314333 : eb_av1_fwd_txfm2d_64x64(
4995 : residual_buffer,
4996 : coeff_buffer,
4997 : residual_stride,
4998 : transform_type,
4999 : bit_depth);
5000 :
5001 314334 : *three_quad_energy = HandleTransform64x64(coeff_buffer);
5002 :
5003 70574 : break;
5004 :
5005 1713680 : case TX_32X32:
5006 1713680 : if (transform_type == V_DCT || transform_type == H_DCT || transform_type == V_ADST || transform_type == H_ADST || transform_type == V_FLIPADST || transform_type == H_FLIPADST)
5007 : // Tahani: I believe those cases are never hit
5008 0 : Av1TransformTwoD_32x32_c(
5009 : residual_buffer,
5010 : coeff_buffer,
5011 : residual_stride,
5012 : transform_type,
5013 : bit_depth);
5014 :
5015 : else {
5016 1713680 : eb_av1_fwd_txfm2d_32x32(
5017 : residual_buffer,
5018 : coeff_buffer,
5019 : residual_stride,
5020 : transform_type,
5021 : bit_depth);
5022 : }
5023 :
5024 1713690 : break;
5025 :
5026 7143980 : case TX_16X16:
5027 :
5028 7143980 : eb_av1_fwd_txfm2d_16x16(
5029 : residual_buffer,
5030 : coeff_buffer,
5031 : residual_stride,
5032 : transform_type,
5033 : bit_depth);
5034 :
5035 7144360 : break;
5036 19667700 : case TX_8X8:
5037 :
5038 19667700 : eb_av1_fwd_txfm2d_8x8(
5039 : residual_buffer,
5040 : coeff_buffer,
5041 : residual_stride,
5042 : transform_type,
5043 : bit_depth);
5044 :
5045 19672900 : break;
5046 22761000 : case TX_4X4:
5047 :
5048 22761000 : eb_av1_fwd_txfm2d_4x4(
5049 : residual_buffer,
5050 : coeff_buffer,
5051 : residual_stride,
5052 : transform_type,
5053 : bit_depth);
5054 :
5055 22767800 : break;
5056 0 : default: assert(0); break;
5057 : }
5058 :
5059 105666000 : return return_error;
5060 : }
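/*
 * [Editorial sketch, not in the original source] A usage example for the
 * dispatcher above. The 8x8 size and DCT_DCT choice are illustrative;
 * PLANE_TYPE_Y and DEFAULT_SHAPE are assumed to be the matching enumerators
 * from the encoder's headers, and the coeff-stride, inner-array, component,
 * and shape arguments may be dummies since the function casts them to void.
 */
static void demo_estimate_8x8(int16_t *residual, int32_t *coeffs) {
    uint64_t three_quad_energy = 0;  /* only written for 64-point sizes */
    av1_estimate_transform(residual, 8 /* residual stride */,
                           coeffs, 8 /* coeff stride (unused) */,
                           TX_8X8, &three_quad_energy,
                           NULL /* inner array (unused) */,
                           0 /* bit_increment: 8-bit */,
                           DCT_DCT, PLANE_TYPE_Y, DEFAULT_SHAPE);
}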
5061 :
5062 0 : void Av1InverseTransformConfig(
5063 : TxType tx_type,
5064 : TxSize tx_size,
5065 : Txfm2DFlipCfg *cfg)
5066 : {
5067 0 : assert(cfg != NULL);
5068 0 : cfg->tx_size = tx_size;
5069 0 : set_flip_cfg(tx_type, cfg);
5070 0 : av1_zero(cfg->stage_range_col);
5071 0 : av1_zero(cfg->stage_range_row);
5072 : // (redundant duplicate set_flip_cfg call removed; flip flags are already set above)
5073 0 : const TxType1D tx_type_1d_col = vtx_tab[tx_type];
5074 0 : const TxType1D tx_type_1d_row = htx_tab[tx_type];
5075 0 : cfg->shift = eb_inv_txfm_shift_ls[tx_size];
5076 0 : const int32_t txw_idx = get_txw_idx(tx_size);
5077 0 : const int32_t txh_idx = get_txh_idx(tx_size);
5078 0 : cfg->cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
5079 0 : cfg->cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
5080 0 : cfg->txfm_type_col = av1_txfm_type_ls[txh_idx][tx_type_1d_col];
5081 0 : if (cfg->txfm_type_col == TXFM_TYPE_ADST4)
5082 0 : memcpy(cfg->stage_range_col, iadst4_range, sizeof(iadst4_range));
5083 0 : cfg->txfm_type_row = av1_txfm_type_ls[txw_idx][tx_type_1d_row];
5084 0 : if (cfg->txfm_type_row == TXFM_TYPE_ADST4)
5085 0 : memcpy(cfg->stage_range_row, iadst4_range, sizeof(iadst4_range));
5086 0 : cfg->stage_num_col = av1_txfm_stage_num_list[cfg->txfm_type_col];
5087 0 : cfg->stage_num_row = av1_txfm_stage_num_list[cfg->txfm_type_row];
5088 0 : }
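/*
 * [Editorial note, not in the original source] One asymmetry worth flagging
 * in the inverse configuration above: ADST4 is the only kernel whose stage
 * ranges are seeded from a fixed table (iadst4_range) instead of being left
 * zeroed, which pairs with the ADST4 extra-bit caveat inside
 * eb_av1_gen_inv_stage_range below.
 */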
5089 :
5090 0 : void eb_av1_gen_inv_stage_range(
5091 : int8_t *stage_range_col,
5092 : int8_t *stage_range_row,
5093 : const Txfm2DFlipCfg *cfg,
5094 : TxSize tx_size,
5095 : int32_t bd)
5096 : {
5097 0 : const int32_t fwd_shift = inv_start_range[tx_size];
5098 0 : const int8_t *shift = cfg->shift;
5099 : int8_t opt_range_row, opt_range_col;
5100 0 : if (bd == 8) {
5101 0 : opt_range_row = 16;
5102 0 : opt_range_col = 16;
5103 : }
5104 0 : else if (bd == 10) {
5105 0 : opt_range_row = 18;
5106 0 : opt_range_col = 16;
5107 : }
5108 : else {
5109 0 : assert(bd == 12);
5110 0 : opt_range_row = 20;
5111 0 : opt_range_col = 18;
5112 : }
5113 : // i < MAX_TXFM_STAGE_NUM silences the array-bounds warning above
5114 0 : for (int32_t i = 0; i < cfg->stage_num_row && i < MAX_TXFM_STAGE_NUM; ++i) {
5115 0 : int32_t real_range_row = cfg->stage_range_row[i] + fwd_shift + bd + 1;
5116 : (void)real_range_row;
5117 0 : if (cfg->txfm_type_row == TXFM_TYPE_ADST4 && i == 1) {
5118 : // the adst4 may use 1 extra bit on top of opt_range_row at stage 1,
5119 : // so opt_range_row >= real_range_row may not hold
5120 0 : stage_range_row[i] = opt_range_row;
5121 : }
5122 : else {
5123 0 : assert(opt_range_row >= real_range_row);
5124 0 : stage_range_row[i] = opt_range_row;
5125 : }
5126 : }
5127 : // i < MAX_TXFM_STAGE_NUM silences the array-bounds warning above
5128 0 : for (int32_t i = 0; i < cfg->stage_num_col && i < MAX_TXFM_STAGE_NUM; ++i) {
5129 0 : int32_t real_range_col =
5130 0 : cfg->stage_range_col[i] + fwd_shift + shift[0] + bd + 1;
5131 : (void)real_range_col;
5132 0 : if (cfg->txfm_type_col == TXFM_TYPE_ADST4 && i == 1) {
5133 : // the adst4 may use 1 extra bit on top of opt_range_col at stage 1,
5134 : // so opt_range_col >= real_range_col may not hold
5135 0 : stage_range_col[i] = opt_range_col;
5136 : }
5137 : else {
5138 0 : assert(opt_range_col >= real_range_col);
5139 0 : stage_range_col[i] = opt_range_col;
5140 : }
5141 : }
5142 0 : }
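/*
 * [Editorial note, not in the original source] Summarizing the clamp widths
 * chosen above, per bit depth:
 *
 *   bd = 8  -> rows clamped to 16 bits, columns to 16 bits
 *   bd = 10 -> rows to 18 bits, columns to 16 bits
 *   bd = 12 -> rows to 20 bits, columns to 18 bits
 *
 * The real_range_* values feed only the debug asserts; the fixed
 * opt_range_* widths are what the clamp stages in the kernels below
 * actually receive.
 */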
5143 :
5144 0 : static INLINE int32_t clamp_value(int32_t value, int8_t bit) {
5145 0 : if (bit <= 0) return value; // Do nothing for invalid clamp bit.
5146 0 : const int64_t max_value = (1LL << (bit - 1)) - 1;
5147 0 : const int64_t min_value = -(1LL << (bit - 1));
5148 0 : return (int32_t)clamp64(value, min_value, max_value);
5149 : }
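/*
 * [Editorial note, not in the original source] Concretely, clamp_value
 * saturates to the signed bit-wide range [-2^(bit-1), 2^(bit-1) - 1]; with
 * bit = 16 that is [-32768, 32767]. The 1LL shifts keep the bounds
 * well-defined even for bit values that would overflow a 32-bit shift.
 */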
5150 :
5151 0 : void eb_av1_idct4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
5152 : const int8_t *stage_range) {
5153 0 : assert(output != input);
5154 0 : const int32_t *cospi = cospi_arr(cos_bit);
5155 :
5156 0 : int32_t stage = 0;
5157 : int32_t *bf0, *bf1;
5158 : int32_t step[4];
5159 :
5160 : // stage 0;
5161 :
5162 : // stage 1;
5163 0 : stage++;
5164 0 : bf1 = output;
5165 0 : bf1[0] = input[0];
5166 0 : bf1[1] = input[2];
5167 0 : bf1[2] = input[1];
5168 0 : bf1[3] = input[3];
5169 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5170 :
5171 : // stage 2
5172 0 : stage++;
5173 0 : bf0 = output;
5174 0 : bf1 = step;
5175 0 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
5176 0 : bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
5177 0 : bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
5178 0 : bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
5179 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5180 :
5181 : // stage 3
5182 0 : stage++;
5183 0 : bf0 = step;
5184 0 : bf1 = output;
5185 0 : bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
5186 0 : bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
5187 0 : bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
5188 0 : bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
5189 0 : }
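/*
 * [Editorial note, not in the original source] The building block used
 * throughout these kernels: half_btf(w0, in0, w1, in1, cos_bit) computes
 * one output of a butterfly rotation, round_shift(w0*in0 + w1*in1, cos_bit),
 * with cospi[i] holding cos(i*pi/64) scaled by 2^cos_bit. Stage 2 above is
 * therefore the classic 4-point pattern
 *
 *   out0 = round_shift(c32*x0 + c32*x1, cos_bit)
 *   out1 = round_shift(c32*x0 - c32*x1, cos_bit)
 *
 * on the even inputs, plus a 16/48 rotation of the odd pair, recombined by
 * the clamped additions of stage 3.
 */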
5190 0 : void eb_av1_idct8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
5191 : const int8_t *stage_range) {
5192 0 : assert(output != input);
5193 0 : const int32_t *cospi = cospi_arr(cos_bit);
5194 :
5195 0 : int32_t stage = 0;
5196 : int32_t *bf0, *bf1;
5197 : int32_t step[8];
5198 :
5199 : // stage 0;
5200 :
5201 : // stage 1;
5202 0 : stage++;
5203 0 : bf1 = output;
5204 0 : bf1[0] = input[0];
5205 0 : bf1[1] = input[4];
5206 0 : bf1[2] = input[2];
5207 0 : bf1[3] = input[6];
5208 0 : bf1[4] = input[1];
5209 0 : bf1[5] = input[5];
5210 0 : bf1[6] = input[3];
5211 0 : bf1[7] = input[7];
5212 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5213 :
5214 : // stage 2
5215 0 : stage++;
5216 0 : bf0 = output;
5217 0 : bf1 = step;
5218 0 : bf1[0] = bf0[0];
5219 0 : bf1[1] = bf0[1];
5220 0 : bf1[2] = bf0[2];
5221 0 : bf1[3] = bf0[3];
5222 0 : bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit);
5223 0 : bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit);
5224 0 : bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit);
5225 0 : bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit);
5226 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5227 :
5228 : // stage 3
5229 0 : stage++;
5230 0 : bf0 = step;
5231 0 : bf1 = output;
5232 0 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
5233 0 : bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
5234 0 : bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
5235 0 : bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
5236 0 : bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]);
5237 0 : bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]);
5238 0 : bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]);
5239 0 : bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]);
5240 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5241 :
5242 : // stage 4
5243 0 : stage++;
5244 0 : bf0 = output;
5245 0 : bf1 = step;
5246 0 : bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
5247 0 : bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
5248 0 : bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
5249 0 : bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
5250 0 : bf1[4] = bf0[4];
5251 0 : bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
5252 0 : bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
5253 0 : bf1[7] = bf0[7];
5254 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5255 :
5256 : // stage 5
5257 0 : stage++;
5258 0 : bf0 = step;
5259 0 : bf1 = output;
5260 0 : bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]);
5261 0 : bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]);
5262 0 : bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]);
5263 0 : bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]);
5264 0 : bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]);
5265 0 : bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]);
5266 0 : bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]);
5267 0 : bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]);
5268 0 : }
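/*
 * [Editorial note, not in the original source] Structurally, the 8-point
 * kernel embeds the 4-point one: stage 1 separates even inputs (slots 0..3)
 * from odd (4..7); stages 3-4 then apply exactly the idct4 rotations and
 * additions to the even half while a parallel lattice handles the odd half,
 * and stage 5 folds the halves together with the mirrored add/subtract
 * pattern. The 16- and 32-point kernels below repeat this even/odd
 * recursion once and twice more.
 */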
5269 0 : void eb_av1_idct16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
5270 : const int8_t *stage_range) {
5271 0 : assert(output != input);
5272 0 : const int32_t *cospi = cospi_arr(cos_bit);
5273 :
5274 0 : int32_t stage = 0;
5275 : int32_t *bf0, *bf1;
5276 : int32_t step[16];
5277 :
5278 : // stage 0;
5279 :
5280 : // stage 1;
5281 0 : stage++;
5282 0 : bf1 = output;
5283 0 : bf1[0] = input[0];
5284 0 : bf1[1] = input[8];
5285 0 : bf1[2] = input[4];
5286 0 : bf1[3] = input[12];
5287 0 : bf1[4] = input[2];
5288 0 : bf1[5] = input[10];
5289 0 : bf1[6] = input[6];
5290 0 : bf1[7] = input[14];
5291 0 : bf1[8] = input[1];
5292 0 : bf1[9] = input[9];
5293 0 : bf1[10] = input[5];
5294 0 : bf1[11] = input[13];
5295 0 : bf1[12] = input[3];
5296 0 : bf1[13] = input[11];
5297 0 : bf1[14] = input[7];
5298 0 : bf1[15] = input[15];
5299 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5300 :
5301 : // stage 2
5302 0 : stage++;
5303 0 : bf0 = output;
5304 0 : bf1 = step;
5305 0 : bf1[0] = bf0[0];
5306 0 : bf1[1] = bf0[1];
5307 0 : bf1[2] = bf0[2];
5308 0 : bf1[3] = bf0[3];
5309 0 : bf1[4] = bf0[4];
5310 0 : bf1[5] = bf0[5];
5311 0 : bf1[6] = bf0[6];
5312 0 : bf1[7] = bf0[7];
5313 0 : bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit);
5314 0 : bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit);
5315 0 : bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit);
5316 0 : bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit);
5317 0 : bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit);
5318 0 : bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit);
5319 0 : bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit);
5320 0 : bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit);
5321 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5322 :
5323 : // stage 3
5324 0 : stage++;
5325 0 : bf0 = step;
5326 0 : bf1 = output;
5327 0 : bf1[0] = bf0[0];
5328 0 : bf1[1] = bf0[1];
5329 0 : bf1[2] = bf0[2];
5330 0 : bf1[3] = bf0[3];
5331 0 : bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit);
5332 0 : bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit);
5333 0 : bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit);
5334 0 : bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit);
5335 0 : bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]);
5336 0 : bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]);
5337 0 : bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]);
5338 0 : bf1[11] = clamp_value(bf0[10] + bf0[11], stage_range[stage]);
5339 0 : bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]);
5340 0 : bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]);
5341 0 : bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]);
5342 0 : bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]);
5343 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5344 :
5345 : // stage 4
5346 0 : stage++;
5347 0 : bf0 = output;
5348 0 : bf1 = step;
5349 0 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
5350 0 : bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
5351 0 : bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
5352 0 : bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
5353 0 : bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]);
5354 0 : bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]);
5355 0 : bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]);
5356 0 : bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]);
5357 0 : bf1[8] = bf0[8];
5358 0 : bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
5359 0 : bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
5360 0 : bf1[11] = bf0[11];
5361 0 : bf1[12] = bf0[12];
5362 0 : bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit);
5363 0 : bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit);
5364 0 : bf1[15] = bf0[15];
5365 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5366 :
5367 : // stage 5
5368 0 : stage++;
5369 0 : bf0 = step;
5370 0 : bf1 = output;
5371 0 : bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
5372 0 : bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
5373 0 : bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
5374 0 : bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
5375 0 : bf1[4] = bf0[4];
5376 0 : bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
5377 0 : bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
5378 0 : bf1[7] = bf0[7];
5379 0 : bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]);
5380 0 : bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]);
5381 0 : bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]);
5382 0 : bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]);
5383 0 : bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]);
5384 0 : bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]);
5385 0 : bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]);
5386 0 : bf1[15] = clamp_value(bf0[12] + bf0[15], stage_range[stage]);
5387 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5388 :
5389 : // stage 6
5390 0 : stage++;
5391 0 : bf0 = output;
5392 0 : bf1 = step;
5393 0 : bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]);
5394 0 : bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]);
5395 0 : bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]);
5396 0 : bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]);
5397 0 : bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]);
5398 0 : bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]);
5399 0 : bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]);
5400 0 : bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]);
5401 0 : bf1[8] = bf0[8];
5402 0 : bf1[9] = bf0[9];
5403 0 : bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
5404 0 : bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
5405 0 : bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
5406 0 : bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
5407 0 : bf1[14] = bf0[14];
5408 0 : bf1[15] = bf0[15];
5409 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5410 :
5411 : // stage 7
5412 0 : stage++;
5413 0 : bf0 = step;
5414 0 : bf1 = output;
5415 0 : bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]);
5416 0 : bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]);
5417 0 : bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]);
5418 0 : bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]);
5419 0 : bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]);
5420 0 : bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]);
5421 0 : bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]);
5422 0 : bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]);
5423 0 : bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]);
5424 0 : bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]);
5425 0 : bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]);
5426 0 : bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]);
5427 0 : bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]);
5428 0 : bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]);
5429 0 : bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]);
5430 0 : bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]);
5431 0 : }
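/*
 * [Editorial note, not in the original source] A stage-count check that
 * makes the recursion explicit: an N-point kernel in this family runs
 * 2*log2(N) - 1 butterfly stages (stage 0 is only a comment placeholder).
 * That gives 3 for eb_av1_idct4_new, 5 for eb_av1_idct8_new, 7 for the
 * 16-point kernel above, and 9 for eb_av1_idct32_new below, matching the
 * "// stage 9" label near its end.
 */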
5432 0 : void eb_av1_idct32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
5433 : const int8_t *stage_range) {
5434 0 : assert(output != input);
5435 0 : const int32_t *cospi = cospi_arr(cos_bit);
5436 :
5437 0 : int32_t stage = 0;
5438 : int32_t *bf0, *bf1;
5439 : int32_t step[32];
5440 :
5441 : // stage 0;
5442 :
5443 : // stage 1;
5444 0 : stage++;
5445 0 : bf1 = output;
5446 0 : bf1[0] = input[0];
5447 0 : bf1[1] = input[16];
5448 0 : bf1[2] = input[8];
5449 0 : bf1[3] = input[24];
5450 0 : bf1[4] = input[4];
5451 0 : bf1[5] = input[20];
5452 0 : bf1[6] = input[12];
5453 0 : bf1[7] = input[28];
5454 0 : bf1[8] = input[2];
5455 0 : bf1[9] = input[18];
5456 0 : bf1[10] = input[10];
5457 0 : bf1[11] = input[26];
5458 0 : bf1[12] = input[6];
5459 0 : bf1[13] = input[22];
5460 0 : bf1[14] = input[14];
5461 0 : bf1[15] = input[30];
5462 0 : bf1[16] = input[1];
5463 0 : bf1[17] = input[17];
5464 0 : bf1[18] = input[9];
5465 0 : bf1[19] = input[25];
5466 0 : bf1[20] = input[5];
5467 0 : bf1[21] = input[21];
5468 0 : bf1[22] = input[13];
5469 0 : bf1[23] = input[29];
5470 0 : bf1[24] = input[3];
5471 0 : bf1[25] = input[19];
5472 0 : bf1[26] = input[11];
5473 0 : bf1[27] = input[27];
5474 0 : bf1[28] = input[7];
5475 0 : bf1[29] = input[23];
5476 0 : bf1[30] = input[15];
5477 0 : bf1[31] = input[31];
5478 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5479 :
5480 : // stage 2
5481 0 : stage++;
5482 0 : bf0 = output;
5483 0 : bf1 = step;
5484 0 : bf1[0] = bf0[0];
5485 0 : bf1[1] = bf0[1];
5486 0 : bf1[2] = bf0[2];
5487 0 : bf1[3] = bf0[3];
5488 0 : bf1[4] = bf0[4];
5489 0 : bf1[5] = bf0[5];
5490 0 : bf1[6] = bf0[6];
5491 0 : bf1[7] = bf0[7];
5492 0 : bf1[8] = bf0[8];
5493 0 : bf1[9] = bf0[9];
5494 0 : bf1[10] = bf0[10];
5495 0 : bf1[11] = bf0[11];
5496 0 : bf1[12] = bf0[12];
5497 0 : bf1[13] = bf0[13];
5498 0 : bf1[14] = bf0[14];
5499 0 : bf1[15] = bf0[15];
5500 0 : bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit);
5501 0 : bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit);
5502 0 : bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit);
5503 0 : bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit);
5504 0 : bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit);
5505 0 : bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit);
5506 0 : bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit);
5507 0 : bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit);
5508 0 : bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit);
5509 0 : bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit);
5510 0 : bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit);
5511 0 : bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit);
5512 0 : bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit);
5513 0 : bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit);
5514 0 : bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit);
5515 0 : bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit);
5516 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5517 :
5518 : // stage 3
5519 0 : stage++;
5520 0 : bf0 = step;
5521 0 : bf1 = output;
5522 0 : bf1[0] = bf0[0];
5523 0 : bf1[1] = bf0[1];
5524 0 : bf1[2] = bf0[2];
5525 0 : bf1[3] = bf0[3];
5526 0 : bf1[4] = bf0[4];
5527 0 : bf1[5] = bf0[5];
5528 0 : bf1[6] = bf0[6];
5529 0 : bf1[7] = bf0[7];
5530 0 : bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit);
5531 0 : bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit);
5532 0 : bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit);
5533 0 : bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit);
5534 0 : bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit);
5535 0 : bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit);
5536 0 : bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit);
5537 0 : bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit);
5538 0 : bf1[16] = clamp_value(bf0[16] + bf0[17], stage_range[stage]);
5539 0 : bf1[17] = clamp_value(bf0[16] - bf0[17], stage_range[stage]);
5540 0 : bf1[18] = clamp_value(-bf0[18] + bf0[19], stage_range[stage]);
5541 0 : bf1[19] = clamp_value(bf0[18] + bf0[19], stage_range[stage]);
5542 0 : bf1[20] = clamp_value(bf0[20] + bf0[21], stage_range[stage]);
5543 0 : bf1[21] = clamp_value(bf0[20] - bf0[21], stage_range[stage]);
5544 0 : bf1[22] = clamp_value(-bf0[22] + bf0[23], stage_range[stage]);
5545 0 : bf1[23] = clamp_value(bf0[22] + bf0[23], stage_range[stage]);
5546 0 : bf1[24] = clamp_value(bf0[24] + bf0[25], stage_range[stage]);
5547 0 : bf1[25] = clamp_value(bf0[24] - bf0[25], stage_range[stage]);
5548 0 : bf1[26] = clamp_value(-bf0[26] + bf0[27], stage_range[stage]);
5549 0 : bf1[27] = clamp_value(bf0[26] + bf0[27], stage_range[stage]);
5550 0 : bf1[28] = clamp_value(bf0[28] + bf0[29], stage_range[stage]);
5551 0 : bf1[29] = clamp_value(bf0[28] - bf0[29], stage_range[stage]);
5552 0 : bf1[30] = clamp_value(-bf0[30] + bf0[31], stage_range[stage]);
5553 0 : bf1[31] = clamp_value(bf0[30] + bf0[31], stage_range[stage]);
5554 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5555 :
5556 : // stage 4
5557 0 : stage++;
5558 0 : bf0 = output;
5559 0 : bf1 = step;
5560 0 : bf1[0] = bf0[0];
5561 0 : bf1[1] = bf0[1];
5562 0 : bf1[2] = bf0[2];
5563 0 : bf1[3] = bf0[3];
5564 0 : bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit);
5565 0 : bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit);
5566 0 : bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit);
5567 0 : bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit);
5568 0 : bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]);
5569 0 : bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]);
5570 0 : bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]);
5571 0 : bf1[11] = clamp_value(bf0[10] + bf0[11], stage_range[stage]);
5572 0 : bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]);
5573 0 : bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]);
5574 0 : bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]);
5575 0 : bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]);
5576 0 : bf1[16] = bf0[16];
5577 0 : bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit);
5578 0 : bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit);
5579 0 : bf1[19] = bf0[19];
5580 0 : bf1[20] = bf0[20];
5581 0 : bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit);
5582 0 : bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit);
5583 0 : bf1[23] = bf0[23];
5584 0 : bf1[24] = bf0[24];
5585 0 : bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit);
5586 0 : bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit);
5587 0 : bf1[27] = bf0[27];
5588 0 : bf1[28] = bf0[28];
5589 0 : bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit);
5590 0 : bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit);
5591 0 : bf1[31] = bf0[31];
5592 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5593 :
5594 : // stage 5
5595 0 : stage++;
5596 0 : bf0 = step;
5597 0 : bf1 = output;
5598 0 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
5599 0 : bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
5600 0 : bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
5601 0 : bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
5602 0 : bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]);
5603 0 : bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]);
5604 0 : bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]);
5605 0 : bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]);
5606 0 : bf1[8] = bf0[8];
5607 0 : bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
5608 0 : bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
5609 0 : bf1[11] = bf0[11];
5610 0 : bf1[12] = bf0[12];
5611 0 : bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit);
5612 0 : bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit);
5613 0 : bf1[15] = bf0[15];
5614 0 : bf1[16] = clamp_value(bf0[16] + bf0[19], stage_range[stage]);
5615 0 : bf1[17] = clamp_value(bf0[17] + bf0[18], stage_range[stage]);
5616 0 : bf1[18] = clamp_value(bf0[17] - bf0[18], stage_range[stage]);
5617 0 : bf1[19] = clamp_value(bf0[16] - bf0[19], stage_range[stage]);
5618 0 : bf1[20] = clamp_value(-bf0[20] + bf0[23], stage_range[stage]);
5619 0 : bf1[21] = clamp_value(-bf0[21] + bf0[22], stage_range[stage]);
5620 0 : bf1[22] = clamp_value(bf0[21] + bf0[22], stage_range[stage]);
5621 0 : bf1[23] = clamp_value(bf0[20] + bf0[23], stage_range[stage]);
5622 0 : bf1[24] = clamp_value(bf0[24] + bf0[27], stage_range[stage]);
5623 0 : bf1[25] = clamp_value(bf0[25] + bf0[26], stage_range[stage]);
5624 0 : bf1[26] = clamp_value(bf0[25] - bf0[26], stage_range[stage]);
5625 0 : bf1[27] = clamp_value(bf0[24] - bf0[27], stage_range[stage]);
5626 0 : bf1[28] = clamp_value(-bf0[28] + bf0[31], stage_range[stage]);
5627 0 : bf1[29] = clamp_value(-bf0[29] + bf0[30], stage_range[stage]);
5628 0 : bf1[30] = clamp_value(bf0[29] + bf0[30], stage_range[stage]);
5629 0 : bf1[31] = clamp_value(bf0[28] + bf0[31], stage_range[stage]);
5630 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5631 :
5632 : // stage 6
5633 0 : stage++;
5634 0 : bf0 = output;
5635 0 : bf1 = step;
5636 0 : bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
5637 0 : bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
5638 0 : bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
5639 0 : bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
5640 0 : bf1[4] = bf0[4];
5641 0 : bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
5642 0 : bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
5643 0 : bf1[7] = bf0[7];
5644 0 : bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]);
5645 0 : bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]);
5646 0 : bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]);
5647 0 : bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]);
5648 0 : bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]);
5649 0 : bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]);
5650 0 : bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]);
5651 0 : bf1[15] = clamp_value(bf0[12] + bf0[15], stage_range[stage]);
5652 0 : bf1[16] = bf0[16];
5653 0 : bf1[17] = bf0[17];
5654 0 : bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit);
5655 0 : bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit);
5656 0 : bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit);
5657 0 : bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit);
5658 0 : bf1[22] = bf0[22];
5659 0 : bf1[23] = bf0[23];
5660 0 : bf1[24] = bf0[24];
5661 0 : bf1[25] = bf0[25];
5662 0 : bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit);
5663 0 : bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit);
5664 0 : bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit);
5665 0 : bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit);
5666 0 : bf1[30] = bf0[30];
5667 0 : bf1[31] = bf0[31];
5668 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5669 :
5670 : // stage 7
5671 0 : stage++;
5672 0 : bf0 = step;
5673 0 : bf1 = output;
5674 0 : bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]);
5675 0 : bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]);
5676 0 : bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]);
5677 0 : bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]);
5678 0 : bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]);
5679 0 : bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]);
5680 0 : bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]);
5681 0 : bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]);
5682 0 : bf1[8] = bf0[8];
5683 0 : bf1[9] = bf0[9];
5684 0 : bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
5685 0 : bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
5686 0 : bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
5687 0 : bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
5688 0 : bf1[14] = bf0[14];
5689 0 : bf1[15] = bf0[15];
5690 0 : bf1[16] = clamp_value(bf0[16] + bf0[23], stage_range[stage]);
5691 0 : bf1[17] = clamp_value(bf0[17] + bf0[22], stage_range[stage]);
5692 0 : bf1[18] = clamp_value(bf0[18] + bf0[21], stage_range[stage]);
5693 0 : bf1[19] = clamp_value(bf0[19] + bf0[20], stage_range[stage]);
5694 0 : bf1[20] = clamp_value(bf0[19] - bf0[20], stage_range[stage]);
5695 0 : bf1[21] = clamp_value(bf0[18] - bf0[21], stage_range[stage]);
5696 0 : bf1[22] = clamp_value(bf0[17] - bf0[22], stage_range[stage]);
5697 0 : bf1[23] = clamp_value(bf0[16] - bf0[23], stage_range[stage]);
5698 0 : bf1[24] = clamp_value(-bf0[24] + bf0[31], stage_range[stage]);
5699 0 : bf1[25] = clamp_value(-bf0[25] + bf0[30], stage_range[stage]);
5700 0 : bf1[26] = clamp_value(-bf0[26] + bf0[29], stage_range[stage]);
5701 0 : bf1[27] = clamp_value(-bf0[27] + bf0[28], stage_range[stage]);
5702 0 : bf1[28] = clamp_value(bf0[27] + bf0[28], stage_range[stage]);
5703 0 : bf1[29] = clamp_value(bf0[26] + bf0[29], stage_range[stage]);
5704 0 : bf1[30] = clamp_value(bf0[25] + bf0[30], stage_range[stage]);
5705 0 : bf1[31] = clamp_value(bf0[24] + bf0[31], stage_range[stage]);
5706 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5707 :
5708 : // stage 8
5709 0 : stage++;
5710 0 : bf0 = output;
5711 0 : bf1 = step;
5712 0 : bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]);
5713 0 : bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]);
5714 0 : bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]);
5715 0 : bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]);
5716 0 : bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]);
5717 0 : bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]);
5718 0 : bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]);
5719 0 : bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]);
5720 0 : bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]);
5721 0 : bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]);
5722 0 : bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]);
5723 0 : bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]);
5724 0 : bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]);
5725 0 : bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]);
5726 0 : bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]);
5727 0 : bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]);
5728 0 : bf1[16] = bf0[16];
5729 0 : bf1[17] = bf0[17];
5730 0 : bf1[18] = bf0[18];
5731 0 : bf1[19] = bf0[19];
5732 0 : bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
5733 0 : bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
5734 0 : bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
5735 0 : bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
5736 0 : bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
5737 0 : bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
5738 0 : bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
5739 0 : bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
5740 0 : bf1[28] = bf0[28];
5741 0 : bf1[29] = bf0[29];
5742 0 : bf1[30] = bf0[30];
5743 0 : bf1[31] = bf0[31];
5744 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5745 :
5746 : // stage 9
5747 0 : stage++;
5748 0 : bf0 = step;
5749 0 : bf1 = output;
5750 0 : bf1[0] = clamp_value(bf0[0] + bf0[31], stage_range[stage]);
5751 0 : bf1[1] = clamp_value(bf0[1] + bf0[30], stage_range[stage]);
5752 0 : bf1[2] = clamp_value(bf0[2] + bf0[29], stage_range[stage]);
5753 0 : bf1[3] = clamp_value(bf0[3] + bf0[28], stage_range[stage]);
5754 0 : bf1[4] = clamp_value(bf0[4] + bf0[27], stage_range[stage]);
5755 0 : bf1[5] = clamp_value(bf0[5] + bf0[26], stage_range[stage]);
5756 0 : bf1[6] = clamp_value(bf0[6] + bf0[25], stage_range[stage]);
5757 0 : bf1[7] = clamp_value(bf0[7] + bf0[24], stage_range[stage]);
5758 0 : bf1[8] = clamp_value(bf0[8] + bf0[23], stage_range[stage]);
5759 0 : bf1[9] = clamp_value(bf0[9] + bf0[22], stage_range[stage]);
5760 0 : bf1[10] = clamp_value(bf0[10] + bf0[21], stage_range[stage]);
5761 0 : bf1[11] = clamp_value(bf0[11] + bf0[20], stage_range[stage]);
5762 0 : bf1[12] = clamp_value(bf0[12] + bf0[19], stage_range[stage]);
5763 0 : bf1[13] = clamp_value(bf0[13] + bf0[18], stage_range[stage]);
5764 0 : bf1[14] = clamp_value(bf0[14] + bf0[17], stage_range[stage]);
5765 0 : bf1[15] = clamp_value(bf0[15] + bf0[16], stage_range[stage]);
5766 0 : bf1[16] = clamp_value(bf0[15] - bf0[16], stage_range[stage]);
5767 0 : bf1[17] = clamp_value(bf0[14] - bf0[17], stage_range[stage]);
5768 0 : bf1[18] = clamp_value(bf0[13] - bf0[18], stage_range[stage]);
5769 0 : bf1[19] = clamp_value(bf0[12] - bf0[19], stage_range[stage]);
5770 0 : bf1[20] = clamp_value(bf0[11] - bf0[20], stage_range[stage]);
5771 0 : bf1[21] = clamp_value(bf0[10] - bf0[21], stage_range[stage]);
5772 0 : bf1[22] = clamp_value(bf0[9] - bf0[22], stage_range[stage]);
5773 0 : bf1[23] = clamp_value(bf0[8] - bf0[23], stage_range[stage]);
5774 0 : bf1[24] = clamp_value(bf0[7] - bf0[24], stage_range[stage]);
5775 0 : bf1[25] = clamp_value(bf0[6] - bf0[25], stage_range[stage]);
5776 0 : bf1[26] = clamp_value(bf0[5] - bf0[26], stage_range[stage]);
5777 0 : bf1[27] = clamp_value(bf0[4] - bf0[27], stage_range[stage]);
5778 0 : bf1[28] = clamp_value(bf0[3] - bf0[28], stage_range[stage]);
5779 0 : bf1[29] = clamp_value(bf0[2] - bf0[29], stage_range[stage]);
5780 0 : bf1[30] = clamp_value(bf0[1] - bf0[30], stage_range[stage]);
5781 0 : bf1[31] = clamp_value(bf0[0] - bf0[31], stage_range[stage]);
5782 0 : }
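 : /*
 : * eb_av1_iadst4_new computes the 4-point inverse ADST via the sinpi
 : * decomposition. Folding its stages together (a reading of the code
 : * below, not a spec excerpt), with s[k] = sinpi_arr(bit)[k] and using
 : * the asserted identity s[1] + s[2] == s[4]:
 : *   out[0] = round_shift(s[1]*x0 + s[3]*x1 + s[4]*x2 + s[2]*x3, bit)
 : *   out[1] = round_shift(s[2]*x0 + s[3]*x1 - s[1]*x2 - s[4]*x3, bit)
 : *   out[2] = round_shift(s[3]*(x0 - x2 + x3), bit)
 : *   out[3] = round_shift(s[4]*x0 - s[3]*x1 + s[2]*x2 - s[1]*x3, bit)
 : */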
5783 0 : void eb_av1_iadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
5784 : const int8_t *stage_range) {
5785 : (void)stage_range;
5786 0 : int32_t bit = cos_bit;
5787 0 : const int32_t *sinpi = sinpi_arr(bit);
5788 : int32_t s0, s1, s2, s3, s4, s5, s6, s7;
5789 :
5790 0 : int32_t x0 = input[0];
5791 0 : int32_t x1 = input[1];
5792 0 : int32_t x2 = input[2];
5793 0 : int32_t x3 = input[3];
5794 :
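 : // Fast path: an all-zero input vector trivially produces an all-zero output.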
5795 0 : if (!(x0 | x1 | x2 | x3)) {
5796 0 : output[0] = output[1] = output[2] = output[3] = 0;
5797 0 : return;
5798 : }
5799 :
5800 0 : assert(sinpi[1] + sinpi[2] == sinpi[4]);
5801 :
5802 : // stage 1
5803 : //s0 = range_check_value(sinpi[1] * x0, stage_range[1] + bit);
5804 : //s1 = range_check_value(sinpi[2] * x0, stage_range[1] + bit);
5805 : //s2 = range_check_value(sinpi[3] * x1, stage_range[1] + bit);
5806 : //s3 = range_check_value(sinpi[4] * x2, stage_range[1] + bit);
5807 : //s4 = range_check_value(sinpi[1] * x2, stage_range[1] + bit);
5808 : //s5 = range_check_value(sinpi[2] * x3, stage_range[1] + bit);
5809 : //s6 = range_check_value(sinpi[4] * x3, stage_range[1] + bit);
5810 :
5811 0 : s0 = sinpi[1] * x0;
5812 0 : s1 = sinpi[2] * x0;
5813 0 : s2 = sinpi[3] * x1;
5814 0 : s3 = sinpi[4] * x2;
5815 0 : s4 = sinpi[1] * x2;
5816 0 : s5 = sinpi[2] * x3;
5817 0 : s6 = sinpi[4] * x3;
5818 :
5819 : // stage 2
5820 : // NOTICE: (x0 - x2) here may use one extra bit compared to the
5821 : // opt_range_row/col specified in eb_av1_gen_inv_stage_range()
5822 : //s7 = range_check_value((x0 - x2) + x3, stage_range[2]);
5823 :
5824 : //// stage 3
5825 : //s0 = range_check_value(s0 + s3, stage_range[3] + bit);
5826 : //s1 = range_check_value(s1 - s4, stage_range[3] + bit);
5827 : //s3 = range_check_value(s2, stage_range[3] + bit);
5828 : //s2 = range_check_value(sinpi[3] * s7, stage_range[3] + bit);
5829 :
5830 : //// stage 4
5831 : //s0 = range_check_value(s0 + s5, stage_range[4] + bit);
5832 : //s1 = range_check_value(s1 - s6, stage_range[4] + bit);
5833 :
5834 : //// stage 5
5835 : //x0 = range_check_value(s0 + s3, stage_range[5] + bit);
5836 : //x1 = range_check_value(s1 + s3, stage_range[5] + bit);
5837 : //x2 = range_check_value(s2, stage_range[5] + bit);
5838 : //x3 = range_check_value(s0 + s1, stage_range[5] + bit);
5839 :
5840 : //// stage 6
5841 : //x3 = range_check_value(x3 - s3, stage_range[6] + bit);
5842 :
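 : // stage 2 (live code for the range-checked version commented out above)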
5843 0 : s7 = (x0 - x2) + x3;
5844 :
5845 : // stage 3
5846 0 : s0 = s0 + s3;
5847 0 : s1 = s1 - s4;
5848 0 : s3 = s2;
5849 0 : s2 = sinpi[3] * s7;
5850 :
5851 : // stage 4
5852 0 : s0 = s0 + s5;
5853 0 : s1 = s1 - s6;
5854 :
5855 : // stage 5
5856 0 : x0 = s0 + s3;
5857 0 : x1 = s1 + s3;
5858 0 : x2 = s2;
5859 0 : x3 = s0 + s1;
5860 :
5861 : // stage 6
5862 0 : x3 = x3 - s3;
5863 :
5864 0 : output[0] = round_shift(x0, bit);
5865 0 : output[1] = round_shift(x1, bit);
5866 0 : output[2] = round_shift(x2, bit);
5867 0 : output[3] = round_shift(x3, bit);
5868 : //range_check_buf(6, input, output, 4, stage_range[6]);
5869 : }
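 : // clamp_buf(): clamp every element of a stage buffer to 'bit' signed bits,
 : // the whole-buffer counterpart of the per-element clamp_value() calls above.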
5870 0 : static INLINE void clamp_buf(int32_t *buf, int32_t size, int8_t bit) {
5871 0 : for (int32_t i = 0; i < size; ++i) buf[i] = clamp_value(buf[i], bit);
5872 0 : }
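 : /*
 : * eb_av1_iadst8_new alternates two stage types over the output/step scratch
 : * buffers: rotation stages built from half_btf() and add/sub butterfly
 : * stages clamped with clamp_value(). Assuming the usual helper definition,
 : *   half_btf(w0, in0, w1, in1, bit) == round_shift(w0*in0 + w1*in1, bit),
 : * each half_btf pair applies one planar rotation with weights drawn from
 : * cospi_arr(cos_bit). The final stage only permutes and negates, so it
 : * needs no clamping.
 : */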
5873 0 : void eb_av1_iadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
5874 : const int8_t *stage_range) {
5875 0 : assert(output != input);
5876 0 : const int32_t *cospi = cospi_arr(cos_bit);
5877 :
5878 0 : int32_t stage = 0;
5879 : int32_t *bf0, *bf1;
5880 : int32_t step[8];
5881 :
5882 : // stage 0;
5883 :
5884 : // stage 1;
5885 0 : stage++;
5886 0 : bf1 = output;
5887 0 : bf1[0] = input[7];
5888 0 : bf1[1] = input[0];
5889 0 : bf1[2] = input[5];
5890 0 : bf1[3] = input[2];
5891 0 : bf1[4] = input[3];
5892 0 : bf1[5] = input[4];
5893 0 : bf1[6] = input[1];
5894 0 : bf1[7] = input[6];
5895 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5896 :
5897 : // stage 2
5898 0 : stage++;
5899 0 : bf0 = output;
5900 0 : bf1 = step;
5901 0 : bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit);
5902 0 : bf1[1] = half_btf(cospi[60], bf0[0], -cospi[4], bf0[1], cos_bit);
5903 0 : bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit);
5904 0 : bf1[3] = half_btf(cospi[44], bf0[2], -cospi[20], bf0[3], cos_bit);
5905 0 : bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit);
5906 0 : bf1[5] = half_btf(cospi[28], bf0[4], -cospi[36], bf0[5], cos_bit);
5907 0 : bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit);
5908 0 : bf1[7] = half_btf(cospi[12], bf0[6], -cospi[52], bf0[7], cos_bit);
5909 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5910 :
5911 : // stage 3
5912 0 : stage++;
5913 0 : bf0 = step;
5914 0 : bf1 = output;
5915 0 : bf1[0] = clamp_value(bf0[0] + bf0[4], stage_range[stage]);
5916 0 : bf1[1] = clamp_value(bf0[1] + bf0[5], stage_range[stage]);
5917 0 : bf1[2] = clamp_value(bf0[2] + bf0[6], stage_range[stage]);
5918 0 : bf1[3] = clamp_value(bf0[3] + bf0[7], stage_range[stage]);
5919 0 : bf1[4] = clamp_value(bf0[0] - bf0[4], stage_range[stage]);
5920 0 : bf1[5] = clamp_value(bf0[1] - bf0[5], stage_range[stage]);
5921 0 : bf1[6] = clamp_value(bf0[2] - bf0[6], stage_range[stage]);
5922 0 : bf1[7] = clamp_value(bf0[3] - bf0[7], stage_range[stage]);
5923 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5924 :
5925 : // stage 4
5926 0 : stage++;
5927 0 : bf0 = output;
5928 0 : bf1 = step;
5929 0 : bf1[0] = bf0[0];
5930 0 : bf1[1] = bf0[1];
5931 0 : bf1[2] = bf0[2];
5932 0 : bf1[3] = bf0[3];
5933 0 : bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
5934 0 : bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
5935 0 : bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
5936 0 : bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
5937 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5938 :
5939 : // stage 5
5940 0 : stage++;
5941 0 : bf0 = step;
5942 0 : bf1 = output;
5943 0 : bf1[0] = clamp_value(bf0[0] + bf0[2], stage_range[stage]);
5944 0 : bf1[1] = clamp_value(bf0[1] + bf0[3], stage_range[stage]);
5945 0 : bf1[2] = clamp_value(bf0[0] - bf0[2], stage_range[stage]);
5946 0 : bf1[3] = clamp_value(bf0[1] - bf0[3], stage_range[stage]);
5947 0 : bf1[4] = clamp_value(bf0[4] + bf0[6], stage_range[stage]);
5948 0 : bf1[5] = clamp_value(bf0[5] + bf0[7], stage_range[stage]);
5949 0 : bf1[6] = clamp_value(bf0[4] - bf0[6], stage_range[stage]);
5950 0 : bf1[7] = clamp_value(bf0[5] - bf0[7], stage_range[stage]);
5951 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5952 :
5953 : // stage 6
5954 0 : stage++;
5955 0 : bf0 = output;
5956 0 : bf1 = step;
5957 0 : bf1[0] = bf0[0];
5958 0 : bf1[1] = bf0[1];
5959 0 : bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
5960 0 : bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
5961 0 : bf1[4] = bf0[4];
5962 0 : bf1[5] = bf0[5];
5963 0 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
5964 0 : bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
5965 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
5966 :
5967 : // stage 7
5968 0 : stage++;
5969 0 : bf0 = step;
5970 0 : bf1 = output;
5971 0 : bf1[0] = bf0[0];
5972 0 : bf1[1] = -bf0[4];
5973 0 : bf1[2] = bf0[6];
5974 0 : bf1[3] = -bf0[2];
5975 0 : bf1[4] = bf0[3];
5976 0 : bf1[5] = -bf0[7];
5977 0 : bf1[6] = bf0[5];
5978 0 : bf1[7] = -bf0[1];
5979 0 : }
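 : // eb_av1_iadst16_new widens the 8-point flow above to 16 samples with two
 : // extra stages; the same permute / rotate / butterfly-and-clamp cadence
 : // ends in a sign-alternating output shuffle.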
5980 0 : void eb_av1_iadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
5981 : const int8_t *stage_range) {
5982 0 : assert(output != input);
5983 0 : const int32_t *cospi = cospi_arr(cos_bit);
5984 :
5985 0 : int32_t stage = 0;
5986 : int32_t *bf0, *bf1;
5987 : int32_t step[16];
5988 :
5989 : // stage 0;
5990 :
5991 : // stage 1;
5992 0 : stage++;
5993 0 : bf1 = output;
5994 0 : bf1[0] = input[15];
5995 0 : bf1[1] = input[0];
5996 0 : bf1[2] = input[13];
5997 0 : bf1[3] = input[2];
5998 0 : bf1[4] = input[11];
5999 0 : bf1[5] = input[4];
6000 0 : bf1[6] = input[9];
6001 0 : bf1[7] = input[6];
6002 0 : bf1[8] = input[7];
6003 0 : bf1[9] = input[8];
6004 0 : bf1[10] = input[5];
6005 0 : bf1[11] = input[10];
6006 0 : bf1[12] = input[3];
6007 0 : bf1[13] = input[12];
6008 0 : bf1[14] = input[1];
6009 0 : bf1[15] = input[14];
6010 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
6011 :
6012 : // stage 2
6013 0 : stage++;
6014 0 : bf0 = output;
6015 0 : bf1 = step;
6016 0 : bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit);
6017 0 : bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit);
6018 0 : bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit);
6019 0 : bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit);
6020 0 : bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit);
6021 0 : bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit);
6022 0 : bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit);
6023 0 : bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit);
6024 0 : bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit);
6025 0 : bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit);
6026 0 : bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit);
6027 0 : bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit);
6028 0 : bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit);
6029 0 : bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit);
6030 0 : bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit);
6031 0 : bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit);
6032 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
6033 :
6034 : // stage 3
6035 0 : stage++;
6036 0 : bf0 = step;
6037 0 : bf1 = output;
6038 0 : bf1[0] = clamp_value(bf0[0] + bf0[8], stage_range[stage]);
6039 0 : bf1[1] = clamp_value(bf0[1] + bf0[9], stage_range[stage]);
6040 0 : bf1[2] = clamp_value(bf0[2] + bf0[10], stage_range[stage]);
6041 0 : bf1[3] = clamp_value(bf0[3] + bf0[11], stage_range[stage]);
6042 0 : bf1[4] = clamp_value(bf0[4] + bf0[12], stage_range[stage]);
6043 0 : bf1[5] = clamp_value(bf0[5] + bf0[13], stage_range[stage]);
6044 0 : bf1[6] = clamp_value(bf0[6] + bf0[14], stage_range[stage]);
6045 0 : bf1[7] = clamp_value(bf0[7] + bf0[15], stage_range[stage]);
6046 0 : bf1[8] = clamp_value(bf0[0] - bf0[8], stage_range[stage]);
6047 0 : bf1[9] = clamp_value(bf0[1] - bf0[9], stage_range[stage]);
6048 0 : bf1[10] = clamp_value(bf0[2] - bf0[10], stage_range[stage]);
6049 0 : bf1[11] = clamp_value(bf0[3] - bf0[11], stage_range[stage]);
6050 0 : bf1[12] = clamp_value(bf0[4] - bf0[12], stage_range[stage]);
6051 0 : bf1[13] = clamp_value(bf0[5] - bf0[13], stage_range[stage]);
6052 0 : bf1[14] = clamp_value(bf0[6] - bf0[14], stage_range[stage]);
6053 0 : bf1[15] = clamp_value(bf0[7] - bf0[15], stage_range[stage]);
6054 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
6055 :
6056 : // stage 4
6057 0 : stage++;
6058 0 : bf0 = output;
6059 0 : bf1 = step;
6060 0 : bf1[0] = bf0[0];
6061 0 : bf1[1] = bf0[1];
6062 0 : bf1[2] = bf0[2];
6063 0 : bf1[3] = bf0[3];
6064 0 : bf1[4] = bf0[4];
6065 0 : bf1[5] = bf0[5];
6066 0 : bf1[6] = bf0[6];
6067 0 : bf1[7] = bf0[7];
6068 0 : bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit);
6069 0 : bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit);
6070 0 : bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit);
6071 0 : bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit);
6072 0 : bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit);
6073 0 : bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit);
6074 0 : bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit);
6075 0 : bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit);
6076 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
6077 :
6078 : // stage 5
6079 0 : stage++;
6080 0 : bf0 = step;
6081 0 : bf1 = output;
6082 0 : bf1[0] = clamp_value(bf0[0] + bf0[4], stage_range[stage]);
6083 0 : bf1[1] = clamp_value(bf0[1] + bf0[5], stage_range[stage]);
6084 0 : bf1[2] = clamp_value(bf0[2] + bf0[6], stage_range[stage]);
6085 0 : bf1[3] = clamp_value(bf0[3] + bf0[7], stage_range[stage]);
6086 0 : bf1[4] = clamp_value(bf0[0] - bf0[4], stage_range[stage]);
6087 0 : bf1[5] = clamp_value(bf0[1] - bf0[5], stage_range[stage]);
6088 0 : bf1[6] = clamp_value(bf0[2] - bf0[6], stage_range[stage]);
6089 0 : bf1[7] = clamp_value(bf0[3] - bf0[7], stage_range[stage]);
6090 0 : bf1[8] = clamp_value(bf0[8] + bf0[12], stage_range[stage]);
6091 0 : bf1[9] = clamp_value(bf0[9] + bf0[13], stage_range[stage]);
6092 0 : bf1[10] = clamp_value(bf0[10] + bf0[14], stage_range[stage]);
6093 0 : bf1[11] = clamp_value(bf0[11] + bf0[15], stage_range[stage]);
6094 0 : bf1[12] = clamp_value(bf0[8] - bf0[12], stage_range[stage]);
6095 0 : bf1[13] = clamp_value(bf0[9] - bf0[13], stage_range[stage]);
6096 0 : bf1[14] = clamp_value(bf0[10] - bf0[14], stage_range[stage]);
6097 0 : bf1[15] = clamp_value(bf0[11] - bf0[15], stage_range[stage]);
6098 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
6099 :
6100 : // stage 6
6101 0 : stage++;
6102 0 : bf0 = output;
6103 0 : bf1 = step;
6104 0 : bf1[0] = bf0[0];
6105 0 : bf1[1] = bf0[1];
6106 0 : bf1[2] = bf0[2];
6107 0 : bf1[3] = bf0[3];
6108 0 : bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
6109 0 : bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
6110 0 : bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
6111 0 : bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
6112 0 : bf1[8] = bf0[8];
6113 0 : bf1[9] = bf0[9];
6114 0 : bf1[10] = bf0[10];
6115 0 : bf1[11] = bf0[11];
6116 0 : bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit);
6117 0 : bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit);
6118 0 : bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit);
6119 0 : bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit);
6120 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
6121 :
6122 : // stage 7
6123 0 : stage++;
6124 0 : bf0 = step;
6125 0 : bf1 = output;
6126 0 : bf1[0] = clamp_value(bf0[0] + bf0[2], stage_range[stage]);
6127 0 : bf1[1] = clamp_value(bf0[1] + bf0[3], stage_range[stage]);
6128 0 : bf1[2] = clamp_value(bf0[0] - bf0[2], stage_range[stage]);
6129 0 : bf1[3] = clamp_value(bf0[1] - bf0[3], stage_range[stage]);
6130 0 : bf1[4] = clamp_value(bf0[4] + bf0[6], stage_range[stage]);
6131 0 : bf1[5] = clamp_value(bf0[5] + bf0[7], stage_range[stage]);
6132 0 : bf1[6] = clamp_value(bf0[4] - bf0[6], stage_range[stage]);
6133 0 : bf1[7] = clamp_value(bf0[5] - bf0[7], stage_range[stage]);
6134 0 : bf1[8] = clamp_value(bf0[8] + bf0[10], stage_range[stage]);
6135 0 : bf1[9] = clamp_value(bf0[9] + bf0[11], stage_range[stage]);
6136 0 : bf1[10] = clamp_value(bf0[8] - bf0[10], stage_range[stage]);
6137 0 : bf1[11] = clamp_value(bf0[9] - bf0[11], stage_range[stage]);
6138 0 : bf1[12] = clamp_value(bf0[12] + bf0[14], stage_range[stage]);
6139 0 : bf1[13] = clamp_value(bf0[13] + bf0[15], stage_range[stage]);
6140 0 : bf1[14] = clamp_value(bf0[12] - bf0[14], stage_range[stage]);
6141 0 : bf1[15] = clamp_value(bf0[13] - bf0[15], stage_range[stage]);
6142 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
6143 :
6144 : // stage 8
6145 0 : stage++;
6146 0 : bf0 = output;
6147 0 : bf1 = step;
6148 0 : bf1[0] = bf0[0];
6149 0 : bf1[1] = bf0[1];
6150 0 : bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
6151 0 : bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
6152 0 : bf1[4] = bf0[4];
6153 0 : bf1[5] = bf0[5];
6154 0 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
6155 0 : bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
6156 0 : bf1[8] = bf0[8];
6157 0 : bf1[9] = bf0[9];
6158 0 : bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit);
6159 0 : bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit);
6160 0 : bf1[12] = bf0[12];
6161 0 : bf1[13] = bf0[13];
6162 0 : bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit);
6163 0 : bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit);
6164 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
6165 :
6166 : // stage 9
6167 0 : stage++;
6168 0 : bf0 = step;
6169 0 : bf1 = output;
6170 0 : bf1[0] = bf0[0];
6171 0 : bf1[1] = -bf0[8];
6172 0 : bf1[2] = bf0[12];
6173 0 : bf1[3] = -bf0[4];
6174 0 : bf1[4] = bf0[6];
6175 0 : bf1[5] = -bf0[14];
6176 0 : bf1[6] = bf0[10];
6177 0 : bf1[7] = -bf0[2];
6178 0 : bf1[8] = bf0[3];
6179 0 : bf1[9] = -bf0[11];
6180 0 : bf1[10] = bf0[15];
6181 0 : bf1[11] = -bf0[7];
6182 0 : bf1[12] = bf0[5];
6183 0 : bf1[13] = -bf0[13];
6184 0 : bf1[14] = bf0[9];
6185 0 : bf1[15] = -bf0[1];
6186 0 : }
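 : /*
 : * av1_iadst32_new builds the 32-point inverse ADST from DCT-style
 : * butterflies over a signed input permutation (stage 1). Unlike the 8- and
 : * 16-point versions above, it clamps the whole working buffer once per
 : * stage via clamp_buf(); cospi is re-fetched before each rotation stage,
 : * although cospi_arr(cos_bit) never changes within the call.
 : */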
6187 0 : void av1_iadst32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
6188 : const int8_t *stage_range) {
6189 0 : const int32_t size = 32;
6190 : const int32_t *cospi;
6191 :
6192 0 : int32_t stage = 0;
6193 : int32_t *bf0, *bf1;
6194 : int32_t step[32];
6195 :
6196 : // stage 0;
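 : // NOTE: the cast below drops const, so the caller's input buffer is
 : // clamped in place before use.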
6197 0 : clamp_buf((int32_t *)input, size, stage_range[stage]);
6198 :
6199 : // stage 1;
6200 0 : stage++;
6201 0 : assert(output != input);
6202 0 : bf1 = output;
6203 0 : bf1[0] = input[0];
6204 0 : bf1[1] = -input[31];
6205 0 : bf1[2] = -input[15];
6206 0 : bf1[3] = input[16];
6207 0 : bf1[4] = -input[7];
6208 0 : bf1[5] = input[24];
6209 0 : bf1[6] = input[8];
6210 0 : bf1[7] = -input[23];
6211 0 : bf1[8] = -input[3];
6212 0 : bf1[9] = input[28];
6213 0 : bf1[10] = input[12];
6214 0 : bf1[11] = -input[19];
6215 0 : bf1[12] = input[4];
6216 0 : bf1[13] = -input[27];
6217 0 : bf1[14] = -input[11];
6218 0 : bf1[15] = input[20];
6219 0 : bf1[16] = -input[1];
6220 0 : bf1[17] = input[30];
6221 0 : bf1[18] = input[14];
6222 0 : bf1[19] = -input[17];
6223 0 : bf1[20] = input[6];
6224 0 : bf1[21] = -input[25];
6225 0 : bf1[22] = -input[9];
6226 0 : bf1[23] = input[22];
6227 0 : bf1[24] = input[2];
6228 0 : bf1[25] = -input[29];
6229 0 : bf1[26] = -input[13];
6230 0 : bf1[27] = input[18];
6231 0 : bf1[28] = -input[5];
6232 0 : bf1[29] = input[26];
6233 0 : bf1[30] = input[10];
6234 0 : bf1[31] = -input[21];
6235 0 : clamp_buf(bf1, size, stage_range[stage]);
6236 :
6237 : // stage 2
6238 0 : stage++;
6239 0 : cospi = cospi_arr(cos_bit);
6240 0 : bf0 = output;
6241 0 : bf1 = step;
6242 0 : bf1[0] = bf0[0];
6243 0 : bf1[1] = bf0[1];
6244 0 : bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
6245 0 : bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
6246 0 : bf1[4] = bf0[4];
6247 0 : bf1[5] = bf0[5];
6248 0 : bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
6249 0 : bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
6250 0 : bf1[8] = bf0[8];
6251 0 : bf1[9] = bf0[9];
6252 0 : bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit);
6253 0 : bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit);
6254 0 : bf1[12] = bf0[12];
6255 0 : bf1[13] = bf0[13];
6256 0 : bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit);
6257 0 : bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit);
6258 0 : bf1[16] = bf0[16];
6259 0 : bf1[17] = bf0[17];
6260 0 : bf1[18] = half_btf(cospi[32], bf0[18], cospi[32], bf0[19], cos_bit);
6261 0 : bf1[19] = half_btf(cospi[32], bf0[18], -cospi[32], bf0[19], cos_bit);
6262 0 : bf1[20] = bf0[20];
6263 0 : bf1[21] = bf0[21];
6264 0 : bf1[22] = half_btf(cospi[32], bf0[22], cospi[32], bf0[23], cos_bit);
6265 0 : bf1[23] = half_btf(cospi[32], bf0[22], -cospi[32], bf0[23], cos_bit);
6266 0 : bf1[24] = bf0[24];
6267 0 : bf1[25] = bf0[25];
6268 0 : bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[27], cos_bit);
6269 0 : bf1[27] = half_btf(cospi[32], bf0[26], -cospi[32], bf0[27], cos_bit);
6270 0 : bf1[28] = bf0[28];
6271 0 : bf1[29] = bf0[29];
6272 0 : bf1[30] = half_btf(cospi[32], bf0[30], cospi[32], bf0[31], cos_bit);
6273 0 : bf1[31] = half_btf(cospi[32], bf0[30], -cospi[32], bf0[31], cos_bit);
6274 0 : clamp_buf(bf1, size, stage_range[stage]);
6275 :
6276 : // stage 3
6277 0 : stage++;
6278 0 : bf0 = step;
6279 0 : bf1 = output;
6280 0 : bf1[0] = bf0[0] + bf0[2];
6281 0 : bf1[1] = bf0[1] + bf0[3];
6282 0 : bf1[2] = bf0[0] - bf0[2];
6283 0 : bf1[3] = bf0[1] - bf0[3];
6284 0 : bf1[4] = bf0[4] + bf0[6];
6285 0 : bf1[5] = bf0[5] + bf0[7];
6286 0 : bf1[6] = bf0[4] - bf0[6];
6287 0 : bf1[7] = bf0[5] - bf0[7];
6288 0 : bf1[8] = bf0[8] + bf0[10];
6289 0 : bf1[9] = bf0[9] + bf0[11];
6290 0 : bf1[10] = bf0[8] - bf0[10];
6291 0 : bf1[11] = bf0[9] - bf0[11];
6292 0 : bf1[12] = bf0[12] + bf0[14];
6293 0 : bf1[13] = bf0[13] + bf0[15];
6294 0 : bf1[14] = bf0[12] - bf0[14];
6295 0 : bf1[15] = bf0[13] - bf0[15];
6296 0 : bf1[16] = bf0[16] + bf0[18];
6297 0 : bf1[17] = bf0[17] + bf0[19];
6298 0 : bf1[18] = bf0[16] - bf0[18];
6299 0 : bf1[19] = bf0[17] - bf0[19];
6300 0 : bf1[20] = bf0[20] + bf0[22];
6301 0 : bf1[21] = bf0[21] + bf0[23];
6302 0 : bf1[22] = bf0[20] - bf0[22];
6303 0 : bf1[23] = bf0[21] - bf0[23];
6304 0 : bf1[24] = bf0[24] + bf0[26];
6305 0 : bf1[25] = bf0[25] + bf0[27];
6306 0 : bf1[26] = bf0[24] - bf0[26];
6307 0 : bf1[27] = bf0[25] - bf0[27];
6308 0 : bf1[28] = bf0[28] + bf0[30];
6309 0 : bf1[29] = bf0[29] + bf0[31];
6310 0 : bf1[30] = bf0[28] - bf0[30];
6311 0 : bf1[31] = bf0[29] - bf0[31];
6312 0 : clamp_buf(bf1, size, stage_range[stage]);
6313 :
6314 : // stage 4
6315 0 : stage++;
6316 0 : cospi = cospi_arr(cos_bit);
6317 0 : bf0 = output;
6318 0 : bf1 = step;
6319 0 : bf1[0] = bf0[0];
6320 0 : bf1[1] = bf0[1];
6321 0 : bf1[2] = bf0[2];
6322 0 : bf1[3] = bf0[3];
6323 0 : bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
6324 0 : bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
6325 0 : bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
6326 0 : bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
6327 0 : bf1[8] = bf0[8];
6328 0 : bf1[9] = bf0[9];
6329 0 : bf1[10] = bf0[10];
6330 0 : bf1[11] = bf0[11];
6331 0 : bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit);
6332 0 : bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit);
6333 0 : bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit);
6334 0 : bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit);
6335 0 : bf1[16] = bf0[16];
6336 0 : bf1[17] = bf0[17];
6337 0 : bf1[18] = bf0[18];
6338 0 : bf1[19] = bf0[19];
6339 0 : bf1[20] = half_btf(cospi[16], bf0[20], cospi[48], bf0[21], cos_bit);
6340 0 : bf1[21] = half_btf(cospi[48], bf0[20], -cospi[16], bf0[21], cos_bit);
6341 0 : bf1[22] = half_btf(-cospi[48], bf0[22], cospi[16], bf0[23], cos_bit);
6342 0 : bf1[23] = half_btf(cospi[16], bf0[22], cospi[48], bf0[23], cos_bit);
6343 0 : bf1[24] = bf0[24];
6344 0 : bf1[25] = bf0[25];
6345 0 : bf1[26] = bf0[26];
6346 0 : bf1[27] = bf0[27];
6347 0 : bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[29], cos_bit);
6348 0 : bf1[29] = half_btf(cospi[48], bf0[28], -cospi[16], bf0[29], cos_bit);
6349 0 : bf1[30] = half_btf(-cospi[48], bf0[30], cospi[16], bf0[31], cos_bit);
6350 0 : bf1[31] = half_btf(cospi[16], bf0[30], cospi[48], bf0[31], cos_bit);
6351 0 : clamp_buf(bf1, size, stage_range[stage]);
6352 :
6353 : // stage 5
6354 0 : stage++;
6355 0 : bf0 = step;
6356 0 : bf1 = output;
6357 0 : bf1[0] = bf0[0] + bf0[4];
6358 0 : bf1[1] = bf0[1] + bf0[5];
6359 0 : bf1[2] = bf0[2] + bf0[6];
6360 0 : bf1[3] = bf0[3] + bf0[7];
6361 0 : bf1[4] = bf0[0] - bf0[4];
6362 0 : bf1[5] = bf0[1] - bf0[5];
6363 0 : bf1[6] = bf0[2] - bf0[6];
6364 0 : bf1[7] = bf0[3] - bf0[7];
6365 0 : bf1[8] = bf0[8] + bf0[12];
6366 0 : bf1[9] = bf0[9] + bf0[13];
6367 0 : bf1[10] = bf0[10] + bf0[14];
6368 0 : bf1[11] = bf0[11] + bf0[15];
6369 0 : bf1[12] = bf0[8] - bf0[12];
6370 0 : bf1[13] = bf0[9] - bf0[13];
6371 0 : bf1[14] = bf0[10] - bf0[14];
6372 0 : bf1[15] = bf0[11] - bf0[15];
6373 0 : bf1[16] = bf0[16] + bf0[20];
6374 0 : bf1[17] = bf0[17] + bf0[21];
6375 0 : bf1[18] = bf0[18] + bf0[22];
6376 0 : bf1[19] = bf0[19] + bf0[23];
6377 0 : bf1[20] = bf0[16] - bf0[20];
6378 0 : bf1[21] = bf0[17] - bf0[21];
6379 0 : bf1[22] = bf0[18] - bf0[22];
6380 0 : bf1[23] = bf0[19] - bf0[23];
6381 0 : bf1[24] = bf0[24] + bf0[28];
6382 0 : bf1[25] = bf0[25] + bf0[29];
6383 0 : bf1[26] = bf0[26] + bf0[30];
6384 0 : bf1[27] = bf0[27] + bf0[31];
6385 0 : bf1[28] = bf0[24] - bf0[28];
6386 0 : bf1[29] = bf0[25] - bf0[29];
6387 0 : bf1[30] = bf0[26] - bf0[30];
6388 0 : bf1[31] = bf0[27] - bf0[31];
6389 0 : clamp_buf(bf1, size, stage_range[stage]);
6390 :
6391 : // stage 6
6392 0 : stage++;
6393 0 : cospi = cospi_arr(cos_bit);
6394 0 : bf0 = output;
6395 0 : bf1 = step;
6396 0 : bf1[0] = bf0[0];
6397 0 : bf1[1] = bf0[1];
6398 0 : bf1[2] = bf0[2];
6399 0 : bf1[3] = bf0[3];
6400 0 : bf1[4] = bf0[4];
6401 0 : bf1[5] = bf0[5];
6402 0 : bf1[6] = bf0[6];
6403 0 : bf1[7] = bf0[7];
6404 0 : bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit);
6405 0 : bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit);
6406 0 : bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit);
6407 0 : bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit);
6408 0 : bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit);
6409 0 : bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit);
6410 0 : bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit);
6411 0 : bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit);
6412 0 : bf1[16] = bf0[16];
6413 0 : bf1[17] = bf0[17];
6414 0 : bf1[18] = bf0[18];
6415 0 : bf1[19] = bf0[19];
6416 0 : bf1[20] = bf0[20];
6417 0 : bf1[21] = bf0[21];
6418 0 : bf1[22] = bf0[22];
6419 0 : bf1[23] = bf0[23];
6420 0 : bf1[24] = half_btf(cospi[8], bf0[24], cospi[56], bf0[25], cos_bit);
6421 0 : bf1[25] = half_btf(cospi[56], bf0[24], -cospi[8], bf0[25], cos_bit);
6422 0 : bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[27], cos_bit);
6423 0 : bf1[27] = half_btf(cospi[24], bf0[26], -cospi[40], bf0[27], cos_bit);
6424 0 : bf1[28] = half_btf(-cospi[56], bf0[28], cospi[8], bf0[29], cos_bit);
6425 0 : bf1[29] = half_btf(cospi[8], bf0[28], cospi[56], bf0[29], cos_bit);
6426 0 : bf1[30] = half_btf(-cospi[24], bf0[30], cospi[40], bf0[31], cos_bit);
6427 0 : bf1[31] = half_btf(cospi[40], bf0[30], cospi[24], bf0[31], cos_bit);
6428 0 : clamp_buf(bf1, size, stage_range[stage]);
6429 :
6430 : // stage 7
6431 0 : stage++;
6432 0 : bf0 = step;
6433 0 : bf1 = output;
6434 0 : bf1[0] = bf0[0] + bf0[8];
6435 0 : bf1[1] = bf0[1] + bf0[9];
6436 0 : bf1[2] = bf0[2] + bf0[10];
6437 0 : bf1[3] = bf0[3] + bf0[11];
6438 0 : bf1[4] = bf0[4] + bf0[12];
6439 0 : bf1[5] = bf0[5] + bf0[13];
6440 0 : bf1[6] = bf0[6] + bf0[14];
6441 0 : bf1[7] = bf0[7] + bf0[15];
6442 0 : bf1[8] = bf0[0] - bf0[8];
6443 0 : bf1[9] = bf0[1] - bf0[9];
6444 0 : bf1[10] = bf0[2] - bf0[10];
6445 0 : bf1[11] = bf0[3] - bf0[11];
6446 0 : bf1[12] = bf0[4] - bf0[12];
6447 0 : bf1[13] = bf0[5] - bf0[13];
6448 0 : bf1[14] = bf0[6] - bf0[14];
6449 0 : bf1[15] = bf0[7] - bf0[15];
6450 0 : bf1[16] = bf0[16] + bf0[24];
6451 0 : bf1[17] = bf0[17] + bf0[25];
6452 0 : bf1[18] = bf0[18] + bf0[26];
6453 0 : bf1[19] = bf0[19] + bf0[27];
6454 0 : bf1[20] = bf0[20] + bf0[28];
6455 0 : bf1[21] = bf0[21] + bf0[29];
6456 0 : bf1[22] = bf0[22] + bf0[30];
6457 0 : bf1[23] = bf0[23] + bf0[31];
6458 0 : bf1[24] = bf0[16] - bf0[24];
6459 0 : bf1[25] = bf0[17] - bf0[25];
6460 0 : bf1[26] = bf0[18] - bf0[26];
6461 0 : bf1[27] = bf0[19] - bf0[27];
6462 0 : bf1[28] = bf0[20] - bf0[28];
6463 0 : bf1[29] = bf0[21] - bf0[29];
6464 0 : bf1[30] = bf0[22] - bf0[30];
6465 0 : bf1[31] = bf0[23] - bf0[31];
6466 0 : clamp_buf(bf1, size, stage_range[stage]);
6467 :
6468 : // stage 8
6469 0 : stage++;
6470 0 : cospi = cospi_arr(cos_bit);
6471 0 : bf0 = output;
6472 0 : bf1 = step;
6473 0 : bf1[0] = bf0[0];
6474 0 : bf1[1] = bf0[1];
6475 0 : bf1[2] = bf0[2];
6476 0 : bf1[3] = bf0[3];
6477 0 : bf1[4] = bf0[4];
6478 0 : bf1[5] = bf0[5];
6479 0 : bf1[6] = bf0[6];
6480 0 : bf1[7] = bf0[7];
6481 0 : bf1[8] = bf0[8];
6482 0 : bf1[9] = bf0[9];
6483 0 : bf1[10] = bf0[10];
6484 0 : bf1[11] = bf0[11];
6485 0 : bf1[12] = bf0[12];
6486 0 : bf1[13] = bf0[13];
6487 0 : bf1[14] = bf0[14];
6488 0 : bf1[15] = bf0[15];
6489 0 : bf1[16] = half_btf(cospi[4], bf0[16], cospi[60], bf0[17], cos_bit);
6490 0 : bf1[17] = half_btf(cospi[60], bf0[16], -cospi[4], bf0[17], cos_bit);
6491 0 : bf1[18] = half_btf(cospi[20], bf0[18], cospi[44], bf0[19], cos_bit);
6492 0 : bf1[19] = half_btf(cospi[44], bf0[18], -cospi[20], bf0[19], cos_bit);
6493 0 : bf1[20] = half_btf(cospi[36], bf0[20], cospi[28], bf0[21], cos_bit);
6494 0 : bf1[21] = half_btf(cospi[28], bf0[20], -cospi[36], bf0[21], cos_bit);
6495 0 : bf1[22] = half_btf(cospi[52], bf0[22], cospi[12], bf0[23], cos_bit);
6496 0 : bf1[23] = half_btf(cospi[12], bf0[22], -cospi[52], bf0[23], cos_bit);
6497 0 : bf1[24] = half_btf(-cospi[60], bf0[24], cospi[4], bf0[25], cos_bit);
6498 0 : bf1[25] = half_btf(cospi[4], bf0[24], cospi[60], bf0[25], cos_bit);
6499 0 : bf1[26] = half_btf(-cospi[44], bf0[26], cospi[20], bf0[27], cos_bit);
6500 0 : bf1[27] = half_btf(cospi[20], bf0[26], cospi[44], bf0[27], cos_bit);
6501 0 : bf1[28] = half_btf(-cospi[28], bf0[28], cospi[36], bf0[29], cos_bit);
6502 0 : bf1[29] = half_btf(cospi[36], bf0[28], cospi[28], bf0[29], cos_bit);
6503 0 : bf1[30] = half_btf(-cospi[12], bf0[30], cospi[52], bf0[31], cos_bit);
6504 0 : bf1[31] = half_btf(cospi[52], bf0[30], cospi[12], bf0[31], cos_bit);
6505 0 : clamp_buf(bf1, size, stage_range[stage]);
6506 :
6507 : // stage 9
6508 0 : stage++;
6509 0 : bf0 = step;
6510 0 : bf1 = output;
6511 0 : bf1[0] = bf0[0] + bf0[16];
6512 0 : bf1[1] = bf0[1] + bf0[17];
6513 0 : bf1[2] = bf0[2] + bf0[18];
6514 0 : bf1[3] = bf0[3] + bf0[19];
6515 0 : bf1[4] = bf0[4] + bf0[20];
6516 0 : bf1[5] = bf0[5] + bf0[21];
6517 0 : bf1[6] = bf0[6] + bf0[22];
6518 0 : bf1[7] = bf0[7] + bf0[23];
6519 0 : bf1[8] = bf0[8] + bf0[24];
6520 0 : bf1[9] = bf0[9] + bf0[25];
6521 0 : bf1[10] = bf0[10] + bf0[26];
6522 0 : bf1[11] = bf0[11] + bf0[27];
6523 0 : bf1[12] = bf0[12] + bf0[28];
6524 0 : bf1[13] = bf0[13] + bf0[29];
6525 0 : bf1[14] = bf0[14] + bf0[30];
6526 0 : bf1[15] = bf0[15] + bf0[31];
6527 0 : bf1[16] = bf0[0] - bf0[16];
6528 0 : bf1[17] = bf0[1] - bf0[17];
6529 0 : bf1[18] = bf0[2] - bf0[18];
6530 0 : bf1[19] = bf0[3] - bf0[19];
6531 0 : bf1[20] = bf0[4] - bf0[20];
6532 0 : bf1[21] = bf0[5] - bf0[21];
6533 0 : bf1[22] = bf0[6] - bf0[22];
6534 0 : bf1[23] = bf0[7] - bf0[23];
6535 0 : bf1[24] = bf0[8] - bf0[24];
6536 0 : bf1[25] = bf0[9] - bf0[25];
6537 0 : bf1[26] = bf0[10] - bf0[26];
6538 0 : bf1[27] = bf0[11] - bf0[27];
6539 0 : bf1[28] = bf0[12] - bf0[28];
6540 0 : bf1[29] = bf0[13] - bf0[29];
6541 0 : bf1[30] = bf0[14] - bf0[30];
6542 0 : bf1[31] = bf0[15] - bf0[31];
6543 0 : clamp_buf(bf1, size, stage_range[stage]);
6544 :
6545 : // stage 10
6546 0 : stage++;
6547 0 : cospi = cospi_arr(cos_bit);
6548 0 : bf0 = output;
6549 0 : bf1 = step;
6550 0 : bf1[0] = half_btf(cospi[1], bf0[0], cospi[63], bf0[1], cos_bit);
6551 0 : bf1[1] = half_btf(cospi[63], bf0[0], -cospi[1], bf0[1], cos_bit);
6552 0 : bf1[2] = half_btf(cospi[5], bf0[2], cospi[59], bf0[3], cos_bit);
6553 0 : bf1[3] = half_btf(cospi[59], bf0[2], -cospi[5], bf0[3], cos_bit);
6554 0 : bf1[4] = half_btf(cospi[9], bf0[4], cospi[55], bf0[5], cos_bit);
6555 0 : bf1[5] = half_btf(cospi[55], bf0[4], -cospi[9], bf0[5], cos_bit);
6556 0 : bf1[6] = half_btf(cospi[13], bf0[6], cospi[51], bf0[7], cos_bit);
6557 0 : bf1[7] = half_btf(cospi[51], bf0[6], -cospi[13], bf0[7], cos_bit);
6558 0 : bf1[8] = half_btf(cospi[17], bf0[8], cospi[47], bf0[9], cos_bit);
6559 0 : bf1[9] = half_btf(cospi[47], bf0[8], -cospi[17], bf0[9], cos_bit);
6560 0 : bf1[10] = half_btf(cospi[21], bf0[10], cospi[43], bf0[11], cos_bit);
6561 0 : bf1[11] = half_btf(cospi[43], bf0[10], -cospi[21], bf0[11], cos_bit);
6562 0 : bf1[12] = half_btf(cospi[25], bf0[12], cospi[39], bf0[13], cos_bit);
6563 0 : bf1[13] = half_btf(cospi[39], bf0[12], -cospi[25], bf0[13], cos_bit);
6564 0 : bf1[14] = half_btf(cospi[29], bf0[14], cospi[35], bf0[15], cos_bit);
6565 0 : bf1[15] = half_btf(cospi[35], bf0[14], -cospi[29], bf0[15], cos_bit);
6566 0 : bf1[16] = half_btf(cospi[33], bf0[16], cospi[31], bf0[17], cos_bit);
6567 0 : bf1[17] = half_btf(cospi[31], bf0[16], -cospi[33], bf0[17], cos_bit);
6568 0 : bf1[18] = half_btf(cospi[37], bf0[18], cospi[27], bf0[19], cos_bit);
6569 0 : bf1[19] = half_btf(cospi[27], bf0[18], -cospi[37], bf0[19], cos_bit);
6570 0 : bf1[20] = half_btf(cospi[41], bf0[20], cospi[23], bf0[21], cos_bit);
6571 0 : bf1[21] = half_btf(cospi[23], bf0[20], -cospi[41], bf0[21], cos_bit);
6572 0 : bf1[22] = half_btf(cospi[45], bf0[22], cospi[19], bf0[23], cos_bit);
6573 0 : bf1[23] = half_btf(cospi[19], bf0[22], -cospi[45], bf0[23], cos_bit);
6574 0 : bf1[24] = half_btf(cospi[49], bf0[24], cospi[15], bf0[25], cos_bit);
6575 0 : bf1[25] = half_btf(cospi[15], bf0[24], -cospi[49], bf0[25], cos_bit);
6576 0 : bf1[26] = half_btf(cospi[53], bf0[26], cospi[11], bf0[27], cos_bit);
6577 0 : bf1[27] = half_btf(cospi[11], bf0[26], -cospi[53], bf0[27], cos_bit);
6578 0 : bf1[28] = half_btf(cospi[57], bf0[28], cospi[7], bf0[29], cos_bit);
6579 0 : bf1[29] = half_btf(cospi[7], bf0[28], -cospi[57], bf0[29], cos_bit);
6580 0 : bf1[30] = half_btf(cospi[61], bf0[30], cospi[3], bf0[31], cos_bit);
6581 0 : bf1[31] = half_btf(cospi[3], bf0[30], -cospi[61], bf0[31], cos_bit);
6582 0 : clamp_buf(bf1, size, stage_range[stage]);
6583 :
6584 : // stage 11
6585 0 : stage++;
6586 0 : bf0 = step;
6587 0 : bf1 = output;
6588 0 : bf1[0] = bf0[1];
6589 0 : bf1[1] = bf0[30];
6590 0 : bf1[2] = bf0[3];
6591 0 : bf1[3] = bf0[28];
6592 0 : bf1[4] = bf0[5];
6593 0 : bf1[5] = bf0[26];
6594 0 : bf1[6] = bf0[7];
6595 0 : bf1[7] = bf0[24];
6596 0 : bf1[8] = bf0[9];
6597 0 : bf1[9] = bf0[22];
6598 0 : bf1[10] = bf0[11];
6599 0 : bf1[11] = bf0[20];
6600 0 : bf1[12] = bf0[13];
6601 0 : bf1[13] = bf0[18];
6602 0 : bf1[14] = bf0[15];
6603 0 : bf1[15] = bf0[16];
6604 0 : bf1[16] = bf0[17];
6605 0 : bf1[17] = bf0[14];
6606 0 : bf1[18] = bf0[19];
6607 0 : bf1[19] = bf0[12];
6608 0 : bf1[20] = bf0[21];
6609 0 : bf1[21] = bf0[10];
6610 0 : bf1[22] = bf0[23];
6611 0 : bf1[23] = bf0[8];
6612 0 : bf1[24] = bf0[25];
6613 0 : bf1[25] = bf0[6];
6614 0 : bf1[26] = bf0[27];
6615 0 : bf1[27] = bf0[4];
6616 0 : bf1[28] = bf0[29];
6617 0 : bf1[29] = bf0[2];
6618 0 : bf1[30] = bf0[31];
6619 0 : bf1[31] = bf0[0];
6620 0 : clamp_buf(bf1, size, stage_range[stage]);
6621 0 : }
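 : /*
 : * eb_av1_idct64_new: stage 1 permutes the 64 inputs into butterfly order
 : * (entries 32..63 take the odd-numbered inputs), stage 2 rotates that odd
 : * half, and the later stages interleave half_btf() rotations with clamped
 : * add/sub butterflies over progressively longer spans, mirroring the
 : * 32-point transforms above.
 : */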
6622 0 : void eb_av1_idct64_new(const int32_t *input, int32_t *output, int8_t cos_bit,
6623 : const int8_t *stage_range) {
6624 0 : assert(output != input);
6625 0 : const int32_t *cospi = cospi_arr(cos_bit);
6626 :
6627 0 : int32_t stage = 0;
6628 : int32_t *bf0, *bf1;
6629 : int32_t step[64];
6630 :
6631 : // stage 0;
6632 :
6633 : // stage 1;
6634 0 : stage++;
6635 0 : bf1 = output;
6636 0 : bf1[0] = input[0];
6637 0 : bf1[1] = input[32];
6638 0 : bf1[2] = input[16];
6639 0 : bf1[3] = input[48];
6640 0 : bf1[4] = input[8];
6641 0 : bf1[5] = input[40];
6642 0 : bf1[6] = input[24];
6643 0 : bf1[7] = input[56];
6644 0 : bf1[8] = input[4];
6645 0 : bf1[9] = input[36];
6646 0 : bf1[10] = input[20];
6647 0 : bf1[11] = input[52];
6648 0 : bf1[12] = input[12];
6649 0 : bf1[13] = input[44];
6650 0 : bf1[14] = input[28];
6651 0 : bf1[15] = input[60];
6652 0 : bf1[16] = input[2];
6653 0 : bf1[17] = input[34];
6654 0 : bf1[18] = input[18];
6655 0 : bf1[19] = input[50];
6656 0 : bf1[20] = input[10];
6657 0 : bf1[21] = input[42];
6658 0 : bf1[22] = input[26];
6659 0 : bf1[23] = input[58];
6660 0 : bf1[24] = input[6];
6661 0 : bf1[25] = input[38];
6662 0 : bf1[26] = input[22];
6663 0 : bf1[27] = input[54];
6664 0 : bf1[28] = input[14];
6665 0 : bf1[29] = input[46];
6666 0 : bf1[30] = input[30];
6667 0 : bf1[31] = input[62];
6668 0 : bf1[32] = input[1];
6669 0 : bf1[33] = input[33];
6670 0 : bf1[34] = input[17];
6671 0 : bf1[35] = input[49];
6672 0 : bf1[36] = input[9];
6673 0 : bf1[37] = input[41];
6674 0 : bf1[38] = input[25];
6675 0 : bf1[39] = input[57];
6676 0 : bf1[40] = input[5];
6677 0 : bf1[41] = input[37];
6678 0 : bf1[42] = input[21];
6679 0 : bf1[43] = input[53];
6680 0 : bf1[44] = input[13];
6681 0 : bf1[45] = input[45];
6682 0 : bf1[46] = input[29];
6683 0 : bf1[47] = input[61];
6684 0 : bf1[48] = input[3];
6685 0 : bf1[49] = input[35];
6686 0 : bf1[50] = input[19];
6687 0 : bf1[51] = input[51];
6688 0 : bf1[52] = input[11];
6689 0 : bf1[53] = input[43];
6690 0 : bf1[54] = input[27];
6691 0 : bf1[55] = input[59];
6692 0 : bf1[56] = input[7];
6693 0 : bf1[57] = input[39];
6694 0 : bf1[58] = input[23];
6695 0 : bf1[59] = input[55];
6696 0 : bf1[60] = input[15];
6697 0 : bf1[61] = input[47];
6698 0 : bf1[62] = input[31];
6699 0 : bf1[63] = input[63];
6700 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
6701 :
6702 : // stage 2
6703 0 : stage++;
6704 0 : bf0 = output;
6705 0 : bf1 = step;
6706 0 : bf1[0] = bf0[0];
6707 0 : bf1[1] = bf0[1];
6708 0 : bf1[2] = bf0[2];
6709 0 : bf1[3] = bf0[3];
6710 0 : bf1[4] = bf0[4];
6711 0 : bf1[5] = bf0[5];
6712 0 : bf1[6] = bf0[6];
6713 0 : bf1[7] = bf0[7];
6714 0 : bf1[8] = bf0[8];
6715 0 : bf1[9] = bf0[9];
6716 0 : bf1[10] = bf0[10];
6717 0 : bf1[11] = bf0[11];
6718 0 : bf1[12] = bf0[12];
6719 0 : bf1[13] = bf0[13];
6720 0 : bf1[14] = bf0[14];
6721 0 : bf1[15] = bf0[15];
6722 0 : bf1[16] = bf0[16];
6723 0 : bf1[17] = bf0[17];
6724 0 : bf1[18] = bf0[18];
6725 0 : bf1[19] = bf0[19];
6726 0 : bf1[20] = bf0[20];
6727 0 : bf1[21] = bf0[21];
6728 0 : bf1[22] = bf0[22];
6729 0 : bf1[23] = bf0[23];
6730 0 : bf1[24] = bf0[24];
6731 0 : bf1[25] = bf0[25];
6732 0 : bf1[26] = bf0[26];
6733 0 : bf1[27] = bf0[27];
6734 0 : bf1[28] = bf0[28];
6735 0 : bf1[29] = bf0[29];
6736 0 : bf1[30] = bf0[30];
6737 0 : bf1[31] = bf0[31];
6738 0 : bf1[32] = half_btf(cospi[63], bf0[32], -cospi[1], bf0[63], cos_bit);
6739 0 : bf1[33] = half_btf(cospi[31], bf0[33], -cospi[33], bf0[62], cos_bit);
6740 0 : bf1[34] = half_btf(cospi[47], bf0[34], -cospi[17], bf0[61], cos_bit);
6741 0 : bf1[35] = half_btf(cospi[15], bf0[35], -cospi[49], bf0[60], cos_bit);
6742 0 : bf1[36] = half_btf(cospi[55], bf0[36], -cospi[9], bf0[59], cos_bit);
6743 0 : bf1[37] = half_btf(cospi[23], bf0[37], -cospi[41], bf0[58], cos_bit);
6744 0 : bf1[38] = half_btf(cospi[39], bf0[38], -cospi[25], bf0[57], cos_bit);
6745 0 : bf1[39] = half_btf(cospi[7], bf0[39], -cospi[57], bf0[56], cos_bit);
6746 0 : bf1[40] = half_btf(cospi[59], bf0[40], -cospi[5], bf0[55], cos_bit);
6747 0 : bf1[41] = half_btf(cospi[27], bf0[41], -cospi[37], bf0[54], cos_bit);
6748 0 : bf1[42] = half_btf(cospi[43], bf0[42], -cospi[21], bf0[53], cos_bit);
6749 0 : bf1[43] = half_btf(cospi[11], bf0[43], -cospi[53], bf0[52], cos_bit);
6750 0 : bf1[44] = half_btf(cospi[51], bf0[44], -cospi[13], bf0[51], cos_bit);
6751 0 : bf1[45] = half_btf(cospi[19], bf0[45], -cospi[45], bf0[50], cos_bit);
6752 0 : bf1[46] = half_btf(cospi[35], bf0[46], -cospi[29], bf0[49], cos_bit);
6753 0 : bf1[47] = half_btf(cospi[3], bf0[47], -cospi[61], bf0[48], cos_bit);
6754 0 : bf1[48] = half_btf(cospi[61], bf0[47], cospi[3], bf0[48], cos_bit);
6755 0 : bf1[49] = half_btf(cospi[29], bf0[46], cospi[35], bf0[49], cos_bit);
6756 0 : bf1[50] = half_btf(cospi[45], bf0[45], cospi[19], bf0[50], cos_bit);
6757 0 : bf1[51] = half_btf(cospi[13], bf0[44], cospi[51], bf0[51], cos_bit);
6758 0 : bf1[52] = half_btf(cospi[53], bf0[43], cospi[11], bf0[52], cos_bit);
6759 0 : bf1[53] = half_btf(cospi[21], bf0[42], cospi[43], bf0[53], cos_bit);
6760 0 : bf1[54] = half_btf(cospi[37], bf0[41], cospi[27], bf0[54], cos_bit);
6761 0 : bf1[55] = half_btf(cospi[5], bf0[40], cospi[59], bf0[55], cos_bit);
6762 0 : bf1[56] = half_btf(cospi[57], bf0[39], cospi[7], bf0[56], cos_bit);
6763 0 : bf1[57] = half_btf(cospi[25], bf0[38], cospi[39], bf0[57], cos_bit);
6764 0 : bf1[58] = half_btf(cospi[41], bf0[37], cospi[23], bf0[58], cos_bit);
6765 0 : bf1[59] = half_btf(cospi[9], bf0[36], cospi[55], bf0[59], cos_bit);
6766 0 : bf1[60] = half_btf(cospi[49], bf0[35], cospi[15], bf0[60], cos_bit);
6767 0 : bf1[61] = half_btf(cospi[17], bf0[34], cospi[47], bf0[61], cos_bit);
6768 0 : bf1[62] = half_btf(cospi[33], bf0[33], cospi[31], bf0[62], cos_bit);
6769 0 : bf1[63] = half_btf(cospi[1], bf0[32], cospi[63], bf0[63], cos_bit);
6770 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
6771 :
6772 : // stage 3
6773 0 : stage++;
6774 0 : bf0 = step;
6775 0 : bf1 = output;
6776 0 : bf1[0] = bf0[0];
6777 0 : bf1[1] = bf0[1];
6778 0 : bf1[2] = bf0[2];
6779 0 : bf1[3] = bf0[3];
6780 0 : bf1[4] = bf0[4];
6781 0 : bf1[5] = bf0[5];
6782 0 : bf1[6] = bf0[6];
6783 0 : bf1[7] = bf0[7];
6784 0 : bf1[8] = bf0[8];
6785 0 : bf1[9] = bf0[9];
6786 0 : bf1[10] = bf0[10];
6787 0 : bf1[11] = bf0[11];
6788 0 : bf1[12] = bf0[12];
6789 0 : bf1[13] = bf0[13];
6790 0 : bf1[14] = bf0[14];
6791 0 : bf1[15] = bf0[15];
6792 0 : bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit);
6793 0 : bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit);
6794 0 : bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit);
6795 0 : bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit);
6796 0 : bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit);
6797 0 : bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit);
6798 0 : bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit);
6799 0 : bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit);
6800 0 : bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit);
6801 0 : bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit);
6802 0 : bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit);
6803 0 : bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit);
6804 0 : bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit);
6805 0 : bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit);
6806 0 : bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit);
6807 0 : bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit);
6808 0 : bf1[32] = clamp_value(bf0[32] + bf0[33], stage_range[stage]);
6809 0 : bf1[33] = clamp_value(bf0[32] - bf0[33], stage_range[stage]);
6810 0 : bf1[34] = clamp_value(-bf0[34] + bf0[35], stage_range[stage]);
6811 0 : bf1[35] = clamp_value(bf0[34] + bf0[35], stage_range[stage]);
6812 0 : bf1[36] = clamp_value(bf0[36] + bf0[37], stage_range[stage]);
6813 0 : bf1[37] = clamp_value(bf0[36] - bf0[37], stage_range[stage]);
6814 0 : bf1[38] = clamp_value(-bf0[38] + bf0[39], stage_range[stage]);
6815 0 : bf1[39] = clamp_value(bf0[38] + bf0[39], stage_range[stage]);
6816 0 : bf1[40] = clamp_value(bf0[40] + bf0[41], stage_range[stage]);
6817 0 : bf1[41] = clamp_value(bf0[40] - bf0[41], stage_range[stage]);
6818 0 : bf1[42] = clamp_value(-bf0[42] + bf0[43], stage_range[stage]);
6819 0 : bf1[43] = clamp_value(bf0[42] + bf0[43], stage_range[stage]);
6820 0 : bf1[44] = clamp_value(bf0[44] + bf0[45], stage_range[stage]);
6821 0 : bf1[45] = clamp_value(bf0[44] - bf0[45], stage_range[stage]);
6822 0 : bf1[46] = clamp_value(-bf0[46] + bf0[47], stage_range[stage]);
6823 0 : bf1[47] = clamp_value(bf0[46] + bf0[47], stage_range[stage]);
6824 0 : bf1[48] = clamp_value(bf0[48] + bf0[49], stage_range[stage]);
6825 0 : bf1[49] = clamp_value(bf0[48] - bf0[49], stage_range[stage]);
6826 0 : bf1[50] = clamp_value(-bf0[50] + bf0[51], stage_range[stage]);
6827 0 : bf1[51] = clamp_value(bf0[50] + bf0[51], stage_range[stage]);
6828 0 : bf1[52] = clamp_value(bf0[52] + bf0[53], stage_range[stage]);
6829 0 : bf1[53] = clamp_value(bf0[52] - bf0[53], stage_range[stage]);
6830 0 : bf1[54] = clamp_value(-bf0[54] + bf0[55], stage_range[stage]);
6831 0 : bf1[55] = clamp_value(bf0[54] + bf0[55], stage_range[stage]);
6832 0 : bf1[56] = clamp_value(bf0[56] + bf0[57], stage_range[stage]);
6833 0 : bf1[57] = clamp_value(bf0[56] - bf0[57], stage_range[stage]);
6834 0 : bf1[58] = clamp_value(-bf0[58] + bf0[59], stage_range[stage]);
6835 0 : bf1[59] = clamp_value(bf0[58] + bf0[59], stage_range[stage]);
6836 0 : bf1[60] = clamp_value(bf0[60] + bf0[61], stage_range[stage]);
6837 0 : bf1[61] = clamp_value(bf0[60] - bf0[61], stage_range[stage]);
6838 0 : bf1[62] = clamp_value(-bf0[62] + bf0[63], stage_range[stage]);
6839 0 : bf1[63] = clamp_value(bf0[62] + bf0[63], stage_range[stage]);
6840 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
6841 :
6842 : // stage 4
6843 0 : stage++;
6844 0 : bf0 = output;
6845 0 : bf1 = step;
6846 0 : bf1[0] = bf0[0];
6847 0 : bf1[1] = bf0[1];
6848 0 : bf1[2] = bf0[2];
6849 0 : bf1[3] = bf0[3];
6850 0 : bf1[4] = bf0[4];
6851 0 : bf1[5] = bf0[5];
6852 0 : bf1[6] = bf0[6];
6853 0 : bf1[7] = bf0[7];
6854 0 : bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit);
6855 0 : bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit);
6856 0 : bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit);
6857 0 : bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit);
6858 0 : bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit);
6859 0 : bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit);
6860 0 : bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit);
6861 0 : bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit);
6862 0 : bf1[16] = clamp_value(bf0[16] + bf0[17], stage_range[stage]);
6863 0 : bf1[17] = clamp_value(bf0[16] - bf0[17], stage_range[stage]);
6864 0 : bf1[18] = clamp_value(-bf0[18] + bf0[19], stage_range[stage]);
6865 0 : bf1[19] = clamp_value(bf0[18] + bf0[19], stage_range[stage]);
6866 0 : bf1[20] = clamp_value(bf0[20] + bf0[21], stage_range[stage]);
6867 0 : bf1[21] = clamp_value(bf0[20] - bf0[21], stage_range[stage]);
6868 0 : bf1[22] = clamp_value(-bf0[22] + bf0[23], stage_range[stage]);
6869 0 : bf1[23] = clamp_value(bf0[22] + bf0[23], stage_range[stage]);
6870 0 : bf1[24] = clamp_value(bf0[24] + bf0[25], stage_range[stage]);
6871 0 : bf1[25] = clamp_value(bf0[24] - bf0[25], stage_range[stage]);
6872 0 : bf1[26] = clamp_value(-bf0[26] + bf0[27], stage_range[stage]);
6873 0 : bf1[27] = clamp_value(bf0[26] + bf0[27], stage_range[stage]);
6874 0 : bf1[28] = clamp_value(bf0[28] + bf0[29], stage_range[stage]);
6875 0 : bf1[29] = clamp_value(bf0[28] - bf0[29], stage_range[stage]);
6876 0 : bf1[30] = clamp_value(-bf0[30] + bf0[31], stage_range[stage]);
6877 0 : bf1[31] = clamp_value(bf0[30] + bf0[31], stage_range[stage]);
6878 0 : bf1[32] = bf0[32];
6879 0 : bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit);
6880 0 : bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit);
6881 0 : bf1[35] = bf0[35];
6882 0 : bf1[36] = bf0[36];
6883 0 : bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit);
6884 0 : bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit);
6885 0 : bf1[39] = bf0[39];
6886 0 : bf1[40] = bf0[40];
6887 0 : bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit);
6888 0 : bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit);
6889 0 : bf1[43] = bf0[43];
6890 0 : bf1[44] = bf0[44];
6891 0 : bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit);
6892 0 : bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit);
6893 0 : bf1[47] = bf0[47];
6894 0 : bf1[48] = bf0[48];
6895 0 : bf1[49] = half_btf(-cospi[52], bf0[46], cospi[12], bf0[49], cos_bit);
6896 0 : bf1[50] = half_btf(cospi[12], bf0[45], cospi[52], bf0[50], cos_bit);
6897 0 : bf1[51] = bf0[51];
6898 0 : bf1[52] = bf0[52];
6899 0 : bf1[53] = half_btf(-cospi[20], bf0[42], cospi[44], bf0[53], cos_bit);
6900 0 : bf1[54] = half_btf(cospi[44], bf0[41], cospi[20], bf0[54], cos_bit);
6901 0 : bf1[55] = bf0[55];
6902 0 : bf1[56] = bf0[56];
6903 0 : bf1[57] = half_btf(-cospi[36], bf0[38], cospi[28], bf0[57], cos_bit);
6904 0 : bf1[58] = half_btf(cospi[28], bf0[37], cospi[36], bf0[58], cos_bit);
6905 0 : bf1[59] = bf0[59];
6906 0 : bf1[60] = bf0[60];
6907 0 : bf1[61] = half_btf(-cospi[4], bf0[34], cospi[60], bf0[61], cos_bit);
6908 0 : bf1[62] = half_btf(cospi[60], bf0[33], cospi[4], bf0[62], cos_bit);
6909 0 : bf1[63] = bf0[63];
6910 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
6911 :
6912 : // stage 5
6913 0 : stage++;
6914 0 : bf0 = step;
6915 0 : bf1 = output;
6916 0 : bf1[0] = bf0[0];
6917 0 : bf1[1] = bf0[1];
6918 0 : bf1[2] = bf0[2];
6919 0 : bf1[3] = bf0[3];
6920 0 : bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit);
6921 0 : bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit);
6922 0 : bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit);
6923 0 : bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit);
6924 0 : bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]);
6925 0 : bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]);
6926 0 : bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]);
6927 0 : bf1[11] = clamp_value(bf0[10] + bf0[11], stage_range[stage]);
6928 0 : bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]);
6929 0 : bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]);
6930 0 : bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]);
6931 0 : bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]);
6932 0 : bf1[16] = bf0[16];
6933 0 : bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit);
6934 0 : bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit);
6935 0 : bf1[19] = bf0[19];
6936 0 : bf1[20] = bf0[20];
6937 0 : bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit);
6938 0 : bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit);
6939 0 : bf1[23] = bf0[23];
6940 0 : bf1[24] = bf0[24];
6941 0 : bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit);
6942 0 : bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit);
6943 0 : bf1[27] = bf0[27];
6944 0 : bf1[28] = bf0[28];
6945 0 : bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit);
6946 0 : bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit);
6947 0 : bf1[31] = bf0[31];
6948 0 : bf1[32] = clamp_value(bf0[32] + bf0[35], stage_range[stage]);
6949 0 : bf1[33] = clamp_value(bf0[33] + bf0[34], stage_range[stage]);
6950 0 : bf1[34] = clamp_value(bf0[33] - bf0[34], stage_range[stage]);
6951 0 : bf1[35] = clamp_value(bf0[32] - bf0[35], stage_range[stage]);
6952 0 : bf1[36] = clamp_value(-bf0[36] + bf0[39], stage_range[stage]);
6953 0 : bf1[37] = clamp_value(-bf0[37] + bf0[38], stage_range[stage]);
6954 0 : bf1[38] = clamp_value(bf0[37] + bf0[38], stage_range[stage]);
6955 0 : bf1[39] = clamp_value(bf0[36] + bf0[39], stage_range[stage]);
6956 0 : bf1[40] = clamp_value(bf0[40] + bf0[43], stage_range[stage]);
6957 0 : bf1[41] = clamp_value(bf0[41] + bf0[42], stage_range[stage]);
6958 0 : bf1[42] = clamp_value(bf0[41] - bf0[42], stage_range[stage]);
6959 0 : bf1[43] = clamp_value(bf0[40] - bf0[43], stage_range[stage]);
6960 0 : bf1[44] = clamp_value(-bf0[44] + bf0[47], stage_range[stage]);
6961 0 : bf1[45] = clamp_value(-bf0[45] + bf0[46], stage_range[stage]);
6962 0 : bf1[46] = clamp_value(bf0[45] + bf0[46], stage_range[stage]);
6963 0 : bf1[47] = clamp_value(bf0[44] + bf0[47], stage_range[stage]);
6964 0 : bf1[48] = clamp_value(bf0[48] + bf0[51], stage_range[stage]);
6965 0 : bf1[49] = clamp_value(bf0[49] + bf0[50], stage_range[stage]);
6966 0 : bf1[50] = clamp_value(bf0[49] - bf0[50], stage_range[stage]);
6967 0 : bf1[51] = clamp_value(bf0[48] - bf0[51], stage_range[stage]);
6968 0 : bf1[52] = clamp_value(-bf0[52] + bf0[55], stage_range[stage]);
6969 0 : bf1[53] = clamp_value(-bf0[53] + bf0[54], stage_range[stage]);
6970 0 : bf1[54] = clamp_value(bf0[53] + bf0[54], stage_range[stage]);
6971 0 : bf1[55] = clamp_value(bf0[52] + bf0[55], stage_range[stage]);
6972 0 : bf1[56] = clamp_value(bf0[56] + bf0[59], stage_range[stage]);
6973 0 : bf1[57] = clamp_value(bf0[57] + bf0[58], stage_range[stage]);
6974 0 : bf1[58] = clamp_value(bf0[57] - bf0[58], stage_range[stage]);
6975 0 : bf1[59] = clamp_value(bf0[56] - bf0[59], stage_range[stage]);
6976 0 : bf1[60] = clamp_value(-bf0[60] + bf0[63], stage_range[stage]);
6977 0 : bf1[61] = clamp_value(-bf0[61] + bf0[62], stage_range[stage]);
6978 0 : bf1[62] = clamp_value(bf0[61] + bf0[62], stage_range[stage]);
6979 0 : bf1[63] = clamp_value(bf0[60] + bf0[63], stage_range[stage]);
6980 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
6981 :
6982 : // stage 6
6983 0 : stage++;
6984 0 : bf0 = output;
6985 0 : bf1 = step;
6986 0 : bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
6987 0 : bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
6988 0 : bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
6989 0 : bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
6990 0 : bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]);
6991 0 : bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]);
6992 0 : bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]);
6993 0 : bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]);
6994 0 : bf1[8] = bf0[8];
6995 0 : bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
6996 0 : bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
6997 0 : bf1[11] = bf0[11];
6998 0 : bf1[12] = bf0[12];
6999 0 : bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit);
7000 0 : bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit);
7001 0 : bf1[15] = bf0[15];
7002 0 : bf1[16] = clamp_value(bf0[16] + bf0[19], stage_range[stage]);
7003 0 : bf1[17] = clamp_value(bf0[17] + bf0[18], stage_range[stage]);
7004 0 : bf1[18] = clamp_value(bf0[17] - bf0[18], stage_range[stage]);
7005 0 : bf1[19] = clamp_value(bf0[16] - bf0[19], stage_range[stage]);
7006 0 : bf1[20] = clamp_value(-bf0[20] + bf0[23], stage_range[stage]);
7007 0 : bf1[21] = clamp_value(-bf0[21] + bf0[22], stage_range[stage]);
7008 0 : bf1[22] = clamp_value(bf0[21] + bf0[22], stage_range[stage]);
7009 0 : bf1[23] = clamp_value(bf0[20] + bf0[23], stage_range[stage]);
7010 0 : bf1[24] = clamp_value(bf0[24] + bf0[27], stage_range[stage]);
7011 0 : bf1[25] = clamp_value(bf0[25] + bf0[26], stage_range[stage]);
7012 0 : bf1[26] = clamp_value(bf0[25] - bf0[26], stage_range[stage]);
7013 0 : bf1[27] = clamp_value(bf0[24] - bf0[27], stage_range[stage]);
7014 0 : bf1[28] = clamp_value(-bf0[28] + bf0[31], stage_range[stage]);
7015 0 : bf1[29] = clamp_value(-bf0[29] + bf0[30], stage_range[stage]);
7016 0 : bf1[30] = clamp_value(bf0[29] + bf0[30], stage_range[stage]);
7017 0 : bf1[31] = clamp_value(bf0[28] + bf0[31], stage_range[stage]);
7018 0 : bf1[32] = bf0[32];
7019 0 : bf1[33] = bf0[33];
7020 0 : bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit);
7021 0 : bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit);
7022 0 : bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit);
7023 0 : bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit);
7024 0 : bf1[38] = bf0[38];
7025 0 : bf1[39] = bf0[39];
7026 0 : bf1[40] = bf0[40];
7027 0 : bf1[41] = bf0[41];
7028 0 : bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit);
7029 0 : bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit);
7030 0 : bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit);
7031 0 : bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit);
7032 0 : bf1[46] = bf0[46];
7033 0 : bf1[47] = bf0[47];
7034 0 : bf1[48] = bf0[48];
7035 0 : bf1[49] = bf0[49];
7036 0 : bf1[50] = half_btf(-cospi[40], bf0[45], cospi[24], bf0[50], cos_bit);
7037 0 : bf1[51] = half_btf(-cospi[40], bf0[44], cospi[24], bf0[51], cos_bit);
7038 0 : bf1[52] = half_btf(cospi[24], bf0[43], cospi[40], bf0[52], cos_bit);
7039 0 : bf1[53] = half_btf(cospi[24], bf0[42], cospi[40], bf0[53], cos_bit);
7040 0 : bf1[54] = bf0[54];
7041 0 : bf1[55] = bf0[55];
7042 0 : bf1[56] = bf0[56];
7043 0 : bf1[57] = bf0[57];
7044 0 : bf1[58] = half_btf(-cospi[8], bf0[37], cospi[56], bf0[58], cos_bit);
7045 0 : bf1[59] = half_btf(-cospi[8], bf0[36], cospi[56], bf0[59], cos_bit);
7046 0 : bf1[60] = half_btf(cospi[56], bf0[35], cospi[8], bf0[60], cos_bit);
7047 0 : bf1[61] = half_btf(cospi[56], bf0[34], cospi[8], bf0[61], cos_bit);
7048 0 : bf1[62] = bf0[62];
7049 0 : bf1[63] = bf0[63];
7050 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
7051 :
7052 : // stage 7
7053 0 : stage++;
7054 0 : bf0 = step;
7055 0 : bf1 = output;
7056 0 : bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
7057 0 : bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
7058 0 : bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
7059 0 : bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
7060 0 : bf1[4] = bf0[4];
7061 0 : bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
7062 0 : bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
7063 0 : bf1[7] = bf0[7];
7064 0 : bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]);
7065 0 : bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]);
7066 0 : bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]);
7067 0 : bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]);
7068 0 : bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]);
7069 0 : bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]);
7070 0 : bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]);
7071 0 : bf1[15] = clamp_value(bf0[12] + bf0[15], stage_range[stage]);
7072 0 : bf1[16] = bf0[16];
7073 0 : bf1[17] = bf0[17];
7074 0 : bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit);
7075 0 : bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit);
7076 0 : bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit);
7077 0 : bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit);
7078 0 : bf1[22] = bf0[22];
7079 0 : bf1[23] = bf0[23];
7080 0 : bf1[24] = bf0[24];
7081 0 : bf1[25] = bf0[25];
7082 0 : bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit);
7083 0 : bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit);
7084 0 : bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit);
7085 0 : bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit);
7086 0 : bf1[30] = bf0[30];
7087 0 : bf1[31] = bf0[31];
7088 0 : bf1[32] = clamp_value(bf0[32] + bf0[39], stage_range[stage]);
7089 0 : bf1[33] = clamp_value(bf0[33] + bf0[38], stage_range[stage]);
7090 0 : bf1[34] = clamp_value(bf0[34] + bf0[37], stage_range[stage]);
7091 0 : bf1[35] = clamp_value(bf0[35] + bf0[36], stage_range[stage]);
7092 0 : bf1[36] = clamp_value(bf0[35] - bf0[36], stage_range[stage]);
7093 0 : bf1[37] = clamp_value(bf0[34] - bf0[37], stage_range[stage]);
7094 0 : bf1[38] = clamp_value(bf0[33] - bf0[38], stage_range[stage]);
7095 0 : bf1[39] = clamp_value(bf0[32] - bf0[39], stage_range[stage]);
7096 0 : bf1[40] = clamp_value(-bf0[40] + bf0[47], stage_range[stage]);
7097 0 : bf1[41] = clamp_value(-bf0[41] + bf0[46], stage_range[stage]);
7098 0 : bf1[42] = clamp_value(-bf0[42] + bf0[45], stage_range[stage]);
7099 0 : bf1[43] = clamp_value(-bf0[43] + bf0[44], stage_range[stage]);
7100 0 : bf1[44] = clamp_value(bf0[43] + bf0[44], stage_range[stage]);
7101 0 : bf1[45] = clamp_value(bf0[42] + bf0[45], stage_range[stage]);
7102 0 : bf1[46] = clamp_value(bf0[41] + bf0[46], stage_range[stage]);
7103 0 : bf1[47] = clamp_value(bf0[40] + bf0[47], stage_range[stage]);
7104 0 : bf1[48] = clamp_value(bf0[48] + bf0[55], stage_range[stage]);
7105 0 : bf1[49] = clamp_value(bf0[49] + bf0[54], stage_range[stage]);
7106 0 : bf1[50] = clamp_value(bf0[50] + bf0[53], stage_range[stage]);
7107 0 : bf1[51] = clamp_value(bf0[51] + bf0[52], stage_range[stage]);
7108 0 : bf1[52] = clamp_value(bf0[51] - bf0[52], stage_range[stage]);
7109 0 : bf1[53] = clamp_value(bf0[50] - bf0[53], stage_range[stage]);
7110 0 : bf1[54] = clamp_value(bf0[49] - bf0[54], stage_range[stage]);
7111 0 : bf1[55] = clamp_value(bf0[48] - bf0[55], stage_range[stage]);
7112 0 : bf1[56] = clamp_value(-bf0[56] + bf0[63], stage_range[stage]);
7113 0 : bf1[57] = clamp_value(-bf0[57] + bf0[62], stage_range[stage]);
7114 0 : bf1[58] = clamp_value(-bf0[58] + bf0[61], stage_range[stage]);
7115 0 : bf1[59] = clamp_value(-bf0[59] + bf0[60], stage_range[stage]);
7116 0 : bf1[60] = clamp_value(bf0[59] + bf0[60], stage_range[stage]);
7117 0 : bf1[61] = clamp_value(bf0[58] + bf0[61], stage_range[stage]);
7118 0 : bf1[62] = clamp_value(bf0[57] + bf0[62], stage_range[stage]);
7119 0 : bf1[63] = clamp_value(bf0[56] + bf0[63], stage_range[stage]);
7120 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
7121 :
7122 : // stage 8
7123 0 : stage++;
7124 0 : bf0 = output;
7125 0 : bf1 = step;
7126 0 : bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]);
7127 0 : bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]);
7128 0 : bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]);
7129 0 : bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]);
7130 0 : bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]);
7131 0 : bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]);
7132 0 : bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]);
7133 0 : bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]);
7134 0 : bf1[8] = bf0[8];
7135 0 : bf1[9] = bf0[9];
7136 0 : bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
7137 0 : bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
7138 0 : bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
7139 0 : bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
7140 0 : bf1[14] = bf0[14];
7141 0 : bf1[15] = bf0[15];
7142 0 : bf1[16] = clamp_value(bf0[16] + bf0[23], stage_range[stage]);
7143 0 : bf1[17] = clamp_value(bf0[17] + bf0[22], stage_range[stage]);
7144 0 : bf1[18] = clamp_value(bf0[18] + bf0[21], stage_range[stage]);
7145 0 : bf1[19] = clamp_value(bf0[19] + bf0[20], stage_range[stage]);
7146 0 : bf1[20] = clamp_value(bf0[19] - bf0[20], stage_range[stage]);
7147 0 : bf1[21] = clamp_value(bf0[18] - bf0[21], stage_range[stage]);
7148 0 : bf1[22] = clamp_value(bf0[17] - bf0[22], stage_range[stage]);
7149 0 : bf1[23] = clamp_value(bf0[16] - bf0[23], stage_range[stage]);
7150 0 : bf1[24] = clamp_value(-bf0[24] + bf0[31], stage_range[stage]);
7151 0 : bf1[25] = clamp_value(-bf0[25] + bf0[30], stage_range[stage]);
7152 0 : bf1[26] = clamp_value(-bf0[26] + bf0[29], stage_range[stage]);
7153 0 : bf1[27] = clamp_value(-bf0[27] + bf0[28], stage_range[stage]);
7154 0 : bf1[28] = clamp_value(bf0[27] + bf0[28], stage_range[stage]);
7155 0 : bf1[29] = clamp_value(bf0[26] + bf0[29], stage_range[stage]);
7156 0 : bf1[30] = clamp_value(bf0[25] + bf0[30], stage_range[stage]);
7157 0 : bf1[31] = clamp_value(bf0[24] + bf0[31], stage_range[stage]);
7158 0 : bf1[32] = bf0[32];
7159 0 : bf1[33] = bf0[33];
7160 0 : bf1[34] = bf0[34];
7161 0 : bf1[35] = bf0[35];
7162 0 : bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit);
7163 0 : bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit);
7164 0 : bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit);
7165 0 : bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit);
7166 0 : bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit);
7167 0 : bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit);
7168 0 : bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit);
7169 0 : bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit);
7170 0 : bf1[44] = bf0[44];
7171 0 : bf1[45] = bf0[45];
7172 0 : bf1[46] = bf0[46];
7173 0 : bf1[47] = bf0[47];
7174 0 : bf1[48] = bf0[48];
7175 0 : bf1[49] = bf0[49];
7176 0 : bf1[50] = bf0[50];
7177 0 : bf1[51] = bf0[51];
7178 0 : bf1[52] = half_btf(-cospi[16], bf0[43], cospi[48], bf0[52], cos_bit);
7179 0 : bf1[53] = half_btf(-cospi[16], bf0[42], cospi[48], bf0[53], cos_bit);
7180 0 : bf1[54] = half_btf(-cospi[16], bf0[41], cospi[48], bf0[54], cos_bit);
7181 0 : bf1[55] = half_btf(-cospi[16], bf0[40], cospi[48], bf0[55], cos_bit);
7182 0 : bf1[56] = half_btf(cospi[48], bf0[39], cospi[16], bf0[56], cos_bit);
7183 0 : bf1[57] = half_btf(cospi[48], bf0[38], cospi[16], bf0[57], cos_bit);
7184 0 : bf1[58] = half_btf(cospi[48], bf0[37], cospi[16], bf0[58], cos_bit);
7185 0 : bf1[59] = half_btf(cospi[48], bf0[36], cospi[16], bf0[59], cos_bit);
7186 0 : bf1[60] = bf0[60];
7187 0 : bf1[61] = bf0[61];
7188 0 : bf1[62] = bf0[62];
7189 0 : bf1[63] = bf0[63];
7190 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
7191 :
7192 : // stage 9
7193 0 : stage++;
7194 0 : bf0 = step;
7195 0 : bf1 = output;
7196 0 : bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]);
7197 0 : bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]);
7198 0 : bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]);
7199 0 : bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]);
7200 0 : bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]);
7201 0 : bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]);
7202 0 : bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]);
7203 0 : bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]);
7204 0 : bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]);
7205 0 : bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]);
7206 0 : bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]);
7207 0 : bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]);
7208 0 : bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]);
7209 0 : bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]);
7210 0 : bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]);
7211 0 : bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]);
7212 0 : bf1[16] = bf0[16];
7213 0 : bf1[17] = bf0[17];
7214 0 : bf1[18] = bf0[18];
7215 0 : bf1[19] = bf0[19];
7216 0 : bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
7217 0 : bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
7218 0 : bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
7219 0 : bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
7220 0 : bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
7221 0 : bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
7222 0 : bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
7223 0 : bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
7224 0 : bf1[28] = bf0[28];
7225 0 : bf1[29] = bf0[29];
7226 0 : bf1[30] = bf0[30];
7227 0 : bf1[31] = bf0[31];
7228 0 : bf1[32] = clamp_value(bf0[32] + bf0[47], stage_range[stage]);
7229 0 : bf1[33] = clamp_value(bf0[33] + bf0[46], stage_range[stage]);
7230 0 : bf1[34] = clamp_value(bf0[34] + bf0[45], stage_range[stage]);
7231 0 : bf1[35] = clamp_value(bf0[35] + bf0[44], stage_range[stage]);
7232 0 : bf1[36] = clamp_value(bf0[36] + bf0[43], stage_range[stage]);
7233 0 : bf1[37] = clamp_value(bf0[37] + bf0[42], stage_range[stage]);
7234 0 : bf1[38] = clamp_value(bf0[38] + bf0[41], stage_range[stage]);
7235 0 : bf1[39] = clamp_value(bf0[39] + bf0[40], stage_range[stage]);
7236 0 : bf1[40] = clamp_value(bf0[39] - bf0[40], stage_range[stage]);
7237 0 : bf1[41] = clamp_value(bf0[38] - bf0[41], stage_range[stage]);
7238 0 : bf1[42] = clamp_value(bf0[37] - bf0[42], stage_range[stage]);
7239 0 : bf1[43] = clamp_value(bf0[36] - bf0[43], stage_range[stage]);
7240 0 : bf1[44] = clamp_value(bf0[35] - bf0[44], stage_range[stage]);
7241 0 : bf1[45] = clamp_value(bf0[34] - bf0[45], stage_range[stage]);
7242 0 : bf1[46] = clamp_value(bf0[33] - bf0[46], stage_range[stage]);
7243 0 : bf1[47] = clamp_value(bf0[32] - bf0[47], stage_range[stage]);
7244 0 : bf1[48] = clamp_value(-bf0[48] + bf0[63], stage_range[stage]);
7245 0 : bf1[49] = clamp_value(-bf0[49] + bf0[62], stage_range[stage]);
7246 0 : bf1[50] = clamp_value(-bf0[50] + bf0[61], stage_range[stage]);
7247 0 : bf1[51] = clamp_value(-bf0[51] + bf0[60], stage_range[stage]);
7248 0 : bf1[52] = clamp_value(-bf0[52] + bf0[59], stage_range[stage]);
7249 0 : bf1[53] = clamp_value(-bf0[53] + bf0[58], stage_range[stage]);
7250 0 : bf1[54] = clamp_value(-bf0[54] + bf0[57], stage_range[stage]);
7251 0 : bf1[55] = clamp_value(-bf0[55] + bf0[56], stage_range[stage]);
7252 0 : bf1[56] = clamp_value(bf0[55] + bf0[56], stage_range[stage]);
7253 0 : bf1[57] = clamp_value(bf0[54] + bf0[57], stage_range[stage]);
7254 0 : bf1[58] = clamp_value(bf0[53] + bf0[58], stage_range[stage]);
7255 0 : bf1[59] = clamp_value(bf0[52] + bf0[59], stage_range[stage]);
7256 0 : bf1[60] = clamp_value(bf0[51] + bf0[60], stage_range[stage]);
7257 0 : bf1[61] = clamp_value(bf0[50] + bf0[61], stage_range[stage]);
7258 0 : bf1[62] = clamp_value(bf0[49] + bf0[62], stage_range[stage]);
7259 0 : bf1[63] = clamp_value(bf0[48] + bf0[63], stage_range[stage]);
7260 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
7261 :
7262 : // stage 10
7263 0 : stage++;
7264 0 : bf0 = output;
7265 0 : bf1 = step;
7266 0 : bf1[0] = clamp_value(bf0[0] + bf0[31], stage_range[stage]);
7267 0 : bf1[1] = clamp_value(bf0[1] + bf0[30], stage_range[stage]);
7268 0 : bf1[2] = clamp_value(bf0[2] + bf0[29], stage_range[stage]);
7269 0 : bf1[3] = clamp_value(bf0[3] + bf0[28], stage_range[stage]);
7270 0 : bf1[4] = clamp_value(bf0[4] + bf0[27], stage_range[stage]);
7271 0 : bf1[5] = clamp_value(bf0[5] + bf0[26], stage_range[stage]);
7272 0 : bf1[6] = clamp_value(bf0[6] + bf0[25], stage_range[stage]);
7273 0 : bf1[7] = clamp_value(bf0[7] + bf0[24], stage_range[stage]);
7274 0 : bf1[8] = clamp_value(bf0[8] + bf0[23], stage_range[stage]);
7275 0 : bf1[9] = clamp_value(bf0[9] + bf0[22], stage_range[stage]);
7276 0 : bf1[10] = clamp_value(bf0[10] + bf0[21], stage_range[stage]);
7277 0 : bf1[11] = clamp_value(bf0[11] + bf0[20], stage_range[stage]);
7278 0 : bf1[12] = clamp_value(bf0[12] + bf0[19], stage_range[stage]);
7279 0 : bf1[13] = clamp_value(bf0[13] + bf0[18], stage_range[stage]);
7280 0 : bf1[14] = clamp_value(bf0[14] + bf0[17], stage_range[stage]);
7281 0 : bf1[15] = clamp_value(bf0[15] + bf0[16], stage_range[stage]);
7282 0 : bf1[16] = clamp_value(bf0[15] - bf0[16], stage_range[stage]);
7283 0 : bf1[17] = clamp_value(bf0[14] - bf0[17], stage_range[stage]);
7284 0 : bf1[18] = clamp_value(bf0[13] - bf0[18], stage_range[stage]);
7285 0 : bf1[19] = clamp_value(bf0[12] - bf0[19], stage_range[stage]);
7286 0 : bf1[20] = clamp_value(bf0[11] - bf0[20], stage_range[stage]);
7287 0 : bf1[21] = clamp_value(bf0[10] - bf0[21], stage_range[stage]);
7288 0 : bf1[22] = clamp_value(bf0[9] - bf0[22], stage_range[stage]);
7289 0 : bf1[23] = clamp_value(bf0[8] - bf0[23], stage_range[stage]);
7290 0 : bf1[24] = clamp_value(bf0[7] - bf0[24], stage_range[stage]);
7291 0 : bf1[25] = clamp_value(bf0[6] - bf0[25], stage_range[stage]);
7292 0 : bf1[26] = clamp_value(bf0[5] - bf0[26], stage_range[stage]);
7293 0 : bf1[27] = clamp_value(bf0[4] - bf0[27], stage_range[stage]);
7294 0 : bf1[28] = clamp_value(bf0[3] - bf0[28], stage_range[stage]);
7295 0 : bf1[29] = clamp_value(bf0[2] - bf0[29], stage_range[stage]);
7296 0 : bf1[30] = clamp_value(bf0[1] - bf0[30], stage_range[stage]);
7297 0 : bf1[31] = clamp_value(bf0[0] - bf0[31], stage_range[stage]);
7298 0 : bf1[32] = bf0[32];
7299 0 : bf1[33] = bf0[33];
7300 0 : bf1[34] = bf0[34];
7301 0 : bf1[35] = bf0[35];
7302 0 : bf1[36] = bf0[36];
7303 0 : bf1[37] = bf0[37];
7304 0 : bf1[38] = bf0[38];
7305 0 : bf1[39] = bf0[39];
7306 0 : bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit);
7307 0 : bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit);
7308 0 : bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit);
7309 0 : bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit);
7310 0 : bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit);
7311 0 : bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit);
7312 0 : bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit);
7313 0 : bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit);
7314 0 : bf1[48] = half_btf(cospi[32], bf0[47], cospi[32], bf0[48], cos_bit);
7315 0 : bf1[49] = half_btf(cospi[32], bf0[46], cospi[32], bf0[49], cos_bit);
7316 0 : bf1[50] = half_btf(cospi[32], bf0[45], cospi[32], bf0[50], cos_bit);
7317 0 : bf1[51] = half_btf(cospi[32], bf0[44], cospi[32], bf0[51], cos_bit);
7318 0 : bf1[52] = half_btf(cospi[32], bf0[43], cospi[32], bf0[52], cos_bit);
7319 0 : bf1[53] = half_btf(cospi[32], bf0[42], cospi[32], bf0[53], cos_bit);
7320 0 : bf1[54] = half_btf(cospi[32], bf0[41], cospi[32], bf0[54], cos_bit);
7321 0 : bf1[55] = half_btf(cospi[32], bf0[40], cospi[32], bf0[55], cos_bit);
7322 0 : bf1[56] = bf0[56];
7323 0 : bf1[57] = bf0[57];
7324 0 : bf1[58] = bf0[58];
7325 0 : bf1[59] = bf0[59];
7326 0 : bf1[60] = bf0[60];
7327 0 : bf1[61] = bf0[61];
7328 0 : bf1[62] = bf0[62];
7329 0 : bf1[63] = bf0[63];
7330 : //range_check_buf(stage, input, bf1, size, stage_range[stage]);
7331 :
7332 : // stage 11
7333 0 : stage++;
7334 0 : bf0 = step;
7335 0 : bf1 = output;
7336 0 : bf1[0] = clamp_value(bf0[0] + bf0[63], stage_range[stage]);
7337 0 : bf1[1] = clamp_value(bf0[1] + bf0[62], stage_range[stage]);
7338 0 : bf1[2] = clamp_value(bf0[2] + bf0[61], stage_range[stage]);
7339 0 : bf1[3] = clamp_value(bf0[3] + bf0[60], stage_range[stage]);
7340 0 : bf1[4] = clamp_value(bf0[4] + bf0[59], stage_range[stage]);
7341 0 : bf1[5] = clamp_value(bf0[5] + bf0[58], stage_range[stage]);
7342 0 : bf1[6] = clamp_value(bf0[6] + bf0[57], stage_range[stage]);
7343 0 : bf1[7] = clamp_value(bf0[7] + bf0[56], stage_range[stage]);
7344 0 : bf1[8] = clamp_value(bf0[8] + bf0[55], stage_range[stage]);
7345 0 : bf1[9] = clamp_value(bf0[9] + bf0[54], stage_range[stage]);
7346 0 : bf1[10] = clamp_value(bf0[10] + bf0[53], stage_range[stage]);
7347 0 : bf1[11] = clamp_value(bf0[11] + bf0[52], stage_range[stage]);
7348 0 : bf1[12] = clamp_value(bf0[12] + bf0[51], stage_range[stage]);
7349 0 : bf1[13] = clamp_value(bf0[13] + bf0[50], stage_range[stage]);
7350 0 : bf1[14] = clamp_value(bf0[14] + bf0[49], stage_range[stage]);
7351 0 : bf1[15] = clamp_value(bf0[15] + bf0[48], stage_range[stage]);
7352 0 : bf1[16] = clamp_value(bf0[16] + bf0[47], stage_range[stage]);
7353 0 : bf1[17] = clamp_value(bf0[17] + bf0[46], stage_range[stage]);
7354 0 : bf1[18] = clamp_value(bf0[18] + bf0[45], stage_range[stage]);
7355 0 : bf1[19] = clamp_value(bf0[19] + bf0[44], stage_range[stage]);
7356 0 : bf1[20] = clamp_value(bf0[20] + bf0[43], stage_range[stage]);
7357 0 : bf1[21] = clamp_value(bf0[21] + bf0[42], stage_range[stage]);
7358 0 : bf1[22] = clamp_value(bf0[22] + bf0[41], stage_range[stage]);
7359 0 : bf1[23] = clamp_value(bf0[23] + bf0[40], stage_range[stage]);
7360 0 : bf1[24] = clamp_value(bf0[24] + bf0[39], stage_range[stage]);
7361 0 : bf1[25] = clamp_value(bf0[25] + bf0[38], stage_range[stage]);
7362 0 : bf1[26] = clamp_value(bf0[26] + bf0[37], stage_range[stage]);
7363 0 : bf1[27] = clamp_value(bf0[27] + bf0[36], stage_range[stage]);
7364 0 : bf1[28] = clamp_value(bf0[28] + bf0[35], stage_range[stage]);
7365 0 : bf1[29] = clamp_value(bf0[29] + bf0[34], stage_range[stage]);
7366 0 : bf1[30] = clamp_value(bf0[30] + bf0[33], stage_range[stage]);
7367 0 : bf1[31] = clamp_value(bf0[31] + bf0[32], stage_range[stage]);
7368 0 : bf1[32] = clamp_value(bf0[31] - bf0[32], stage_range[stage]);
7369 0 : bf1[33] = clamp_value(bf0[30] - bf0[33], stage_range[stage]);
7370 0 : bf1[34] = clamp_value(bf0[29] - bf0[34], stage_range[stage]);
7371 0 : bf1[35] = clamp_value(bf0[28] - bf0[35], stage_range[stage]);
7372 0 : bf1[36] = clamp_value(bf0[27] - bf0[36], stage_range[stage]);
7373 0 : bf1[37] = clamp_value(bf0[26] - bf0[37], stage_range[stage]);
7374 0 : bf1[38] = clamp_value(bf0[25] - bf0[38], stage_range[stage]);
7375 0 : bf1[39] = clamp_value(bf0[24] - bf0[39], stage_range[stage]);
7376 0 : bf1[40] = clamp_value(bf0[23] - bf0[40], stage_range[stage]);
7377 0 : bf1[41] = clamp_value(bf0[22] - bf0[41], stage_range[stage]);
7378 0 : bf1[42] = clamp_value(bf0[21] - bf0[42], stage_range[stage]);
7379 0 : bf1[43] = clamp_value(bf0[20] - bf0[43], stage_range[stage]);
7380 0 : bf1[44] = clamp_value(bf0[19] - bf0[44], stage_range[stage]);
7381 0 : bf1[45] = clamp_value(bf0[18] - bf0[45], stage_range[stage]);
7382 0 : bf1[46] = clamp_value(bf0[17] - bf0[46], stage_range[stage]);
7383 0 : bf1[47] = clamp_value(bf0[16] - bf0[47], stage_range[stage]);
7384 0 : bf1[48] = clamp_value(bf0[15] - bf0[48], stage_range[stage]);
7385 0 : bf1[49] = clamp_value(bf0[14] - bf0[49], stage_range[stage]);
7386 0 : bf1[50] = clamp_value(bf0[13] - bf0[50], stage_range[stage]);
7387 0 : bf1[51] = clamp_value(bf0[12] - bf0[51], stage_range[stage]);
7388 0 : bf1[52] = clamp_value(bf0[11] - bf0[52], stage_range[stage]);
7389 0 : bf1[53] = clamp_value(bf0[10] - bf0[53], stage_range[stage]);
7390 0 : bf1[54] = clamp_value(bf0[9] - bf0[54], stage_range[stage]);
7391 0 : bf1[55] = clamp_value(bf0[8] - bf0[55], stage_range[stage]);
7392 0 : bf1[56] = clamp_value(bf0[7] - bf0[56], stage_range[stage]);
7393 0 : bf1[57] = clamp_value(bf0[6] - bf0[57], stage_range[stage]);
7394 0 : bf1[58] = clamp_value(bf0[5] - bf0[58], stage_range[stage]);
7395 0 : bf1[59] = clamp_value(bf0[4] - bf0[59], stage_range[stage]);
7396 0 : bf1[60] = clamp_value(bf0[3] - bf0[60], stage_range[stage]);
7397 0 : bf1[61] = clamp_value(bf0[2] - bf0[61], stage_range[stage]);
7398 0 : bf1[62] = clamp_value(bf0[1] - bf0[62], stage_range[stage]);
7399 0 : bf1[63] = clamp_value(bf0[0] - bf0[63], stage_range[stage]);
7400 0 : }
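/*
 * Each stage above is one layer of butterflies: outputs are either lifted
 * sums/differences, clamped to the per-stage bit budget in stage_range[],
 * or rotations computed with half_btf(). A minimal sketch of the rotation
 * helper, assuming the usual AV1 fixed-point conventions (cospi[i] holds
 * cos(i*pi/64) scaled by 2^cos_bit):
 *
 *     static int32_t half_btf_sketch(int32_t w0, int32_t in0,
 *                                    int32_t w1, int32_t in1, int bit) {
 *         // w0*in0 + w1*in1 evaluated in 64 bits, rounded back down by `bit`.
 *         const int64_t sum = (int64_t)w0 * in0 + (int64_t)w1 * in1;
 *         return (int32_t)((sum + ((int64_t)1 << (bit - 1))) >> bit);
 *     }
 *
 * The 64-point IDCT walks such stages up to stage 11, entirely in integer math.
 */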
7401 0 : void eb_av1_iidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
7402 : const int8_t *stage_range) {
7403 : (void)cos_bit;
7404 : (void)stage_range;
7405 0 : for (int32_t i = 0; i < 4; ++i) {
7406 : // Normal input should fit into 32-bit. Cast to 64-bit here to avoid
7407 : // overflow with corrupted/fuzzed input. The same holds for the 16- and 64-point identity transforms below.
7408 0 : output[i] = round_shift((int64_t)NewSqrt2 * input[i], NewSqrt2Bits);
7409 : }
7410 0 : assert(stage_range[0] + NewSqrt2Bits <= 32);
7411 0 : }
7412 0 : void eb_av1_iidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
7413 : const int8_t *stage_range) {
7414 : (void)cos_bit;
7415 : (void)stage_range;
7416 0 : for (int32_t i = 0; i < 8; ++i) output[i] = (int32_t)((int64_t)input[i] * 2);
7417 0 : }
7418 0 : void eb_av1_iidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit,
7419 : const int8_t *stage_range) {
7420 : (void)cos_bit;
7421 : (void)stage_range;
7422 0 : for (int32_t i = 0; i < 16; ++i)
7423 0 : output[i] = round_shift((int64_t)NewSqrt2 * 2 * input[i], NewSqrt2Bits);
7424 0 : assert(stage_range[0] + NewSqrt2Bits <= 32);
7425 0 : }
7426 0 : void eb_av1_iidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit,
7427 : const int8_t *stage_range) {
7428 : (void)cos_bit;
7429 : (void)stage_range;
7430 0 : for (int32_t i = 0; i < 32; ++i) output[i] = (int32_t)((int64_t)input[i] * 4);
7431 0 : }
7432 0 : void av1_iidentity64_c(const int32_t *input, int32_t *output, int8_t cos_bit,
7433 : const int8_t *stage_range) {
7434 : (void)cos_bit;
7435 : (void)stage_range;
7436 0 : for (int32_t i = 0; i < 64; ++i)
7437 0 : output[i] = round_shift((int64_t)NewSqrt2 * 4 * input[i], NewSqrt2Bits);
7438 0 : assert(stage_range[0] + NewSqrt2Bits <= 32);
7439 0 : }
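/*
 * The identity "transforms" above only rescale. With NewSqrt2 = 5793 and
 * NewSqrt2Bits = 12 (5793 / 4096 ~= sqrt(2)), the per-size gains are
 * sqrt(2), 2, 2*sqrt(2), 4 and 4*sqrt(2) for the 4/8/16/32/64-point
 * versions, keeping them on the same overall scale as the other 1D kernels.
 * Worked example for iidentity4 with input 1000:
 *
 *     round_shift((int64_t)5793 * 1000, 12)
 *         = (5793000 + 2048) >> 12
 *         = 1414                         // ~= 1000 * sqrt(2)
 */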
7440 0 : static INLINE TxfmFunc inv_txfm_type_to_func(TxfmType TxfmType) {
7441 0 : switch (TxfmType) {
7442 0 : case TXFM_TYPE_DCT4: return eb_av1_idct4_new;
7443 0 : case TXFM_TYPE_DCT8: return eb_av1_idct8_new;
7444 0 : case TXFM_TYPE_DCT16: return eb_av1_idct16_new;
7445 0 : case TXFM_TYPE_DCT32: return eb_av1_idct32_new;
7446 0 : case TXFM_TYPE_DCT64: return eb_av1_idct64_new;
7447 0 : case TXFM_TYPE_ADST4: return eb_av1_iadst4_new;
7448 0 : case TXFM_TYPE_ADST8: return eb_av1_iadst8_new;
7449 0 : case TXFM_TYPE_ADST16: return eb_av1_iadst16_new;
7450 0 : case TXFM_TYPE_ADST32: return av1_iadst32_new;
7451 0 : case TXFM_TYPE_IDENTITY4: return eb_av1_iidentity4_c;
7452 0 : case TXFM_TYPE_IDENTITY8: return eb_av1_iidentity8_c;
7453 0 : case TXFM_TYPE_IDENTITY16: return eb_av1_iidentity16_c;
7454 0 : case TXFM_TYPE_IDENTITY32: return eb_av1_iidentity32_c;
7455 0 : case TXFM_TYPE_IDENTITY64: return av1_iidentity64_c;
7456 0 : default: assert(0); return NULL;
7457 : }
7458 : }
7459 :
7460 : //void eb_av1_round_shift_array_c(int32_t *arr, int32_t size, int32_t bit) {
7461 : // int32_t i;
7462 : // if (bit == 0) {
7463 : // return;
7464 : // }
7465 : // else {
7466 : // if (bit > 0) {
7467 : // for (i = 0; i < size; i++) {
7468 : // arr[i] = round_shift(arr[i], bit);
7469 : // }
7470 : // }
7471 : // else {
7472 : // for (i = 0; i < size; i++) {
7473 : // arr[i] = arr[i] * (1 << (-bit));
7474 : // }
7475 : // }
7476 : // }
7477 : //}
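/*
 * For reference, the round_shift() used throughout this file is AV1's
 * round-to-nearest right shift; a minimal sketch for bit >= 1:
 *
 *     static int64_t round_shift_sketch(int64_t value, int bit) {
 *         // Add half the shift step so truncation rounds to nearest.
 *         return (value + ((int64_t)1 << (bit - 1))) >> bit;
 *     }
 *
 * so the disabled eb_av1_round_shift_array_c above rounds each entry when
 * bit > 0 and scales up by 2^(-bit) when bit < 0.
 */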
7478 0 : static INLINE TranHigh check_range(TranHigh input, int32_t bd) {
7479 : // AV1 TX case
7480 : // - 8 bit: signed 16 bit integer
7481 : // - 10 bit: signed 18 bit integer
7482 : // - 12 bit: signed 20 bit integer
7483 : // - max quantization error = 1828 << (bd - 8)
7484 0 : const int32_t int_max = (1 << (7 + bd)) - 1 + (914 << (bd - 7));
7485 0 : const int32_t int_min = -int_max - 1;
7486 : #if CONFIG_COEFFICIENT_RANGE_CHECKING
7487 : assert(int_min <= input);
7488 : assert(input <= int_max);
7489 : #endif // CONFIG_COEFFICIENT_RANGE_CHECKING
7490 0 : return (TranHigh)clamp64(input, int_min, int_max);
7491 : }
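/*
 * Worked example of the window above for bd = 8: coefficients nominally fit
 * a signed 16-bit integer, and quantization can overshoot by up to
 * 1828 << (bd - 8) = 1828, so
 *
 *     int_max = (1 << 15) - 1 + (914 << 1) = 32767 + 1828 = 34595
 *     int_min = -34596
 *
 * For bd = 10 the window is roughly 4x wider (int_max = 138383).
 */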
7492 : #define HIGHBD_WRAPLOW(x, bd) ((int32_t)check_range((x), bd))
7493 0 : static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, TranHigh trans,
7494 : int32_t bd) {
7495 0 : trans = HIGHBD_WRAPLOW(trans, bd);
7496 0 : return clip_pixel_highbd(dest + (int32_t)trans, bd);
7497 : }
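/*
 * highbd_clip_pixel_add() is the final reconstruction step: the residual is
 * range-checked, added to the prediction sample, and clipped to the legal
 * pixel range for the bit depth, i.e. effectively
 *
 *     clamp(dest + trans, 0, (1 << bd) - 1)
 */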
7498 0 : static INLINE void Av1InverseTransformTwoDCore_c(
7499 : const int32_t *input,
7500 : int32_t inputStride,
7501 : TranLow *output,
7502 : int32_t outputStride,
7503 : Txfm2DFlipCfg *cfg,
7504 : int32_t *txfm_buf,
7505 : TxSize tx_size,
7506 : int32_t bd)
7507 : {
7508 : // Note when assigning txfm_size_col, we use the txfm_size from the
7509 : // row configuration and vice versa. This is intentionally done to
7510 : // accurately perform rectangular transforms. When the transform is
7511 : // rectangular, the number of columns will be the same as the
7512 : // txfm_size stored in the row cfg struct. It will make no difference
7513 : // for square transforms.
7514 0 : const int32_t txfm_size_col = tx_size_wide[cfg->tx_size];
7515 0 : const int32_t txfm_size_row = tx_size_high[cfg->tx_size];
7516 : // Take the shift from the larger dimension in the rectangular case.
7517 0 : const int8_t *shift = cfg->shift;
7518 0 : const int32_t rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
7519 : int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
7520 : int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
7521 0 : assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM);
7522 0 : assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM);
7523 0 : eb_av1_gen_inv_stage_range(stage_range_col, stage_range_row, cfg, tx_size, bd);
7524 :
7525 0 : const int8_t cos_bit_col = cfg->cos_bit_col;
7526 0 : const int8_t cos_bit_row = cfg->cos_bit_row;
7527 0 : const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->txfm_type_col);
7528 0 : const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->txfm_type_row);
7529 : ASSERT(txfm_func_col);
7530 : ASSERT(txfm_func_row);
7531 : // txfm_buf's length is txfm_size_row * txfm_size_col + 2 *
7532 : // AOMMAX(txfm_size_row, txfm_size_col)
7533 : // it is used for intermediate data buffering
7534 0 : const int32_t buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
7535 0 : int32_t *temp_in = txfm_buf;
7536 0 : int32_t *temp_out = temp_in + buf_offset;
7537 0 : int32_t *buf = temp_out + buf_offset;
7538 0 : int32_t *buf_ptr = buf;
7539 : int32_t c, r;
7540 :
7541 : // Rows
7542 0 : for (r = 0; r < txfm_size_row; ++r) {
7543 0 : if (abs(rect_type) == 1) {
7544 0 : for (c = 0; c < txfm_size_col; ++c)
7545 0 : temp_in[c] = round_shift((int64_t)input[c] * NewInvSqrt2, NewSqrt2Bits);
7546 0 : clamp_buf(temp_in, txfm_size_col, (int8_t)(bd + 8));
7547 0 : txfm_func_row(temp_in, buf_ptr, cos_bit_row, stage_range_row);
7548 : }
7549 : else {
7550 0 : for (c = 0; c < txfm_size_col; ++c)
7551 0 : temp_in[c] = input[c];
7552 0 : clamp_buf(temp_in, txfm_size_col, (int8_t)(bd + 8));
7553 0 : txfm_func_row(temp_in, buf_ptr, cos_bit_row, stage_range_row);
7554 : }
7555 0 : eb_av1_round_shift_array_c(buf_ptr, txfm_size_col, -shift[0]);
7556 0 : input += inputStride; // txfm_size_col;
7557 0 : buf_ptr += txfm_size_col;
7558 : }
7559 : // Columns
7560 0 : for (c = 0; c < txfm_size_col; ++c) {
7561 0 : if (cfg->lr_flip == 0) {
7562 0 : for (r = 0; r < txfm_size_row; ++r)
7563 0 : temp_in[r] = buf[r * txfm_size_col + c];
7564 : }
7565 : else {
7566 : // flip left right
7567 0 : for (r = 0; r < txfm_size_row; ++r)
7568 0 : temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
7569 : }
7570 0 : clamp_buf(temp_in, txfm_size_row, (int8_t)AOMMAX(bd + 6, 16));
7571 0 : txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
7572 0 : eb_av1_round_shift_array_c(temp_out, txfm_size_row, -shift[1]);
7573 0 : if (cfg->ud_flip == 0) {
7574 0 : for (r = 0; r < txfm_size_row; ++r)
7575 0 : output[r * outputStride + c] = temp_out[r];
7576 : }
7577 : else {
7578 : // flip upside down
7579 0 : for (r = 0; r < txfm_size_row; ++r)
7580 0 : output[r * outputStride + c] = temp_out[txfm_size_row - r - 1];
7581 : }
7582 : }
7583 0 : }
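/*
 * The abs(rect_type) == 1 branch above handles 2:1 rectangles (8x16, 32x16,
 * ...): combining two 1D transforms of different lengths leaves an extra
 * sqrt(2) gain, so each row is pre-scaled by NewInvSqrt2 / 2^NewSqrt2Bits
 * (2896 / 4096 ~= 1/sqrt(2)) before the row pass. 4:1 rectangles need no
 * such fixup because their extra gain is a whole power of two, which the
 * shift[0]/shift[1] rounding stages absorb.
 */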
7584 :
7585 0 : void Av1InverseTransformTwoD_4x4_c(
7586 : int32_t *input,
7587 : uint32_t input_stride,
7588 : int32_t *output,
7589 : uint32_t outputStride,
7590 : TxType transform_type,
7591 : uint8_t bit_depth)
7592 : {
7593 : DECLARE_ALIGNED(32, int32_t, intermediateInverseTransformBuffer[4 * 4 + 4 + 4]);
7594 : Txfm2DFlipCfg cfg;
7595 :
7596 0 : Av1InverseTransformConfig(
7597 : transform_type,
7598 : TX_4X4,
7599 : &cfg);
7600 : // Forward shift sum uses larger square size, to be consistent with what
7601 : // eb_av1_gen_inv_stage_range() does for inverse shifts.
7602 0 : Av1InverseTransformTwoDCore_c(
7603 : input,
7604 : input_stride,
7605 : output,
7606 : outputStride,
7607 : &cfg,
7608 : intermediateInverseTransformBuffer,
7609 : TX_4X4,
7610 : bit_depth);
7611 0 : }
7612 :
7613 0 : void Av1InverseTransformTwoD_8x8_c(
7614 : int32_t *input,
7615 : uint32_t input_stride,
7616 : int32_t *output,
7617 : uint32_t outputStride,
7618 : TxType transform_type,
7619 : uint8_t bit_depth)
7620 : {
7621 : DECLARE_ALIGNED(32, int32_t, intermediateInverseTransformBuffer[8 * 8 + 8 + 8]);
7622 : Txfm2DFlipCfg cfg;
7623 :
7624 0 : Av1InverseTransformConfig(
7625 : transform_type,
7626 : TX_8X8,
7627 : &cfg);
7628 : // Forward shift sum uses larger square size, to be consistent with what
7629 : // eb_av1_gen_inv_stage_range() does for inverse shifts.
7630 0 : Av1InverseTransformTwoDCore_c(
7631 : input,
7632 : input_stride,
7633 : output,
7634 : outputStride,
7635 : &cfg,
7636 : intermediateInverseTransformBuffer,
7637 : TX_8X8,
7638 : bit_depth);
7639 0 : }
7640 :
7641 0 : void Av1InverseTransformTwoD_16x16_c(
7642 : int32_t *input,
7643 : uint32_t input_stride,
7644 : int32_t *output,
7645 : uint32_t outputStride,
7646 : TxType transform_type,
7647 : uint8_t bit_depth)
7648 : {
7649 : DECLARE_ALIGNED(32, int32_t, intermediateInverseTransformBuffer[16 * 16 + 16 + 16]);
7650 : Txfm2DFlipCfg cfg;
7651 :
7652 0 : Av1InverseTransformConfig(
7653 : transform_type,
7654 : TX_16X16,
7655 : &cfg);
7656 : // Forward shift sum uses larger square size, to be consistent with what
7657 : // eb_av1_gen_inv_stage_range() does for inverse shifts.
7658 0 : Av1InverseTransformTwoDCore_c(
7659 : input,
7660 : input_stride,
7661 : output,
7662 : outputStride,
7663 : &cfg,
7664 : intermediateInverseTransformBuffer,
7665 : TX_16X16,
7666 : bit_depth);
7667 0 : }
7668 :
7669 0 : void Av1InverseTransformTwoD_32x32_c(
7670 : int32_t *input,
7671 : uint32_t input_stride,
7672 : int32_t *output,
7673 : uint32_t outputStride,
7674 : TxType transform_type,
7675 : uint8_t bit_depth)
7676 : {
7677 : DECLARE_ALIGNED(32, int32_t, intermediateInverseTransformBuffer[32 * 32 + 32 + 32]);
7678 : Txfm2DFlipCfg cfg;
7679 :
7680 0 : Av1InverseTransformConfig(
7681 : transform_type,
7682 : TX_32X32,
7683 : &cfg);
7684 : // Forward shift sum uses larger square size, to be consistent with what
7685 : // eb_av1_gen_inv_stage_range() does for inverse shifts.
7686 0 : Av1InverseTransformTwoDCore_c(
7687 : input,
7688 : input_stride,
7689 : output,
7690 : outputStride,
7691 : &cfg,
7692 : intermediateInverseTransformBuffer,
7693 : TX_32X32,
7694 : bit_depth);
7695 0 : }
7696 :
7697 0 : void Av1InverseTransformTwoD_64x64_c(
7698 : int32_t *input,
7699 : uint32_t input_stride,
7700 : int32_t *output,
7701 : uint32_t outputStride,
7702 : TxType transform_type,
7703 : uint8_t bit_depth)
7704 : {
7705 : (void)input_stride;
7706 : // TODO(urvang): Can the same array be reused, instead of using a new array?
7707 : // Remap 32x32 input into a modified 64x64 by:
7708 : // - Copying over these values in top-left 32x32 locations.
7709 : // - Setting the rest of the locations to 0.
7710 : uint32_t row;
7711 : int32_t mod_input[64 * 64];
7712 :
7713 0 : for (row = 0; row < 32; ++row) {
7714 0 : memcpy(mod_input + row * 64, input + row * 64, 32 * sizeof(*mod_input));
7715 0 : memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
7716 : }
7717 0 : memset(mod_input + 32 * 64, 0, 32 * 64 * sizeof(*mod_input));
7718 :
7719 : DECLARE_ALIGNED(32, int32_t, intermediateInverseTransformBuffer[64 * 64 + 64 + 64]);
7720 :
7721 : Txfm2DFlipCfg cfg;
7722 :
7723 0 : Av1InverseTransformConfig(
7724 : transform_type,
7725 : TX_64X64,
7726 : &cfg);
7727 : // Forward shift sum uses larger square size, to be consistent with what
7728 : // eb_av1_gen_inv_stage_range() does for inverse shifts.
7729 0 : Av1InverseTransformTwoDCore_c(
7730 : mod_input,
7731 : 64,
7732 : output,
7733 : outputStride,
7734 : &cfg,
7735 : intermediateInverseTransformBuffer,
7736 : TX_64X64,
7737 : bit_depth);
7738 0 : }
7739 :
7740 : /*********************************************************************
7741 : * Estimate Inverse Transform
7742 : *********************************************************************/
7743 0 : EbErrorType av1_estimate_inv_transform(
7744 : int32_t *coeff_buffer,
7745 : uint32_t coeff_stride,
7746 : int32_t *recon_buffer,
7747 : uint32_t recon_stride,
7748 : TxSize transform_size,
7749 : int16_t *transform_inner_array_ptr,
7750 : uint32_t bit_increment,
7751 : TxType transform_type,
7752 : uint32_t eob,
7753 : uint32_t partial_frequency_n2_flag)
7754 : {
7755 0 : EbErrorType return_error = EB_ErrorNone;
7756 :
7757 : // Nader inverse transform
7758 : (void)transform_inner_array_ptr;
7759 : (void)partial_frequency_n2_flag;
7760 :
7761 : //TxSetType transformSetType = transform_type == DCT_DCT ? EXT_TX_SET_DCTONLY : /*ADST_ADST*/ EXT_TX_SET_DTT4_IDTX ; // NM - Set to zero for the moment
7762 :
7763 0 : uint8_t bit_depth = bit_increment ? 10 : 8; // NM - only 8-bit and 10-bit are handled here
7764 :
7765 0 : if (eob) {
7766 : // assert(av1_ext_tx_used[transformSetType][transform_type]);
7767 :
7768 0 : switch (transform_size) {
7769 0 : case TX_32X32:
7770 0 : Av1InverseTransformTwoD_32x32_c(
7771 : coeff_buffer,
7772 : coeff_stride,
7773 : recon_buffer,
7774 : recon_stride,
7775 : transform_type,
7776 : bit_depth);
7777 0 : break;
7778 0 : case TX_16X16:
7779 0 : Av1InverseTransformTwoD_16x16_c(
7780 : coeff_buffer,
7781 : coeff_stride,
7782 : recon_buffer,
7783 : recon_stride,
7784 : transform_type,
7785 : bit_depth);
7786 0 : break;
7787 0 : case TX_8X8:
7788 0 : Av1InverseTransformTwoD_8x8_c(
7789 : coeff_buffer,
7790 : coeff_stride,
7791 : recon_buffer,
7792 : recon_stride,
7793 : transform_type,
7794 : bit_depth);
7795 0 : break;
7796 0 : case TX_64X64:
7797 0 : Av1InverseTransformTwoD_64x64_c(
7798 : coeff_buffer,
7799 : coeff_stride,
7800 : recon_buffer,
7801 : recon_stride,
7802 : transform_type,
7803 : bit_depth);
7804 0 : break;
7805 0 : case TX_4X4:
7806 : // Note: upstream av1_short_idct4x4 special-cases eob <= 1, which is
7807 : // significant (not just an optimization) for the lossless case; this
7808 : // path runs the full 4x4 inverse transform unconditionally.
7809 0 : Av1InverseTransformTwoD_4x4_c(
7810 : coeff_buffer,
7811 : coeff_stride,
7812 : recon_buffer,
7813 : recon_stride,
7814 : transform_type,
7815 : bit_depth);
7816 0 : break;
7819 0 : default: assert(0 && "Invalid transform size"); break;
7820 : }
7821 0 : }
7822 :
7823 0 : return return_error;
7824 : }
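/*
 * A minimal usage sketch with hypothetical caller-side buffers; the
 * transform_inner_array_ptr and partial_frequency_n2_flag arguments are
 * ignored by this path, so NULL and 0 are safe placeholders:
 *
 *     int32_t coeffs[16 * 16];   // dequantized coefficients, stride 16
 *     int32_t recon[16 * 16];    // inverse-transform output, stride 16
 *     // ... fill coeffs, track the end-of-block position in eob ...
 *     av1_estimate_inv_transform(coeffs, 16, recon, 16, TX_16X16,
 *                                NULL, 0, DCT_DCT, eob, 0);  // 0 => 8-bit
 */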
7825 :
7826 0 : static const int32_t *cast_to_int32(const TranLow *input) {
7827 : assert(sizeof(int32_t) == sizeof(TranLow));
7828 0 : return (const int32_t *)input;
7829 : }
7830 0 : void eb_av1_get_inv_txfm_cfg(TxType tx_type, TxSize tx_size,
7831 : Txfm2DFlipCfg *cfg) {
7832 0 : assert(cfg != NULL);
7833 0 : cfg->tx_size = tx_size;
7835 0 : av1_zero(cfg->stage_range_col);
7836 0 : av1_zero(cfg->stage_range_row);
7837 0 : set_flip_cfg(tx_type, cfg);
7838 0 : const TxType1D tx_type_1d_col = vtx_tab[tx_type];
7839 0 : const TxType1D tx_type_1d_row = htx_tab[tx_type];
7840 0 : cfg->shift = eb_inv_txfm_shift_ls[tx_size];
7841 0 : const int32_t txw_idx = get_txw_idx(tx_size);
7842 0 : const int32_t txh_idx = get_txh_idx(tx_size);
7843 0 : cfg->cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
7844 0 : cfg->cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
7845 0 : cfg->txfm_type_col = av1_txfm_type_ls[txh_idx][tx_type_1d_col];
7846 0 : if (cfg->txfm_type_col == TXFM_TYPE_ADST4)
7847 0 : memcpy(cfg->stage_range_col, iadst4_range, sizeof(iadst4_range));
7848 0 : cfg->txfm_type_row = av1_txfm_type_ls[txw_idx][tx_type_1d_row];
7849 0 : if (cfg->txfm_type_row == TXFM_TYPE_ADST4)
7850 0 : memcpy(cfg->stage_range_row, iadst4_range, sizeof(iadst4_range));
7851 0 : cfg->stage_num_col = av1_txfm_stage_num_list[cfg->txfm_type_col];
7852 0 : cfg->stage_num_row = av1_txfm_stage_num_list[cfg->txfm_type_row];
7853 0 : }
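/*
 * Everything in the config is a table lookup keyed by the 1D sizes: the
 * shift pair comes from eb_inv_txfm_shift_ls[], the rotation precision from
 * inv_cos_bit_col/row[], and the two 1D kernels from av1_txfm_type_ls[] via
 * the vertical/horizontal split in vtx_tab/htx_tab (e.g. ADST_DCT selects
 * an ADST column pass and a DCT row pass). Only ADST4 carries explicit
 * stage ranges here; the rest are derived in eb_av1_gen_inv_stage_range().
 */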
7854 0 : static INLINE void inv_txfm2d_add_c(const int32_t *input,
7855 : uint16_t *output_r, int32_t stride_r,
7856 : uint16_t *output_w, int32_t stride_w,
7857 : Txfm2DFlipCfg *cfg,
7858 : int32_t *txfm_buf, TxSize tx_size,
7859 : int32_t bd) {
7860 : // Note when assigning txfm_size_col, we use the txfm_size from the
7861 : // row configuration and vice versa. This is intentionally done to
7862 : // accurately perform rectangular transforms. When the transform is
7863 : // rectangular, the number of columns will be the same as the
7864 : // txfm_size stored in the row cfg struct. It will make no difference
7865 : // for square transforms.
7866 0 : const int32_t txfm_size_col = tx_size_wide[cfg->tx_size];
7867 0 : const int32_t txfm_size_row = tx_size_high[cfg->tx_size];
7868 : // Take the shift from the larger dimension in the rectangular case.
7869 0 : const int8_t *shift = cfg->shift;
7870 0 : const int32_t rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
7871 : int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
7872 : int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
7873 0 : assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM);
7874 0 : assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM);
7875 0 : eb_av1_gen_inv_stage_range(stage_range_col, stage_range_row, cfg, tx_size, bd);
7876 :
7877 0 : const int8_t cos_bit_col = cfg->cos_bit_col;
7878 0 : const int8_t cos_bit_row = cfg->cos_bit_row;
7879 0 : const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->txfm_type_col);
7880 0 : const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->txfm_type_row);
7881 : ASSERT(txfm_func_col);
7882 : ASSERT(txfm_func_row);
7883 : // txfm_buf's length is txfm_size_row * txfm_size_col + 2 *
7884 : // AOMMAX(txfm_size_row, txfm_size_col)
7885 : // it is used for intermediate data buffering
7886 0 : const int32_t buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
7887 0 : int32_t *temp_in = txfm_buf;
7888 0 : int32_t *temp_out = temp_in + buf_offset;
7889 0 : int32_t *buf = temp_out + buf_offset;
7890 0 : int32_t *buf_ptr = buf;
7891 : int32_t c, r;
7892 :
7893 : // Rows
7894 0 : for (r = 0; r < txfm_size_row; ++r) {
7895 0 : if (abs(rect_type) == 1) {
7896 0 : for (c = 0; c < txfm_size_col; ++c)
7897 0 : temp_in[c] = round_shift((int64_t)input[c] * NewInvSqrt2, NewSqrt2Bits);
7898 0 : clamp_buf(temp_in, txfm_size_col, (int8_t)(bd + 8));
7899 0 : txfm_func_row(temp_in, buf_ptr, cos_bit_row, stage_range_row);
7900 : }
7901 : else {
7902 0 : for (c = 0; c < txfm_size_col; ++c)
7903 0 : temp_in[c] = input[c];
7904 0 : clamp_buf(temp_in, txfm_size_col, (int8_t)(bd + 8));
7905 0 : txfm_func_row(temp_in, buf_ptr, cos_bit_row, stage_range_row);
7906 : }
7907 0 : eb_av1_round_shift_array_c(buf_ptr, txfm_size_col, -shift[0]);
7908 0 : input += txfm_size_col;
7909 0 : buf_ptr += txfm_size_col;
7910 : }
7911 :
7912 : // Columns
7913 0 : for (c = 0; c < txfm_size_col; ++c) {
7914 0 : if (cfg->lr_flip == 0) {
7915 0 : for (r = 0; r < txfm_size_row; ++r)
7916 0 : temp_in[r] = buf[r * txfm_size_col + c];
7917 : }
7918 : else {
7919 : // flip left right
7920 0 : for (r = 0; r < txfm_size_row; ++r)
7921 0 : temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
7922 : }
7923 0 : clamp_buf(temp_in, txfm_size_row, (int8_t)(AOMMAX(bd + 6, 16)));
7924 0 : txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
7925 0 : eb_av1_round_shift_array_c(temp_out, txfm_size_row, -shift[1]);
7926 0 : if (cfg->ud_flip == 0) {
7927 0 : for (r = 0; r < txfm_size_row; ++r) {
7928 0 : output_w[r * stride_w + c] =
7929 0 : highbd_clip_pixel_add(output_r[r * stride_r + c], temp_out[r], bd);
7930 : }
7931 : }
7932 : else {
7933 : // flip upside down
7934 0 : for (r = 0; r < txfm_size_row; ++r) {
7935 0 : output_w[r * stride_w + c] = highbd_clip_pixel_add(
7936 0 : output_r[r * stride_r + c], temp_out[txfm_size_row - r - 1], bd);
7937 : }
7938 : }
7939 : }
7940 0 : }
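/*
 * This is the same row/column pipeline as Av1InverseTransformTwoDCore_c
 * above, with two differences: the input is assumed contiguous (the row
 * pointer advances by txfm_size_col rather than a caller stride), and the
 * column pass does not write raw residuals but adds them to the prediction
 * read from output_r, storing the clipped sum to output_w via
 * highbd_clip_pixel_add().
 */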
7941 0 : static INLINE void inv_txfm2d_add_facade(const int32_t *input,
7942 : uint16_t *output_r, int32_t stride_r,
7943 : uint16_t *output_w, int32_t stride_w,
7944 : int32_t *txfm_buf,
7945 : TxType tx_type, TxSize tx_size,
7946 : int32_t bd) {
7947 : Txfm2DFlipCfg cfg;
7948 0 : eb_av1_get_inv_txfm_cfg(tx_type, tx_size, &cfg);
7949 : // Forward shift sum uses larger square size, to be consistent with what
7950 : // eb_av1_gen_inv_stage_range() does for inverse shifts.
7951 0 : inv_txfm2d_add_c(input, output_r, stride_r, output_w, stride_w,
7952 : &cfg, txfm_buf, tx_size, bd);
7953 0 : }
7954 0 : void eb_av1_inv_txfm2d_add_4x4_c(const int32_t *input,
7955 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w,
7956 : int32_t stride_w, TxType tx_type, int32_t bd) {
7957 : DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 4 + 4 + 4]);
7958 0 : inv_txfm2d_add_facade(input, output_r, stride_r, output_w, stride_w,
7959 : txfm_buf, tx_type, TX_4X4, bd);
7960 0 : }
7961 0 : void eb_av1_inv_txfm2d_add_8x8_c(const int32_t *input,
7962 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
7963 : TxType tx_type, int32_t bd) {
7964 : DECLARE_ALIGNED(32, int32_t, txfm_buf[8 * 8 + 8 + 8]);
7965 0 : inv_txfm2d_add_facade(input, output_r, stride_r, output_w, stride_w,
7966 : txfm_buf, tx_type, TX_8X8, bd);
7967 0 : }
7968 0 : void eb_av1_inv_txfm2d_add_16x16_c(const int32_t *input,
7969 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
7970 : TxType tx_type, int32_t bd) {
7971 : DECLARE_ALIGNED(32, int32_t, txfm_buf[16 * 16 + 16 + 16]);
7972 0 : inv_txfm2d_add_facade(input, output_r, stride_r, output_w, stride_w,
7973 : txfm_buf, tx_type, TX_16X16, bd);
7974 0 : }
7975 :
7976 0 : void eb_av1_inv_txfm2d_add_32x32_c(const int32_t *input,
7977 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
7978 : TxType tx_type, int32_t bd) {
7979 : DECLARE_ALIGNED(32, int32_t, txfm_buf[32 * 32 + 32 + 32]);
7980 0 : inv_txfm2d_add_facade(input, output_r, stride_r, output_w, stride_w,
7981 : txfm_buf, tx_type, TX_32X32, bd);
7982 0 : }
7983 :
7984 0 : void eb_av1_inv_txfm2d_add_64x64_c(const int32_t *input,
7985 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
7986 : TxType tx_type, int32_t bd) {
7987 : // TODO(urvang): Can the same array be reused, instead of using a new array?
7988 : // Remap 32x32 input into a modified 64x64 by:
7989 : // - Copying over these values in top-left 32x32 locations.
7990 : // - Setting the rest of the locations to 0.
7991 : int32_t mod_input[64 * 64];
7992 0 : for (int32_t row = 0; row < 32; ++row) {
7993 0 : memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
7994 0 : memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
7995 : }
7996 0 : memset(mod_input + 32 * 64, 0, 32 * 64 * sizeof(*mod_input));
7997 : DECLARE_ALIGNED(32, int32_t, txfm_buf[64 * 64 + 64 + 64]);
7998 0 : inv_txfm2d_add_facade(mod_input, output_r, stride_r, output_w, stride_w,
7999 : txfm_buf, tx_type, TX_64X64, bd);
8000 0 : }
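/*
 * The zero-extension above mirrors an AV1 bitstream rule: for 64-point
 * transforms only the low-frequency 32x32 quadrant of coefficients is ever
 * coded, so the decoder rebuilds the missing half as zeros before running
 * the full 64-point inverse. The 64x32, 32x64, 16x64 and 64x16 wrappers
 * below apply the same remapping to their coded 32-wide or 32-high halves.
 */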
8001 :
8002 0 : void eb_av1_inv_txfm2d_add_4x8_c(const int32_t *input,
8003 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
8004 : TxType tx_type, TxSize tx_size, int32_t bd) {
8005 : (void)tx_size;
8006 : DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 8 + 8 + 8]);
8007 0 : inv_txfm2d_add_facade(input, output_r, stride_r, output_w, stride_w,
8008 : txfm_buf, tx_type, TX_4X8, bd);
8009 0 : }
8010 :
8011 0 : void eb_av1_inv_txfm2d_add_8x4_c(const int32_t *input,
8012 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
8013 : TxType tx_type, TxSize tx_size, int32_t bd) {
8014 : (void)tx_size;
8015 : DECLARE_ALIGNED(32, int32_t, txfm_buf[8 * 4 + 8 + 8]);
8016 0 : inv_txfm2d_add_facade(input, output_r, stride_r, output_w, stride_w,
8017 : txfm_buf, tx_type, TX_8X4, bd);
8018 0 : }
8019 :
8020 0 : void eb_av1_inv_txfm2d_add_8x16_c(const int32_t *input,
8021 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
8022 : TxType tx_type, TxSize tx_size, int32_t eob, int32_t bd) {
8023 : UNUSED(tx_size);
8024 : UNUSED(eob);
8025 : DECLARE_ALIGNED(32, int32_t, txfm_buf[8 * 16 + 16 + 16]);
8026 0 : inv_txfm2d_add_facade(input, output_r, stride_r, output_w, stride_w,
8027 : txfm_buf, tx_type, TX_8X16, bd);
8028 0 : }
8029 :
8030 0 : void eb_av1_inv_txfm2d_add_16x8_c(const int32_t *input,
8031 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
8032 : TxType tx_type, TxSize tx_size, int32_t eob, int32_t bd) {
8033 : UNUSED(tx_size);
8034 : UNUSED(eob);
8035 : DECLARE_ALIGNED(32, int32_t, txfm_buf[16 * 8 + 16 + 16]);
8036 0 : inv_txfm2d_add_facade(input, output_r, stride_r, output_w, stride_w,
8037 : txfm_buf, tx_type, TX_16X8, bd);
8038 0 : }
8039 :
8040 0 : void eb_av1_inv_txfm2d_add_16x32_c(const int32_t *input,
8041 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
8042 : TxType tx_type, TxSize tx_size, int32_t eob, int32_t bd) {
8043 : UNUSED(tx_size);
8044 : UNUSED(eob);
8045 : DECLARE_ALIGNED(32, int32_t, txfm_buf[16 * 32 + 32 + 32]);
8046 0 : inv_txfm2d_add_facade(input, output_r, stride_r, output_w, stride_w,
8047 : txfm_buf, tx_type, TX_16X32, bd);
8048 0 : }
8049 :
8050 0 : void eb_av1_inv_txfm2d_add_32x16_c(const int32_t *input,
8051 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
8052 : TxType tx_type, TxSize tx_size, int32_t eob, int32_t bd) {
8053 : UNUSED(tx_size);
8054 : UNUSED(eob);
8055 : DECLARE_ALIGNED(32, int32_t, txfm_buf[32 * 16 + 32 + 32]);
8056 0 : inv_txfm2d_add_facade(input, output_r, stride_r, output_w, stride_w,
8057 : txfm_buf, tx_type, TX_32X16, bd);
8058 0 : }
8059 :
8060 0 : void eb_av1_inv_txfm2d_add_64x32_c(const int32_t *input,
8061 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
8062 : TxType tx_type, TxSize tx_size, int32_t eob, int32_t bd) {
8063 : UNUSED(tx_size);
8064 : UNUSED(eob);
8065 : // Remap 32x32 input into a modified 64x32 by:
8066 : // - Copying over these values in top-left 32x32 locations.
8067 : // - Setting the rest of the locations to 0.
8068 : int32_t mod_input[64 * 32];
8069 0 : for (int32_t row = 0; row < 32; ++row) {
8070 0 : memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
8071 0 : memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
8072 : }
8073 : DECLARE_ALIGNED(32, int32_t, txfm_buf[64 * 32 + 64 + 64]);
8074 0 : inv_txfm2d_add_facade(mod_input, output_r, stride_r, output_w, stride_w,
8075 : txfm_buf, tx_type, TX_64X32,
8076 : bd);
8077 0 : }
8078 :
8079 0 : void eb_av1_inv_txfm2d_add_32x64_c(const int32_t *input,
8080 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
8081 : TxType tx_type, TxSize tx_size, int32_t eob, int32_t bd) {
8082 : UNUSED(tx_size);
8083 : UNUSED(eob);
8084 : // Remap 32x32 input into a modified 32x64 input by:
8085 : // - Copying over these values in top-left 32x32 locations.
8086 : // - Setting the rest of the locations to 0.
8087 : int32_t mod_input[32 * 64];
8088 0 : memcpy(mod_input, input, 32 * 32 * sizeof(*mod_input));
8089 0 : memset(mod_input + 32 * 32, 0, 32 * 32 * sizeof(*mod_input));
8090 : DECLARE_ALIGNED(32, int32_t, txfm_buf[64 * 32 + 64 + 64]);
8091 0 : inv_txfm2d_add_facade(mod_input, output_r, stride_r, output_w, stride_w,
8092 : txfm_buf, tx_type, TX_32X64, bd);
8093 0 : }
8094 :
8095 0 : void eb_av1_inv_txfm2d_add_16x64_c(const int32_t *input,
8096 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
8097 : TxType tx_type, TxSize tx_size, int32_t eob, int32_t bd) {
8098 : UNUSED(tx_size);
8099 : UNUSED(eob);
8100 : // Remap 16x32 input into a modified 16x64 input by:
8101 : // - Copying over these values in top-left 16x32 locations.
8102 : // - Setting the rest of the locations to 0.
8103 : int32_t mod_input[16 * 64];
8104 0 : memcpy(mod_input, input, 16 * 32 * sizeof(*mod_input));
8105 0 : memset(mod_input + 16 * 32, 0, 16 * 32 * sizeof(*mod_input));
8106 : DECLARE_ALIGNED(32, int32_t, txfm_buf[16 * 64 + 64 + 64]);
8107 0 : inv_txfm2d_add_facade(mod_input, output_r, stride_r, output_w, stride_w,
8108 : txfm_buf, tx_type, TX_16X64, bd);
8109 0 : }
8110 :
8111 0 : void eb_av1_inv_txfm2d_add_64x16_c(const int32_t *input,
8112 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
8113 : TxType tx_type, TxSize tx_size, int32_t eob, int32_t bd) {
8114 : UNUSED(tx_size);
8115 : UNUSED(eob);
8116 : // Remap 32x16 input into a modified 64x16 by:
8117 : // - Copying over these values in top-left 32x16 locations.
8118 : // - Setting the rest of the locations to 0.
8119 : int32_t mod_input[64 * 16];
8120 0 : for (int32_t row = 0; row < 16; ++row) {
8121 0 : memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
8122 0 : memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
8123 : }
8124 : DECLARE_ALIGNED(32, int32_t, txfm_buf[16 * 64 + 64 + 64]);
8125 0 : inv_txfm2d_add_facade(mod_input, output_r, stride_r, output_w, stride_w,
8126 : txfm_buf, tx_type, TX_64X16, bd);
8127 0 : }
8128 :
8129 0 : void eb_av1_inv_txfm2d_add_4x16_c(const int32_t *input,
8130 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
8131 : TxType tx_type, TxSize tx_size, int32_t bd) {
8132 : UNUSED(tx_size);
8133 : DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 16 + 16 + 16]);
8134 0 : inv_txfm2d_add_facade(input, output_r, stride_r, output_w, stride_w,
8135 : txfm_buf, tx_type, TX_4X16, bd);
8136 0 : }
8137 :
8138 0 : void eb_av1_inv_txfm2d_add_16x4_c(const int32_t *input,
8139 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
8140 : TxType tx_type, TxSize tx_size, int32_t bd) {
8141 : UNUSED(tx_size);
8142 : DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 16 + 16 + 16]);
8143 0 : inv_txfm2d_add_facade(input, output_r, stride_r, output_w, stride_w,
8144 : txfm_buf, tx_type, TX_16X4, bd);
8145 0 : }
8146 :
8147 0 : void eb_av1_inv_txfm2d_add_8x32_c(const int32_t *input,
8148 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
8149 : TxType tx_type, TxSize tx_size, int32_t eob, int32_t bd) {
8150 : UNUSED(tx_size);
8151 : UNUSED(eob);
8152 : DECLARE_ALIGNED(32, int32_t, txfm_buf[8 * 32 + 32 + 32]);
8153 0 : inv_txfm2d_add_facade(input, output_r, stride_r, output_w, stride_w,
8154 : txfm_buf, tx_type, TX_8X32, bd);
8155 0 : }
8156 :
8157 0 : void eb_av1_inv_txfm2d_add_32x8_c(const int32_t *input,
8158 : uint16_t *output_r, int32_t stride_r, uint16_t *output_w, int32_t stride_w,
8159 : TxType tx_type, TxSize tx_size, int32_t eob, int32_t bd) {
8160 : UNUSED(tx_size);
8161 : UNUSED(eob);
8162 : DECLARE_ALIGNED(32, int32_t, txfm_buf[8 * 32 + 32 + 32]);
8163 0 : inv_txfm2d_add_facade(input, output_r, stride_r, output_w, stride_w,
8164 : txfm_buf, tx_type, TX_32X8, bd);
8165 0 : }
8166 :
8167 0 : static INLINE int32_t range_check_value(int32_t value, int8_t bit) {
8168 : #if CONFIG_COEFFICIENT_RANGE_CHECKING
8169 : const int64_t max_value = (1LL << (bit - 1)) - 1;
8170 : const int64_t min_value = -(1LL << (bit - 1));
8171 : if (value < min_value || value > max_value) {
8172 : fprintf(stderr, "coeff out of bit range, value: %d bit %d\n", value, bit);
8173 : assert(0);
8174 : }
8175 : #endif // CONFIG_COEFFICIENT_RANGE_CHECKING
8176 : #if DO_RANGE_CHECK_CLAMP
8177 : bit = AOMMIN(bit, 31);
8178 : return clamp(value, -(1 << (bit - 1)), (1 << (bit - 1)) - 1);
8179 : #endif // DO_RANGE_CHECK_CLAMP
8180 : (void)bit;
8181 0 : return value;
8182 : }
8183 :
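/*
 * Worked example for range_check_value(): the Walsh-Hadamard callers below
 * pass bit = bd + 1, so 8-bit content (bd = 8) must keep intermediates in
 * [-(1 << 8), (1 << 8) - 1] = [-256, 255], and 10-bit content in
 * [-1024, 1023]. A minimal self-contained version of the same check
 * (checked_value is illustrative only, not part of this file; assert() is
 * already available here):
 */
static int32_t checked_value(int32_t value, int8_t bit) {
    const int64_t max_value = (1LL << (bit - 1)) - 1;
    const int64_t min_value = -(1LL << (bit - 1));
    /* Trap out-of-range intermediates in debug builds. */
    assert(value >= min_value && value <= max_value);
    return value;
}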
8184 0 : void eb_av1_highbd_iwht4x4_16_add_c(const TranLow *input,
8185 : uint8_t *dest8_r, int32_t stride_r, uint8_t *dest8_w, int32_t stride_w,
8186 : int32_t bd) {
8187 : /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
8188 : 0.5 shifts per pixel. */
8189 : int32_t i;
8190 : TranLow output[16];
8191 : TranLow a1, b1, c1, d1, e1;
8192 0 : const TranLow *ip = input;
8193 0 : TranLow *op = output;
8194 0 : uint16_t *dest_r = CONVERT_TO_SHORTPTR(dest8_r);
8195 0 : uint16_t *dest_w = CONVERT_TO_SHORTPTR(dest8_w);
8196 :
8197 0 : for (i = 0; i < 4; i++) {
8198 0 : a1 = ip[0] >> UNIT_QUANT_SHIFT;
8199 0 : c1 = ip[1] >> UNIT_QUANT_SHIFT;
8200 0 : d1 = ip[2] >> UNIT_QUANT_SHIFT;
8201 0 : b1 = ip[3] >> UNIT_QUANT_SHIFT;
8202 0 : a1 += c1;
8203 0 : d1 -= b1;
8204 0 : e1 = (a1 - d1) >> 1;
8205 0 : b1 = e1 - b1;
8206 0 : c1 = e1 - c1;
8207 0 : a1 -= b1;
8208 0 : d1 += c1;
8209 0 : op[0] = a1;
8210 0 : op[1] = b1;
8211 0 : op[2] = c1;
8212 0 : op[3] = d1;
8213 0 : ip += 4;
8214 0 : op += 4;
8215 : }
8216 :
8217 0 : ip = output;
8218 0 : for (i = 0; i < 4; i++) {
8219 0 : a1 = ip[4 * 0];
8220 0 : c1 = ip[4 * 1];
8221 0 : d1 = ip[4 * 2];
8222 0 : b1 = ip[4 * 3];
8223 0 : a1 += c1;
8224 0 : d1 -= b1;
8225 0 : e1 = (a1 - d1) >> 1;
8226 0 : b1 = e1 - b1;
8227 0 : c1 = e1 - c1;
8228 0 : a1 -= b1;
8229 0 : d1 += c1;
8230 0 : range_check_value(a1, (int8_t)(bd + 1));
8231 0 : range_check_value(b1, (int8_t)(bd + 1));
8232 0 : range_check_value(c1, (int8_t)(bd + 1));
8233 0 : range_check_value(d1, (int8_t)(bd + 1));
8234 :
8235 0 : dest_w[stride_w * 0] = highbd_clip_pixel_add(dest_r[stride_r * 0], a1, bd);
8236 0 : dest_w[stride_w * 1] = highbd_clip_pixel_add(dest_r[stride_r * 1], b1, bd);
8237 0 : dest_w[stride_w * 2] = highbd_clip_pixel_add(dest_r[stride_r * 2], c1, bd);
8238 0 : dest_w[stride_w * 3] = highbd_clip_pixel_add(dest_r[stride_r * 3], d1, bd);
8239 :
8240 0 : ip++;
8241 0 : dest_r++;
8242 0 : dest_w++;
8243 : }
8244 0 : }
8245 :
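/*
 * Operation count behind the "3.5 adds, 0.5 shifts per pixel" note above:
 * each 4-sample butterfly performs 7 additions/subtractions (a1 += c1,
 * d1 -= b1, the subtraction inside e1 = (a1 - d1) >> 1, b1 = e1 - b1,
 * c1 = e1 - c1, a1 -= b1, d1 += c1) and 1 shift. A 4x4 block needs 4
 * butterflies per pass and two passes, i.e. 8 * 7 = 56 adds and 8 * 1 = 8
 * shifts for 16 pixels: 3.5 adds and 0.5 shifts per pixel (the
 * UNIT_QUANT_SHIFT dequantization shifts on input are not counted).
 */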
8246 0 : void eb_av1_highbd_iwht4x4_1_add_c(const TranLow *in,
8247 : uint8_t *dest8_r, int32_t dest_stride_r,
8248 : uint8_t *dest8_w, int32_t dest_stride_w,
8249 : int32_t bd) {
8250 : int32_t i;
8251 : TranLow a1, e1;
8252 : TranLow tmp[4];
8253 0 : const TranLow *ip = in;
8254 0 : TranLow *op = tmp;
8255 0 : uint16_t *dest_r = CONVERT_TO_SHORTPTR(dest8_r);
8256 0 : uint16_t *dest_w = CONVERT_TO_SHORTPTR(dest8_w);
8257 : (void)bd;
8258 :
8259 0 : a1 = ip[0] >> UNIT_QUANT_SHIFT;
8260 0 : e1 = a1 >> 1;
8261 0 : a1 -= e1;
8262 0 : op[0] = a1;
8263 0 : op[1] = op[2] = op[3] = e1;
8264 :
8265 0 : ip = tmp;
8266 0 : for (i = 0; i < 4; i++) {
8267 0 : e1 = ip[0] >> 1;
8268 0 : a1 = ip[0] - e1;
8269 0 : dest_w[dest_stride_w * 0] =
8270 0 : highbd_clip_pixel_add(dest_r[dest_stride_r * 0], a1, bd);
8271 0 : dest_w[dest_stride_w * 1] =
8272 0 : highbd_clip_pixel_add(dest_r[dest_stride_r * 1], e1, bd);
8273 0 : dest_w[dest_stride_w * 2] =
8274 0 : highbd_clip_pixel_add(dest_r[dest_stride_r * 2], e1, bd);
8275 0 : dest_w[dest_stride_w * 3] =
8276 0 : highbd_clip_pixel_add(dest_r[dest_stride_r * 3], e1, bd);
8277 0 : ip++;
8278 0 : dest_r++;
8279 0 : dest_w++;
8280 : }
8281 0 : }
8282 0 : static void highbd_iwht4x4_add(const TranLow *input,
8283 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8284 : int32_t eob, int32_t bd) {
8285 0 : if (eob > 1)
8286 0 : eb_av1_highbd_iwht4x4_16_add_c(input,
8287 : dest_r, stride_r, dest_w, stride_w, bd);
8288 : else
8289 0 : eb_av1_highbd_iwht4x4_1_add_c(input,
8290 : dest_r, stride_r, dest_w, stride_w, bd);
8291 0 : }
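/*
 * eob <= 1 means at most the DC coefficient is coded. The *_1_add kernel
 * spreads that single value exactly as the full 16-coefficient kernel would
 * for a DC-only block (the same a1/e1 butterfly applied to one nonzero
 * input), so the shortcut is bit-exact rather than an approximation; this
 * matters for the lossless path, where reconstruction must round-trip
 * exactly.
 */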
8292 0 : void eb_av1_highbd_inv_txfm_add_4x4(const TranLow *input,
8293 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8294 : const TxfmParam *txfm_param) {
8295 : // assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
8296 0 : int32_t eob = txfm_param->eob;
8297 0 : int32_t bd = txfm_param->bd;
8298 0 : int32_t lossless = txfm_param->lossless;
8299 0 : const int32_t *src = cast_to_int32(input);
8300 0 : const TxType tx_type = txfm_param->tx_type;
8301 0 : if (lossless) {
8302 0 : assert(tx_type == DCT_DCT);
8303 0 : highbd_iwht4x4_add(input,
8304 : dest_r, stride_r, dest_w, stride_w, eob, bd);
8305 0 : return;
8306 : }
8307 0 : eb_av1_inv_txfm2d_add_4x4(src,
8308 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8309 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8310 : tx_type, bd);
8311 : }
8312 0 : static void highbd_inv_txfm_add_8x8(const TranLow *input,
8313 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8314 : const TxfmParam *txfm_param) {
8315 0 : int32_t bd = txfm_param->bd;
8316 0 : const TxType tx_type = txfm_param->tx_type;
8317 0 : const int32_t *src = cast_to_int32(input);
8318 0 : eb_av1_inv_txfm2d_add_8x8(src,
8319 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8320 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8321 : tx_type, bd);
8322 0 : }
8323 :
8324 0 : static void highbd_inv_txfm_add_16x16(const TranLow *input,
8325 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8326 : const TxfmParam *txfm_param) {
8327 0 : int32_t bd = txfm_param->bd;
8328 0 : const TxType tx_type = txfm_param->tx_type;
8329 0 : const int32_t *src = cast_to_int32(input);
8330 0 : eb_av1_inv_txfm2d_add_16x16(src,
8331 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8332 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8333 : tx_type, bd);
8334 0 : }
8335 :
8336 0 : static void highbd_inv_txfm_add_32x32(const TranLow *input,
8337 : uint8_t *dest_r, int32_t stride_r,
8338 : uint8_t *dest_w, int32_t stride_w,
8339 : const TxfmParam *txfm_param) {
8340 0 : const int32_t bd = txfm_param->bd;
8341 0 : const TxType tx_type = txfm_param->tx_type;
8342 0 : const int32_t *src = cast_to_int32(input);
8343 0 : switch (tx_type) {
8344 0 : case DCT_DCT:
8345 : case IDTX:
8346 0 : eb_av1_inv_txfm2d_add_32x32(src,
8347 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8348 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8349 : tx_type, bd);
8350 0 : break;
8351 0 : default:
8352 0 : assert(0);
8353 : }
8354 0 : }
8355 :
8356 0 : static void highbd_inv_txfm_add_64x64(const TranLow *input,
8357 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8358 : const TxfmParam *txfm_param) {
8359 0 : const int32_t bd = txfm_param->bd;
8360 0 : const TxType tx_type = txfm_param->tx_type;
8361 0 : const int32_t *src = cast_to_int32(input);
8362 0 : assert(tx_type == DCT_DCT);
8363 0 : eb_av1_inv_txfm2d_add_64x64(src,
8364 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8365 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8366 : tx_type, bd);
8367 0 : }
8368 :
8369 0 : static void highbd_inv_txfm_add_4x8(const TranLow *input,
8370 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8371 : const TxfmParam *txfm_param) {
8372 : //TODO: enable this assert once tx_set_type is populated: assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
8373 0 : const int32_t *src = cast_to_int32(input);
8374 0 : eb_av1_inv_txfm2d_add_4x8(src,
8375 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8376 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8377 0 : txfm_param->tx_type, txfm_param->tx_size, txfm_param->bd);
8378 0 : }
8379 :
8380 0 : static void highbd_inv_txfm_add_8x4(const TranLow *input,
8381 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8382 : const TxfmParam *txfm_param) {
8383 : //TODO: enable this assert once tx_set_type is populated: assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
8384 0 : const int32_t *src = cast_to_int32(input);
8385 0 : eb_av1_inv_txfm2d_add_8x4(src,
8386 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8387 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8388 0 : txfm_param->tx_type, txfm_param->tx_size, txfm_param->bd);
8389 0 : }
8390 :
8391 0 : static void highbd_inv_txfm_add_8x16(const TranLow *input,
8392 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8393 : const TxfmParam *txfm_param) {
8394 0 : const int32_t *src = cast_to_int32(input);
8395 0 : eb_av1_inv_txfm2d_add_8x16(src,
8396 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8397 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8398 0 : txfm_param->tx_type, txfm_param->tx_size, txfm_param->eob, txfm_param->bd);
8399 0 : }
8400 :
8401 0 : static void highbd_inv_txfm_add_16x8(const TranLow *input,
8402 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8403 : const TxfmParam *txfm_param) {
8404 0 : const int32_t *src = cast_to_int32(input);
8405 0 : eb_av1_inv_txfm2d_add_16x8(src,
8406 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8407 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8408 0 : txfm_param->tx_type, txfm_param->tx_size, txfm_param->eob, txfm_param->bd);
8409 0 : }
8410 :
8411 0 : static void highbd_inv_txfm_add_16x32(const TranLow *input,
8412 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8413 : const TxfmParam *txfm_param) {
8414 0 : const int32_t *src = cast_to_int32(input);
8415 0 : eb_av1_inv_txfm2d_add_16x32(src,
8416 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8417 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8418 0 : txfm_param->tx_type, txfm_param->tx_size, txfm_param->eob, txfm_param->bd);
8419 0 : }
8420 :
8421 0 : static void highbd_inv_txfm_add_32x16(const TranLow *input,
8422 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8423 : const TxfmParam *txfm_param) {
8424 0 : const int32_t *src = cast_to_int32(input);
8425 0 : eb_av1_inv_txfm2d_add_32x16(src,
8426 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8427 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8428 0 : txfm_param->tx_type, txfm_param->tx_size, txfm_param->eob, txfm_param->bd);
8429 0 : }
8430 :
8431 0 : static void highbd_inv_txfm_add_16x4(const TranLow *input,
8432 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8433 : const TxfmParam *txfm_param) {
8434 0 : const int32_t *src = cast_to_int32(input);
8435 0 : eb_av1_inv_txfm2d_add_16x4(src,
8436 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8437 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8438 0 : txfm_param->tx_type, txfm_param->tx_size, txfm_param->bd);
8439 0 : }
8440 :
8441 0 : static void highbd_inv_txfm_add_4x16(const TranLow *input,
8442 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8443 : const TxfmParam *txfm_param) {
8444 0 : const int32_t *src = cast_to_int32(input);
8445 0 : eb_av1_inv_txfm2d_add_4x16(src,
8446 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8447 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8448 0 : txfm_param->tx_type, txfm_param->tx_size, txfm_param->bd);
8449 0 : }
8450 :
8451 0 : static void highbd_inv_txfm_add_32x8(const TranLow *input,
8452 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8453 : const TxfmParam *txfm_param) {
8454 0 : const int32_t *src = cast_to_int32(input);
8455 0 : eb_av1_inv_txfm2d_add_32x8(src,
8456 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8457 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8458 0 : txfm_param->tx_type, txfm_param->tx_size, txfm_param->eob, txfm_param->bd);
8459 0 : }
8460 :
8461 0 : static void highbd_inv_txfm_add_8x32(const TranLow *input,
8462 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8463 : const TxfmParam *txfm_param) {
8464 0 : const int32_t *src = cast_to_int32(input);
8465 0 : eb_av1_inv_txfm2d_add_8x32(src,
8466 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8467 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8468 0 : txfm_param->tx_type, txfm_param->tx_size, txfm_param->eob, txfm_param->bd);
8469 0 : }
8470 :
8471 0 : static void highbd_inv_txfm_add_32x64(const TranLow *input,
8472 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8473 : const TxfmParam *txfm_param) {
8474 0 : const int32_t *src = cast_to_int32(input);
8475 0 : eb_av1_inv_txfm2d_add_32x64(src,
8476 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8477 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8478 0 : txfm_param->tx_type, txfm_param->tx_size, txfm_param->eob, txfm_param->bd);
8479 0 : }
8480 :
8481 0 : static void highbd_inv_txfm_add_64x32(const TranLow *input,
8482 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8483 : const TxfmParam *txfm_param) {
8484 0 : const int32_t *src = cast_to_int32(input);
8485 0 : eb_av1_inv_txfm2d_add_64x32(src,
8486 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8487 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8488 0 : txfm_param->tx_type, txfm_param->tx_size, txfm_param->eob, txfm_param->bd);
8489 0 : }
8490 :
8491 0 : static void highbd_inv_txfm_add_16x64(const TranLow *input,
8492 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8493 : const TxfmParam *txfm_param) {
8494 0 : const int32_t *src = cast_to_int32(input);
8495 0 : eb_av1_inv_txfm2d_add_16x64(src,
8496 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8497 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8498 0 : txfm_param->tx_type, txfm_param->tx_size, txfm_param->eob, txfm_param->bd);
8499 0 : }
8500 :
8501 0 : static void highbd_inv_txfm_add_64x16(const TranLow *input,
8502 : uint8_t *dest_r, int32_t stride_r, uint8_t *dest_w, int32_t stride_w,
8503 : const TxfmParam *txfm_param) {
8504 0 : const int32_t *src = cast_to_int32(input);
8505 0 : eb_av1_inv_txfm2d_add_64x16(src,
8506 0 : CONVERT_TO_SHORTPTR(dest_r), stride_r,
8507 0 : CONVERT_TO_SHORTPTR(dest_w), stride_w,
8508 0 : txfm_param->tx_type, txfm_param->tx_size, txfm_param->eob, txfm_param->bd);
8509 0 : }
8510 :
8511 0 : static void highbd_inv_txfm_add(const TranLow *input,
8512 : uint8_t *dest_r, int32_t stride_r,
8513 : uint8_t *dest_w, int32_t stride_w,
8514 : const TxfmParam *txfm_param) {
8515 : //assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
8516 0 : const TxSize tx_size = txfm_param->tx_size;
8517 0 : switch (tx_size) {
8518 0 : case TX_32X32:
8519 0 : highbd_inv_txfm_add_32x32(input, dest_r, stride_r, dest_w, stride_w,
8520 : txfm_param);
8521 0 : break;
8522 0 : case TX_16X16:
8523 0 : highbd_inv_txfm_add_16x16(input, dest_r, stride_r, dest_w, stride_w,
8524 : txfm_param);
8525 0 : break;
8526 0 : case TX_8X8:
8527 0 : highbd_inv_txfm_add_8x8(input, dest_r, stride_r, dest_w, stride_w,
8528 : txfm_param);
8529 0 : break;
8530 0 : case TX_4X8:
8531 0 : highbd_inv_txfm_add_4x8(input, dest_r, stride_r, dest_w, stride_w,
8532 : txfm_param);
8533 0 : break;
8534 0 : case TX_8X4:
8535 0 : highbd_inv_txfm_add_8x4(input, dest_r, stride_r, dest_w, stride_w,
8536 : txfm_param);
8537 0 : break;
8538 0 : case TX_8X16:
8539 0 : highbd_inv_txfm_add_8x16(input, dest_r, stride_r, dest_w, stride_w,
8540 : txfm_param);
8541 0 : break;
8542 0 : case TX_16X8:
8543 0 : highbd_inv_txfm_add_16x8(input, dest_r, stride_r, dest_w, stride_w,
8544 : txfm_param);
8545 0 : break;
8546 0 : case TX_16X32:
8547 0 : highbd_inv_txfm_add_16x32(input, dest_r, stride_r, dest_w, stride_w,
8548 : txfm_param);
8549 0 : break;
8550 0 : case TX_32X16:
8551 0 : highbd_inv_txfm_add_32x16(input, dest_r, stride_r, dest_w, stride_w,
8552 : txfm_param);
8553 0 : break;
8554 0 : case TX_64X64:
8555 0 : highbd_inv_txfm_add_64x64(input, dest_r, stride_r, dest_w, stride_w,
8556 : txfm_param);
8557 0 : break;
8558 0 : case TX_32X64:
8559 0 : highbd_inv_txfm_add_32x64(input, dest_r, stride_r, dest_w, stride_w,
8560 : txfm_param);
8561 0 : break;
8562 0 : case TX_64X32:
8563 0 : highbd_inv_txfm_add_64x32(input, dest_r, stride_r, dest_w, stride_w,
8564 : txfm_param);
8565 0 : break;
8566 0 : case TX_16X64:
8567 0 : highbd_inv_txfm_add_16x64(input, dest_r, stride_r, dest_w, stride_w,
8568 : txfm_param);
8569 0 : break;
8570 0 : case TX_64X16:
8571 0 : highbd_inv_txfm_add_64x16(input, dest_r, stride_r, dest_w, stride_w,
8572 : txfm_param);
8573 0 : break;
8574 0 : case TX_4X4:
8575 : // this is like av1_short_idct4x4 but has a special case around eob<=1
8576 : // which is significant (not just an optimization) for the lossless
8577 : // case.
8578 0 : eb_av1_highbd_inv_txfm_add_4x4(input,
8579 : dest_r, stride_r, dest_w, stride_w,
8580 : txfm_param);
8581 0 : break;
8582 0 : case TX_16X4:
8583 0 : highbd_inv_txfm_add_16x4(input, dest_r, stride_r, dest_w, stride_w,
8584 : txfm_param);
8585 0 : break;
8586 0 : case TX_4X16:
8587 0 : highbd_inv_txfm_add_4x16(input, dest_r, stride_r, dest_w, stride_w,
8588 : txfm_param);
8589 0 : break;
8590 0 : case TX_8X32:
8591 0 : highbd_inv_txfm_add_8x32(input, dest_r, stride_r, dest_w, stride_w,
8592 : txfm_param);
8593 0 : break;
8594 0 : case TX_32X8:
8595 0 : highbd_inv_txfm_add_32x8(input, dest_r, stride_r, dest_w, stride_w,
8596 : txfm_param);
8597 0 : break;
8598 0 : default: assert(0 && "Invalid transform size"); break;
8599 : }
8600 0 : }
8601 :
8602 0 : void eb_av1_inv_txfm_add_c(const TranLow *dqcoeff,
8603 : uint8_t *dst_r, int32_t stride_r,
8604 : uint8_t *dst_w, int32_t stride_w,
8605 : const TxfmParam *txfm_param) {
8606 0 : const TxSize tx_size = txfm_param->tx_size;
8607 : DECLARE_ALIGNED(32, uint16_t, tmp[MAX_TX_SQUARE]);
8608 0 : int32_t tmp_stride = MAX_TX_SIZE;
8609 0 : int32_t w = tx_size_wide[tx_size];
8610 0 : int32_t h = tx_size_high[tx_size];
8611 0 : for (int32_t r = 0; r < h; ++r) {
8612 0 : for (int32_t c = 0; c < w; ++c)
8613 0 : tmp[r * tmp_stride + c] = dst_r[r * stride_r + c];
8614 : }
8615 :
8616 0 : highbd_inv_txfm_add(dqcoeff,
8617 0 : CONVERT_TO_BYTEPTR(tmp), tmp_stride,
8618 0 : CONVERT_TO_BYTEPTR(tmp), tmp_stride,
8619 : txfm_param);
8620 :
8621 0 : for (int32_t r = 0; r < h; ++r) {
8622 0 : for (int32_t c = 0; c < w; ++c)
8623 0 : dst_w[r * stride_w + c] = (uint8_t)tmp[r * tmp_stride + c];
8624 : }
8625 0 : }
8626 :
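/*
 * eb_av1_inv_txfm_add_c() above reuses the high-bitdepth kernels for 8-bit
 * reconstruction: the 8-bit destination is widened into the 16-bit tmp
 * buffer, the inverse transform runs on tmp in place (CONVERT_TO_BYTEPTR
 * re-tags the uint16_t buffer in the uint8_t* convention that the kernels
 * undo with CONVERT_TO_SHORTPTR), and the result is narrowed back to 8
 * bits. With bd = 8, as set by av1_inv_transform_recon8bit() below, the
 * kernels clip every output pixel to [0, 255], so the final cast is safe.
 */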
8627 0 : EbErrorType av1_inv_transform_recon(
8628 : int32_t *coeff_buffer,//1D buffer
8629 : uint8_t *recon_buffer_r,
8630 : uint32_t recon_stride_r,
8631 : uint8_t *recon_buffer_w,
8632 : uint32_t recon_stride_w,
8633 : TxSize txsize,
8634 : uint32_t bit_increment,
8635 : TxType transform_type,
8636 : PlaneType component_type,
8637 : uint32_t eob,
8638 : uint8_t lossless)
8639 : {
8640 : UNUSED(component_type);
8641 0 : EbErrorType return_error = EB_ErrorNone;
8642 : TxfmParam txfm_param;
8643 0 : txfm_param.tx_type = transform_type;
8644 0 : txfm_param.tx_size = txsize;
8645 0 : txfm_param.eob = eob;
8646 0 : txfm_param.lossless = lossless;
8647 0 : txfm_param.bd = bit_increment + EB_8BIT;
8648 0 : txfm_param.is_hbd = 1;
8649 : //TxfmParam.tx_set_type = av1_get_ext_tx_set_type( txfm_param->tx_size, is_inter_block(xd->mi[0]), reduced_tx_set);
8650 :
8651 0 : if (recon_buffer_r != recon_buffer_w) {
8652 : /* When the read and write output pointers differ, the kernel
8653 : * also copies the whole buffer from read to write, so processing
8654 : * cannot be limited by the end-of-block (eob) position. */
8655 0 : txfm_param.eob = av1_get_max_eob(txsize);
8656 : }
8657 :
8658 0 : highbd_inv_txfm_add((const TranLow *)coeff_buffer,
8659 : recon_buffer_r, recon_stride_r,
8660 : recon_buffer_w, recon_stride_w,
8661 : &txfm_param);
8662 :
8663 0 : return return_error;
8664 : }
8665 :
8666 44090500 : EbErrorType av1_inv_transform_recon8bit(
8667 : int32_t *coeff_buffer,//1D buffer
8668 : uint8_t *recon_buffer_r,
8669 : uint32_t recon_stride_r,
8670 : uint8_t *recon_buffer_w,
8671 : uint32_t recon_stride_w,
8672 : TxSize txsize,
8673 : TxType transform_type,
8674 : PlaneType component_type,
8675 : uint32_t eob,
8676 : uint8_t lossless
8677 : )
8678 : {
8679 : UNUSED(component_type);
8680 44090500 : EbErrorType return_error = EB_ErrorNone;
8681 : TxfmParam txfm_param;
8682 44090500 : txfm_param.tx_type = transform_type;
8683 44090500 : txfm_param.tx_size = txsize;
8684 44090500 : txfm_param.eob = eob;
8685 44090500 : txfm_param.lossless = lossless;
8686 44090500 : txfm_param.bd = 8;
8687 44090500 : txfm_param.is_hbd = 1;
8688 : //TxfmParam.tx_set_type = av1_get_ext_tx_set_type( txfm_param->tx_size, is_inter_block(xd->mi[0]), reduced_tx_set);
8689 :
8690 44090500 : if (recon_buffer_r != recon_buffer_w) {
8691 : /* When the read and write output pointers differ, the kernel
8692 : * also copies the whole buffer from read to write, so processing
8693 : * cannot be limited by the end-of-block (eob) position. */
8694 44056800 : txfm_param.eob = av1_get_max_eob(txsize);
8695 : }
8696 :
8697 44099300 : eb_av1_inv_txfm_add((const TranLow *)coeff_buffer,
8698 : recon_buffer_r, recon_stride_r,
8699 : recon_buffer_w, recon_stride_w,
8700 : &txfm_param);
8701 :
8702 44078500 : return return_error;
8703 : }
8704 :
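/*
 * A hedged usage sketch for av1_inv_transform_recon8bit() with hypothetical
 * buffers (example_recon8bit_call is illustrative only and never invoked;
 * component_type is unused by the implementation, so a zero-valued PlaneType
 * is passed):
 */
static void example_recon8bit_call(void) {
    int32_t coeffs[8 * 8] = { 0 };  /* dequantized coefficients, DC-only */
    uint8_t recon[8 * 8] = { 0 };   /* 8-bit prediction, updated in place */
    coeffs[0] = 64;                 /* hypothetical DC value */
    /* Read and write pointers are identical, so the coded eob is honored
     * instead of being widened to av1_get_max_eob(). */
    av1_inv_transform_recon8bit(coeffs,
                                recon, 8,  /* read pointer and stride */
                                recon, 8,  /* write pointer and stride */
                                TX_8X8, DCT_DCT, (PlaneType)0 /* unused */,
                                1 /* eob */, 0 /* not lossless */);
}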
8705 : /*********************************************************************
8706 : * Map Chroma QP
8707 : *********************************************************************/
8708 0 : uint8_t map_chroma_qp(
8709 : uint8_t qp)
8710 : {
8711 0 : return qp;
8712 : }
8713 :
8714 0 : uint8_t ConstructPmTransCoeffShapingKnob(const uint16_t *masking_matrix, uint8_t txb_size) // M_Processing is a function of type uint16_t
8715 : {
8716 0 : uint8_t stride = txb_size;
8717 0 : uint8_t strideN2 = stride >> 1;
8718 0 : uint8_t strideN4 = stride >> 2;
8719 :
8720 : uint16_t index, row_index, columnIndex;
8721 0 : uint64_t h1 = 0, h2 = 0, h3 = 0, q1 = 0, q2 = 0, q3 = 0, dc = 0;
8722 :
8723 0 : for (index = 0; index < txb_size*txb_size; index++)
8724 : {
8725 0 : row_index = index / stride;
8726 0 : columnIndex = index % stride;
8727 0 : if ((columnIndex >= strideN2) && (row_index < strideN2))
8728 0 : h1 += masking_matrix[index];
8729 0 : else if ((row_index >= strideN2) && (columnIndex < strideN2))
8730 0 : h2 += masking_matrix[index];
8731 0 : else if ((row_index > strideN2) && (columnIndex > strideN2))
8732 0 : h3 += masking_matrix[index];
8733 0 : else if ((columnIndex >= strideN4) && (row_index < strideN4))
8734 0 : q1 += masking_matrix[index];
8735 0 : else if ((row_index >= strideN4) && (columnIndex < strideN4))
8736 0 : q2 += masking_matrix[index];
8737 0 : else if ((row_index > strideN4) && (columnIndex > strideN4))
8738 0 : q3 += masking_matrix[index];
8739 0 : else if ((row_index != 0) && (columnIndex != 0))
8740 0 : dc += masking_matrix[index];
8741 : }
8742 :
8743 0 : if ((h1 == 0) && (h2 == 0) && (h3 == 0)) {
8744 0 : if ((q1 == 0) && (q2 == 0) && (q3 == 0))
8745 : {
8746 : {
8747 : // SHAPE_N4 not supported for TU 4x4
8748 0 : if (txb_size == 4)
8749 0 : return(0);
8750 : // SHAPE_N4 not supported for TU 8x8
8751 0 : else if (txb_size == 8)
8752 0 : return(1);
8753 : else
8754 0 : return(2);
8755 : }
8756 : }
8757 : else {
8758 : // SHAPE_N2 not supported for TU 4x4
8759 0 : if (txb_size == 4)
8760 0 : return(0);
8761 : else
8762 0 : return(1);
8763 : }
8764 : }
8765 : else
8766 0 : return(0);
8767 : }
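/*
 * Worked example for ConstructPmTransCoeffShapingKnob(): with txb_size = 16,
 * a masking matrix that is zero everywhere outside its top-left 8x8 region
 * leaves h1 = h2 = h3 = 0, so SHAPE_N2 is usable and the knob is 1; if the
 * matrix is additionally zero outside its top-left 4x4 region, then
 * q1 = q2 = q3 = 0 as well and the knob is 2 (SHAPE_N4). Any nonzero mass in
 * the outer half-block regions forces the knob to 0 (no shaping). Per the
 * comments above, 4x4 blocks always return 0 and 8x8 blocks cap at 1.
 */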
8768 0 : void construct_pm_trans_coeff_shaping(
8769 : SequenceControlSet *sequence_control_set_ptr)
8770 : {
8771 : uint8_t resolutionIndex; // 4K or 1080p Index
8772 : uint8_t levelIndex; // PM Level Index
8773 : uint8_t tuSizeIndex; // TU Size Index
8774 0 : uint8_t arrayLength[4] = { 4, 8, 16, 32 }; // TU Size LUT
8775 :
8776 0 : for (resolutionIndex = 0; resolutionIndex < 2; resolutionIndex++) {
8777 0 : for (levelIndex = 0; levelIndex < 8; levelIndex++) {
8778 0 : for (tuSizeIndex = 0; tuSizeIndex < 4; tuSizeIndex++)
8779 0 : sequence_control_set_ptr->trans_coeff_shape_array[resolutionIndex][levelIndex][tuSizeIndex] = ConstructPmTransCoeffShapingKnob(masking_matrix[resolutionIndex][levelIndex][tuSizeIndex], arrayLength[tuSizeIndex]);
8780 : }
8781 : }
8782 0 : }
|