Line data Source code
1 : /*
2 : * Copyright(c) 2019 Netflix, Inc.
3 : * SPDX-License-Identifier: BSD-2-Clause-Patent
4 : */
5 :
6 : /*
7 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
8 : *
9 : * This source code is subject to the terms of the BSD 2 Clause License and
10 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
11 : * was not distributed with this source code in the LICENSE file, you can
12 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
13 : * Media Patent License 1.0 was not distributed with this source code in the
14 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
15 : */
16 :
17 : #include "EbEntropyCoding.h"
18 : #include "EbInterPrediction.h"
19 :
20 : #define FILTER_BITS 7
21 :
22 : #define UPSCALE_NORMATIVE_TAPS 8
23 :
24 : const int16_t av1_resize_filter_normative[(
25 : 1 << RS_SUBPEL_BITS)][UPSCALE_NORMATIVE_TAPS] = {
26 : #if UPSCALE_NORMATIVE_TAPS == 8
27 : { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -1, 128, 2, -1, 0, 0 },
28 : { 0, 1, -3, 127, 4, -2, 1, 0 }, { 0, 1, -4, 127, 6, -3, 1, 0 },
29 : { 0, 2, -6, 126, 8, -3, 1, 0 }, { 0, 2, -7, 125, 11, -4, 1, 0 },
30 : { -1, 2, -8, 125, 13, -5, 2, 0 }, { -1, 3, -9, 124, 15, -6, 2, 0 },
31 : { -1, 3, -10, 123, 18, -6, 2, -1 }, { -1, 3, -11, 122, 20, -7, 3, -1 },
32 : { -1, 4, -12, 121, 22, -8, 3, -1 }, { -1, 4, -13, 120, 25, -9, 3, -1 },
33 : { -1, 4, -14, 118, 28, -9, 3, -1 }, { -1, 4, -15, 117, 30, -10, 4, -1 },
34 : { -1, 5, -16, 116, 32, -11, 4, -1 }, { -1, 5, -16, 114, 35, -12, 4, -1 },
35 : { -1, 5, -17, 112, 38, -12, 4, -1 }, { -1, 5, -18, 111, 40, -13, 5, -1 },
36 : { -1, 5, -18, 109, 43, -14, 5, -1 }, { -1, 6, -19, 107, 45, -14, 5, -1 },
37 : { -1, 6, -19, 105, 48, -15, 5, -1 }, { -1, 6, -19, 103, 51, -16, 5, -1 },
38 : { -1, 6, -20, 101, 53, -16, 6, -1 }, { -1, 6, -20, 99, 56, -17, 6, -1 },
39 : { -1, 6, -20, 97, 58, -17, 6, -1 }, { -1, 6, -20, 95, 61, -18, 6, -1 },
40 : { -2, 7, -20, 93, 64, -18, 6, -2 }, { -2, 7, -20, 91, 66, -19, 6, -1 },
41 : { -2, 7, -20, 88, 69, -19, 6, -1 }, { -2, 7, -20, 86, 71, -19, 6, -1 },
42 : { -2, 7, -20, 84, 74, -20, 7, -2 }, { -2, 7, -20, 81, 76, -20, 7, -1 },
43 : { -2, 7, -20, 79, 79, -20, 7, -2 }, { -1, 7, -20, 76, 81, -20, 7, -2 },
44 : { -2, 7, -20, 74, 84, -20, 7, -2 }, { -1, 6, -19, 71, 86, -20, 7, -2 },
45 : { -1, 6, -19, 69, 88, -20, 7, -2 }, { -1, 6, -19, 66, 91, -20, 7, -2 },
46 : { -2, 6, -18, 64, 93, -20, 7, -2 }, { -1, 6, -18, 61, 95, -20, 6, -1 },
47 : { -1, 6, -17, 58, 97, -20, 6, -1 }, { -1, 6, -17, 56, 99, -20, 6, -1 },
48 : { -1, 6, -16, 53, 101, -20, 6, -1 }, { -1, 5, -16, 51, 103, -19, 6, -1 },
49 : { -1, 5, -15, 48, 105, -19, 6, -1 }, { -1, 5, -14, 45, 107, -19, 6, -1 },
50 : { -1, 5, -14, 43, 109, -18, 5, -1 }, { -1, 5, -13, 40, 111, -18, 5, -1 },
51 : { -1, 4, -12, 38, 112, -17, 5, -1 }, { -1, 4, -12, 35, 114, -16, 5, -1 },
52 : { -1, 4, -11, 32, 116, -16, 5, -1 }, { -1, 4, -10, 30, 117, -15, 4, -1 },
53 : { -1, 3, -9, 28, 118, -14, 4, -1 }, { -1, 3, -9, 25, 120, -13, 4, -1 },
54 : { -1, 3, -8, 22, 121, -12, 4, -1 }, { -1, 3, -7, 20, 122, -11, 3, -1 },
55 : { -1, 2, -6, 18, 123, -10, 3, -1 }, { 0, 2, -6, 15, 124, -9, 3, -1 },
56 : { 0, 2, -5, 13, 125, -8, 2, -1 }, { 0, 1, -4, 11, 125, -7, 2, 0 },
57 : { 0, 1, -3, 8, 126, -6, 2, 0 }, { 0, 1, -3, 6, 127, -4, 1, 0 },
58 : { 0, 1, -2, 4, 127, -3, 1, 0 }, { 0, 0, -1, 2, 128, -1, 0, 0 },
59 : #else
60 : #error "Invalid value of UPSCALE_NORMATIVE_TAPS"
61 : #endif // UPSCALE_NORMATIVE_TAPS == 8
62 : };
63 :
64 0 : int32_t av1_get_upscale_convolve_step(int in_length, int out_length) {
65 0 : return ((in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) / out_length;
66 : }
67 :
68 0 : int32_t get_upscale_convolve_x0(int in_length, int out_length,
69 : int32_t x_step_qn)
70 : {
71 0 : const int err = out_length * x_step_qn -
72 0 : (in_length << RS_SCALE_SUBPEL_BITS);
73 0 : const int32_t x0 =
74 0 : (-((out_length - in_length) << (RS_SCALE_SUBPEL_BITS - 1)) +
75 0 : out_length / 2) / out_length + RS_SCALE_EXTRA_OFF - err / 2;
76 0 : return (int32_t)((uint32_t)x0 & RS_SCALE_SUBPEL_MASK);
77 : }
78 :
79 0 : void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst,
80 : int dst_stride, int w, int h, const int16_t *x_filters,
81 : int x0_qn, int x_step_qn)
82 : {
83 0 : src -= UPSCALE_NORMATIVE_TAPS / 2 - 1;
84 0 : for (int y = 0; y < h; ++y) {
85 0 : int x_qn = x0_qn;
86 0 : for (int x = 0; x < w; ++x) {
87 0 : const uint8_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS];
88 0 : const int x_filter_idx =
89 0 : (x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
90 0 : assert(x_filter_idx <= RS_SUBPEL_MASK);
91 0 : const int16_t *const x_filter =
92 0 : &x_filters[x_filter_idx * UPSCALE_NORMATIVE_TAPS];
93 0 : int sum = 0;
94 0 : for (int k = 0; k < UPSCALE_NORMATIVE_TAPS; ++k)
95 0 : sum += src_x[k] * x_filter[k];
96 0 : dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
97 0 : x_qn += x_step_qn;
98 : }
99 0 : src += src_stride;
100 0 : dst += dst_stride;
101 : }
102 0 : }
103 :
104 0 : void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride,
105 : uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters,
106 : int x0_qn, int x_step_qn, int bd)
107 : {
108 0 : src -= UPSCALE_NORMATIVE_TAPS / 2 - 1;
109 0 : for (int y = 0; y < h; ++y) {
110 0 : int x_qn = x0_qn;
111 0 : for (int x = 0; x < w; ++x) {
112 0 : const uint16_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS];
113 0 : const int x_filter_idx =
114 0 : (x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
115 0 : assert(x_filter_idx <= RS_SUBPEL_MASK);
116 0 : const int16_t *const x_filter =
117 0 : &x_filters[x_filter_idx * UPSCALE_NORMATIVE_TAPS];
118 0 : int sum = 0;
119 0 : for (int k = 0; k < UPSCALE_NORMATIVE_TAPS; ++k)
120 0 : sum += src_x[k] * x_filter[k];
121 0 : dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
122 0 : x_qn += x_step_qn;
123 : }
124 0 : src += src_stride;
125 0 : dst += dst_stride;
126 : }
127 0 : }
128 :
129 0 : void upscale_normative_rect(const uint8_t *const input, int height,
130 : int width, int in_stride, uint8_t *output, int height2, int width2,
131 : int out_stride, int x_step_qn, int x0_qn, int pad_left, int pad_right)
132 : {
133 0 : assert(width > 0);
134 0 : assert(height > 0);
135 0 : assert(width2 > 0);
136 0 : assert(height2 > 0);
137 0 : assert(height2 == height);
138 :
139 : /* Extend the left/right pixels of the tile column if needed
140 : (either because we can't sample from other tiles, or because we're at
141 : a frame edge).
142 : Save the overwritten pixels into tmp_left and tmp_right.
143 : Note: Because we pass input-1 to av1_convolve_horiz_rs, we need one extra
144 : column of border pixels compared to what we'd naively think.*/
145 0 : const int border_cols = UPSCALE_NORMATIVE_TAPS / 2 + 1;
146 0 : uint8_t *tmp_left = NULL;
147 0 : uint8_t *tmp_right = NULL;
148 0 : uint8_t *const in_tl = (uint8_t *)(input - border_cols);
149 0 : uint8_t *const in_tr = (uint8_t *)(input + width);
150 :
151 0 : if (pad_left) {
152 0 : tmp_left = (uint8_t *)eb_aom_malloc(sizeof(*tmp_left) *
153 : border_cols * height);
154 0 : for (int i = 0; i < height; i++) {
155 0 : memcpy(tmp_left + i * border_cols,
156 0 : in_tl + i * in_stride, border_cols);
157 0 : memset(in_tl + i * in_stride,
158 0 : input[i * in_stride], border_cols);
159 : }
160 : }
161 0 : if (pad_right) {
162 0 : tmp_right = (uint8_t *)eb_aom_malloc(sizeof(*tmp_right) *
163 : border_cols * height);
164 0 : for (int i = 0; i < height; i++) {
165 0 : memcpy(tmp_right + i * border_cols,
166 0 : in_tr + i * in_stride, border_cols);
167 0 : memset(in_tr + i * in_stride,
168 0 : input[i * in_stride + width - 1], border_cols);
169 : }
170 : }
171 :
172 0 : av1_convolve_horiz_rs_c(input - 1, in_stride, output, out_stride,
173 : width2, height2, &av1_resize_filter_normative[0][0], x0_qn,
174 : x_step_qn);
175 :
176 : /* Restore the left/right border pixels */
177 0 : if (pad_left) {
178 0 : for (int i = 0; i < height; i++) {
179 0 : memcpy(in_tl + i * in_stride,
180 0 : tmp_left + i * border_cols, border_cols);
181 : }
182 0 : eb_aom_free(tmp_left);
183 : }
184 0 : if (pad_right) {
185 0 : for (int i = 0; i < height; i++) {
186 0 : memcpy(in_tr + i * in_stride,
187 0 : tmp_right + i * border_cols, border_cols);
188 : }
189 0 : eb_aom_free(tmp_right);
190 : }
191 0 : }
192 :
193 0 : void highbd_upscale_normative_rect(
194 : const uint8_t *const input, int height, int width, int in_stride,
195 : uint8_t *output, int height2, int width2, int out_stride,
196 : int x_step_qn, int x0_qn, int pad_left, int pad_right, int bd)
197 : {
198 0 : assert(width > 0);
199 0 : assert(height > 0);
200 0 : assert(width2 > 0);
201 0 : assert(height2 > 0);
202 0 : assert(height2 == height);
203 :
204 : /* Extend the left/right pixels of the tile column if needed
205 : (either because we can't sample from other tiles, or because we're at
206 : a frame edge).
207 : Save the overwritten pixels into tmp_left and tmp_right.
208 : Note: Because we pass input-1 to av1_convolve_horiz_rs, we need one extra
209 : column of border pixels compared to what we'd naively think.*/
210 0 : const int border_cols = UPSCALE_NORMATIVE_TAPS / 2 + 1;
211 0 : const int border_size = border_cols * sizeof(uint16_t);
212 0 : uint16_t *tmp_left = NULL;
213 0 : uint16_t *tmp_right = NULL;
214 0 : uint16_t *const input16 = (uint16_t*)input;//CONVERT_TO_SHORTPTR(input);
215 0 : uint16_t *const in_tl = input16 - border_cols;
216 0 : uint16_t *const in_tr = input16 + width;
217 0 : if (pad_left) {
218 0 : tmp_left = (uint16_t *)eb_aom_malloc(sizeof(*tmp_left)
219 0 : * border_cols * height);
220 0 : for (int i = 0; i < height; i++) {
221 0 : memcpy(tmp_left + i * border_cols,
222 0 : in_tl + i * in_stride, border_size);
223 0 : eb_aom_memset16(in_tl + i * in_stride,
224 0 : input16[i * in_stride], border_cols);
225 : }
226 : }
227 0 : if (pad_right) {
228 : tmp_right =
229 0 : (uint16_t *)eb_aom_malloc(sizeof(*tmp_right) *border_cols * height);
230 0 : for (int i = 0; i < height; i++) {
231 0 : memcpy(tmp_right + i * border_cols,
232 0 : in_tr + i * in_stride, border_size);
233 0 : eb_aom_memset16(in_tr + i * in_stride,
234 0 : input16[i * in_stride + width - 1], border_cols);
235 : }
236 : }
237 :
238 0 : av1_highbd_convolve_horiz_rs_c(((uint16_t*)(input)-1), in_stride,
239 : (uint16_t*)(output), out_stride, width2, height2,
240 : &av1_resize_filter_normative[0][0], x0_qn, x_step_qn, bd);
241 :
242 : /*Restore the left/right border pixels*/
243 0 : if (pad_left) {
244 0 : for (int i = 0; i < height; i++) {
245 0 : memcpy(in_tl + i * in_stride,
246 0 : tmp_left + i * border_cols, border_size);
247 : }
248 0 : eb_aom_free(tmp_left);
249 : }
250 0 : if (pad_right) {
251 0 : for (int i = 0; i < height; i++) {
252 0 : memcpy(in_tr + i * in_stride,
253 0 : tmp_right + i * border_cols, border_size);
254 : }
255 0 : eb_aom_free(tmp_right);
256 : }
257 0 : }
258 :
259 0 : void av1_upscale_normative_rows(const Av1Common *cm, const uint8_t *src,
260 : int src_stride, uint8_t *dst, int dst_stride, int rows, int sub_x, int bd)
261 : {
262 0 : int high_bd = bd > 8;
263 0 : const int downscaled_plane_width = ROUND_POWER_OF_TWO(
264 : cm->frm_size.frame_width, sub_x);
265 0 : const int upscaled_plane_width = ROUND_POWER_OF_TWO(
266 : cm->frm_size.superres_upscaled_width, sub_x);
267 0 : const int superres_denom = cm->frm_size.superres_denominator;
268 :
269 : TileInfo tile_col;
270 0 : const int32_t x_step_qn = av1_get_upscale_convolve_step(
271 : downscaled_plane_width, upscaled_plane_width);
272 0 : int32_t x0_qn = get_upscale_convolve_x0(downscaled_plane_width,
273 : upscaled_plane_width, x_step_qn);
274 0 : for (int j = 0; j < cm->tiles_info.tile_cols; j++) {
275 0 : eb_av1_tile_set_col(&tile_col, &cm->tiles_info, cm->mi_cols, j);
276 :
277 : /*Determine the limits of this tile column in both the source
278 : and destination images.
279 : Note: The actual location which we start sampling from is
280 : (downscaled_x0 - 1 + (x0_qn/2^14)), and this quantity increases
281 : by exactly dst_width * (x_step_qn/2^14) pixels each iteration.*/
282 0 : const int downscaled_x0 = tile_col. mi_col_start << (MI_SIZE_LOG2 - sub_x);
283 0 : const int downscaled_x1 = tile_col.mi_col_end << (MI_SIZE_LOG2 - sub_x);
284 0 : const int src_width = downscaled_x1 - downscaled_x0;
285 :
286 0 : const int upscaled_x0 = (downscaled_x0*superres_denom) / SCALE_NUMERATOR;
287 : int upscaled_x1;
288 0 : if (j == cm->tiles_info.tile_cols - 1) {
289 : /*Note that we can't just use AOMMIN here - due to rounding,
290 : (downscaled_x1 * superres_denom) / SCALE_NUMERATOR may be less than
291 : upscaled_plane_width.*/
292 0 : upscaled_x1 = upscaled_plane_width;
293 : }
294 : else
295 0 : upscaled_x1 = (downscaled_x1 * superres_denom) / SCALE_NUMERATOR;
296 :
297 0 : const uint8_t *const src_ptr = src + (downscaled_x0 << high_bd);
298 0 : uint8_t *const dst_ptr = dst + (upscaled_x0 << high_bd);
299 0 : const int dst_width = upscaled_x1 - upscaled_x0;
300 :
301 0 : const int pad_left = (j == 0);
302 0 : const int pad_right = (j == cm->tiles_info.tile_cols - 1);
303 :
304 :
305 0 : if (high_bd)
306 0 : highbd_upscale_normative_rect(src_ptr, rows, src_width, src_stride,
307 : dst_ptr, rows, dst_width, dst_stride,
308 : x_step_qn, x0_qn, pad_left, pad_right, bd);
309 : else
310 0 : upscale_normative_rect(src_ptr, rows, src_width, src_stride,
311 : dst_ptr, rows, dst_width, dst_stride, x_step_qn, x0_qn,
312 : pad_left, pad_right);
313 :
314 : /*Update the fractional pixel offset to prepare for the next tile col*/
315 0 : x0_qn += (dst_width * x_step_qn) - (src_width << RS_SCALE_SUBPEL_BITS);
316 : }
317 0 : }
|