Line data Source code
1 : /*
2 : * Copyright(c) 2019 Intel Corporation
3 : * SPDX-License-Identifier: BSD-2-Clause-Patent
4 : */
5 :
6 : /*
7 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
8 : *
9 : * This source code is subject to the terms of the BSD 2 Clause License and
10 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
11 : * was not distributed with this source code in the LICENSE file, you can
12 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
13 : * Media Patent License 1.0 was not distributed with this source code in the
14 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
15 : */
16 :
17 : #include "EbDeblockingFilter_SSE2.h"
18 :
19 : #include "EbPredictionUnit.h"
20 : #include "EbNeighborArrays.h"
21 : #include "EbEncDecProcess.h"
22 : #include "EbDlfProcess.h"
23 :
24 : #ifndef EbDeblockingFilter_h
25 : #define EbDeblockingFilter_h
26 : #ifdef __cplusplus
27 : extern "C" {
28 : #endif
29 : #define BLK4X4_ADDR_TO_VERTICAL_EDGE_BS_ARRAY_IDX(blk_4x4_addr) (((blk_4x4_addr) & (MAX_LCU_SIZE_IN_4X4BLK - 1)) + (((blk_4x4_addr) / MAX_LCU_SIZE_IN_4X4BLK) * MAX_LCU_SIZE_IN_4X4BLK)) // (addr & (N-1)) + (addr / N) * N with N = MAX_LCU_SIZE_IN_4X4BLK; the mask form assumes N is a power of two - TODO confirm. The argument is evaluated twice, so pass a side-effect-free expression.
30 : #define BLK4X4_ADDR_TO_HORIZONTAL_EDGE_BS_ARRAY_IDX(blk_4x4_addr) (((blk_4x4_addr) & (MAX_LCU_SIZE_IN_4X4BLK - 1)) + (((blk_4x4_addr) / MAX_LCU_SIZE_IN_4X4BLK) * MAX_LCU_SIZE_IN_4X4BLK)) // expansion is identical to the VERTICAL variant on the previous line
31 : #define GET_LUMA_4X4BLK_ADDR(luma_lcu_wise4x4_blk_pos_x, luma_lcu_wise4x4_blk_pos_y, log_max_lcu_size_in4x4blk) (((luma_lcu_wise4x4_blk_pos_x)>>2) + (((luma_lcu_wise4x4_blk_pos_y)>>2) << (log_max_lcu_size_in4x4blk))) // (x >> 2) + ((y >> 2) << log_max_lcu_size_in4x4blk)
32 : #define GET_CHROMA_4X4BLK_ADDR(chroma_lcu_wise2x2_blk_pos_x, chroma_lcu_wise2x2_blk_pos_y, log_max_lcu_size_in4x4blk) (((chroma_lcu_wise2x2_blk_pos_x)>>1) + (((chroma_lcu_wise2x2_blk_pos_y)>>1) << (log_max_lcu_size_in4x4blk))) // same layout as the luma macro but the positions are shifted right by 1 instead of 2
33 : #define LUMA_SAMPLE_PIC_WISE_LOCATION_TO_QP_ARRAY_IDX(pos_x, pos_y, qp_array_stride) (((pos_x) >> LOG_MIN_BLOCK_SIZE) + ((pos_y) >> LOG_MIN_BLOCK_SIZE) * (qp_array_stride)) // (x >> LOG_MIN_BLOCK_SIZE) + (y >> LOG_MIN_BLOCK_SIZE) * stride
34 : #define CHROMA_SAMPLE_PIC_WISE_LOCATION_TO_QP_ARRAY_IDX(pos_x, pos_y, qp_array_stride) ((2*(pos_x) >> LOG_MIN_BLOCK_SIZE) + (2*(pos_y) >> LOG_MIN_BLOCK_SIZE) * (qp_array_stride)) // positions are doubled first: '*' binds tighter than '>>', so this is ((2*x) >> LOG_MIN_BLOCK_SIZE) + ((2*y) >> LOG_MIN_BLOCK_SIZE) * stride
35 : #define CHECK_MV_COMPONENT_EQUAL_OR_GREATER_THAN_4(pu1Ptr, pu2Ptr, pu1RefList, pu2RefList) ( \
36 : EB_ABS_DIFF((pu1Ptr)->mv[(pu1RefList)].x, (pu2Ptr)->mv[(pu2RefList)].x) >= 4 || \
37 : EB_ABS_DIFF((pu1Ptr)->mv[(pu1RefList)].y, (pu2Ptr)->mv[(pu2RefList)].y) >= 4 \
38 : ) // non-zero when EB_ABS_DIFF of the x components or of the y components of the two selected MVs is >= 4
39 :
40 : // Fixed-point precision constants used in the mode decision, followed by QP / deblocking offset constants
41 : #define BIT_ESTIMATE_PRECISION 15
42 : #define LAMBDA_PRECISION 16
43 : #define COST_PRECISION 8
44 : #define MD_SHIFT (BIT_ESTIMATE_PRECISION + LAMBDA_PRECISION - COST_PRECISION) // 15 + 16 - 8 = 23
45 : #define MD_OFFSET (1 << (MD_SHIFT-1)) // 1 << 22, i.e. half of (1 << MD_SHIFT); presumably the rounding term added before a right shift by MD_SHIFT - verify against callers
46 : #define VAR_QP 1
47 : #define MAX_QP_VALUE_PLUS_INTRA_TC_OFFSET 53
48 : #define BETA_OFFSET_VALUE 12 // range -12 to 12
49 : #define TC_OFFSET_VALUE 12 // range -12 to 12
50 :
51 : #if AV1_LF // NOTE(review): eb_av1_pick_filter_level(), declared later in this header outside any AV1_LF guard, takes an LpfPickMethod; the header therefore presumably only compiles when AV1_LF is non-zero - confirm, then either drop this guard or guard that prototype as well
52 : typedef enum LpfPickMethod // strategies for choosing the loop-filter level; enumerators take the implicit values 0..3
53 : {
54 : // Try the full image with different values.
55 : LPF_PICK_FROM_FULL_IMAGE,
56 : // Try a small portion of the image with different values.
57 : LPF_PICK_FROM_SUBIMAGE,
58 : // Estimate the level based on quantizer and frame type
59 : LPF_PICK_FROM_Q,
60 : // Pick 0 to disable LPF if LPF was enabled last frame
61 : LPF_PICK_MINIMAL_LPF
62 : } LpfPickMethod;
63 : #endif // AV1_LF
64 :
65 : typedef enum EDGE_DIR { VERT_EDGE = 0, HORZ_EDGE = 1, NUM_EDGE_DIRS } EDGE_DIR; // edge orientation selector; NUM_EDGE_DIRS follows HORZ_EDGE and therefore equals 2
66 :
67 : typedef struct AV1_DEBLOCKING_PARAMETERS { // filter settings for one edge: a length plus three pointers to read-only bytes
68 : // length of the filter applied to the outer edge
69 : uint32_t filter_length;
70 : // deblocking limits; the struct only stores pointers to const data - where that data lives is not visible in this header
71 : const uint8_t *lim;
72 : const uint8_t *mblim;
73 : const uint8_t *hev_thr;
74 : } AV1_DEBLOCKING_PARAMETERS;
75 :
76 : void set_qp_array_based_on_cu(
77 : PictureControlSet *picture_control_set_ptr, //input parameter
78 : uint32_t cuPos_x, //input parameter, sample-based horizontal picture-wise location of the CU
79 : uint32_t cuPos_y, //input parameter, sample-based vertical picture-wise location of the CU
80 : uint32_t cu_size_in_min_cu_size, //input parameter
81 : uint32_t cu_qp); //input parameter, QP of the CU
82 :
83 : /* assorted LoopFilter functions which get used elsewhere */
84 : struct AV1Common; // forward declaration only; this tag is not referenced again in the visible part of this header
85 : struct macroblockd; // forward declaration only; note the prototypes below use the MacroBlockD typedef, not this tag
86 : struct AV1LfSyncData; // forward declaration only; not referenced again in the visible part of this header
87 :
88 : void eb_av1_loop_filter_init(PictureControlSet *pcs_ptr); // takes a mutable picture control set; presumably a one-time setup step - confirm against the definition
89 : void eb_av1_loop_filter_frame_init(FrameHeader *frm_hdr, // frame header and loop-filter info are both passed as mutable pointers
90 : LoopFilterInfoN *lf_info, int32_t plane_start, int32_t plane_end); // NOTE(review): whether plane_end is inclusive or exclusive is not visible here - confirm
91 :
92 : void loop_filter_sb(
93 : EbPictureBufferDesc *frame_buffer,// recon picture
94 : //Yv12BufferConfig *frame_buffer,
95 : PictureControlSet *pcs_ptr,
96 : MacroBlockD *xd, int32_t mi_row, int32_t mi_col, // signed here, whereas the filter_block_plane prototypes in this header take uint32_t mi_row/mi_col
97 : int32_t plane_start, int32_t plane_end, // NOTE(review): inclusive/exclusive convention for plane_end not visible here - confirm
98 : uint8_t LastCol); // presumably a flag marking the last column - verify against the definition
99 :
100 : void eb_av1_loop_filter_frame( // effective signature: (frame_buffer, pcs_ptr, plane_start, plane_end); xd and partial_frame are commented out below
101 : EbPictureBufferDesc *frame_buffer,// recon picture
102 : //Yv12BufferConfig *frame_buffer,
103 : PictureControlSet *pcs_ptr,
104 : /*MacroBlockD *xd,*/ int32_t plane_start, int32_t plane_end/*,
105 : int32_t partial_frame*/);
106 :
107 : void eb_av1_pick_filter_level(
108 : DlfContext *context_ptr,
109 : EbPictureBufferDesc *srcBuffer, // source input
110 : PictureControlSet *pcs_ptr,
111 : LpfPickMethod method); // NOTE(review): LpfPickMethod is only declared under #if AV1_LF earlier in this header, while this prototype is unconditional - confirm the intended guard
112 :
113 : void eb_av1_filter_block_plane_vert( // every pointer argument is const-qualified, so the picture data being filtered is not reachable as mutable through this signature
114 : const PictureControlSet *const pcs_ptr,
115 : const MacroBlockD *const xd,
116 : const int32_t plane,
117 : const MacroblockdPlane *const plane_ptr,
118 : const uint32_t mi_row, const uint32_t mi_col); // unsigned here; loop_filter_sb in this header takes the same-named arguments as int32_t
119 :
120 : void eb_av1_filter_block_plane_horz( // same parameter list as the _vert prototype above
121 : const PictureControlSet *const pcs_ptr,
122 : const MacroBlockD *const xd, const int32_t plane,
123 : const MacroblockdPlane *const plane_ptr,
124 : const uint32_t mi_row, const uint32_t mi_col);
125 :
126 : typedef struct LoopFilterWorkerData // note: the struct tag is LoopFilterWorkerData but the typedef name is LFWorkerData
127 : {
128 : EbPictureBufferDesc *frame_buffer;// recon picture
129 : PictureControlSet *pcs_ptr;
130 : struct MacroblockdPlane planes[MAX_MB_PLANE]; // stored by value, one entry per plane index below MAX_MB_PLANE
131 : // TODO(Ranjit): When the filter functions are modified to use xd->lossless
132 : // add lossless as a member here.
133 : MacroBlockD *xd;
134 : } LFWorkerData;
135 :
136 9451830 : static INLINE int32_t is_inter_block_no_intrabc(MvReferenceFrame ref_frame_0) { // returns 1 when ref_frame_0 compares greater than INTRA_FRAME, otherwise 0
137 9451830 : return /*is_intrabc_block(mbmi) ||*/ ref_frame_0 > INTRA_FRAME; // the intra-BC test is commented out, so only the reference-frame comparison decides the result
138 : }
139 :
140 : void update_sharpness(LoopFilterInfoN *lfi, int32_t sharpness_lvl); // lfi is non-const; presumably the function writes into *lfi - confirm against the definition
141 :
142 : uint8_t get_filter_level(FrameHeader* frm_hdr, const LoopFilterInfoN *lfi_n, // returns the level as a uint8_t; lfi_n is read-only, frm_hdr is not const-qualified
143 : const int32_t dir_idx, int32_t plane, int32_t *sb_delta_lf, uint8_t seg_id, // NOTE(review): sb_delta_lf is a mutable pointer; whether it is an array and whether it is written is not visible here
144 : PredictionMode pred_mode, MvReferenceFrame ref_frame_0);
145 :
146 : void aom_highbd_lpf_horizontal_14_sse2(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
147 : #define aom_highbd_lpf_horizontal_14 aom_highbd_lpf_horizontal_14_sse2 // the generic name is bound to the SSE2 symbol by the preprocessor; the same pattern is used for the 4/6/8 variants below
148 : // Dual variants in this group: SSE2 and AVX2 prototypes are declared, but the RTCD_EXTERN pointer lines are commented out, so this header defines no generic aom_highbd_lpf_horizontal_*_dual name.
149 : void aom_highbd_lpf_horizontal_14_dual_sse2(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
150 : void aom_highbd_lpf_horizontal_14_dual_avx2(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
151 : //RTCD_EXTERN void(*aom_highbd_lpf_horizontal_14_dual)(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
152 :
153 : void aom_highbd_lpf_horizontal_4_c(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
154 : void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
155 : #define aom_highbd_lpf_horizontal_4 aom_highbd_lpf_horizontal_4_sse2
156 :
157 : void aom_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int32_t pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int32_t bd);
158 : void aom_highbd_lpf_horizontal_4_dual_avx2(uint16_t *s, int32_t pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int32_t bd);
159 : //RTCD_EXTERN void(*aom_highbd_lpf_horizontal_4_dual)(uint16_t *s, int32_t pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int32_t bd);
160 :
161 : void aom_highbd_lpf_horizontal_6_sse2(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
162 : #define aom_highbd_lpf_horizontal_6 aom_highbd_lpf_horizontal_6_sse2
163 :
164 : void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
165 : void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
166 : #define aom_highbd_lpf_horizontal_8 aom_highbd_lpf_horizontal_8_sse2
167 :
168 : void aom_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int32_t pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int32_t bd);
169 : void aom_highbd_lpf_horizontal_8_dual_avx2(uint16_t *s, int32_t pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int32_t bd);
170 : //RTCD_EXTERN void(*aom_highbd_lpf_horizontal_8_dual)(uint16_t *s, int32_t pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int32_t bd);
171 :
172 : void aom_highbd_lpf_vertical_14_sse2(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
173 : #define aom_highbd_lpf_vertical_14 aom_highbd_lpf_vertical_14_sse2 // the generic name is bound to the SSE2 symbol by the preprocessor; the same pattern is used for the 4/6/8 variants below
174 : // Dual variants in this group: SSE2 and AVX2 prototypes are declared, but the RTCD_EXTERN pointer lines are commented out, so this header defines no generic aom_highbd_lpf_vertical_*_dual name.
175 : void aom_highbd_lpf_vertical_14_dual_sse2(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
176 : void aom_highbd_lpf_vertical_14_dual_avx2(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
177 : //RTCD_EXTERN void(*aom_highbd_lpf_vertical_14_dual)(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
178 :
179 : void aom_highbd_lpf_vertical_4_c(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
180 : void aom_highbd_lpf_vertical_4_sse2(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
181 : #define aom_highbd_lpf_vertical_4 aom_highbd_lpf_vertical_4_sse2
182 :
183 : void aom_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int32_t pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int32_t bd);
184 : void aom_highbd_lpf_vertical_4_dual_avx2(uint16_t *s, int32_t pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int32_t bd);
185 : //RTCD_EXTERN void(*aom_highbd_lpf_vertical_4_dual)(uint16_t *s, int32_t pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int32_t bd);
186 :
187 : void aom_highbd_lpf_vertical_6_sse2(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
188 : #define aom_highbd_lpf_vertical_6 aom_highbd_lpf_vertical_6_sse2
189 :
190 : void aom_highbd_lpf_vertical_8_c(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
191 : void aom_highbd_lpf_vertical_8_sse2(uint16_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int32_t bd);
192 : #define aom_highbd_lpf_vertical_8 aom_highbd_lpf_vertical_8_sse2
193 :
194 : void aom_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int32_t pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int32_t bd);
195 : void aom_highbd_lpf_vertical_8_dual_avx2(uint16_t *s, int32_t pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int32_t bd);
196 : //RTCD_EXTERN void(*aom_highbd_lpf_vertical_8_dual)(uint16_t *s, int32_t pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int32_t bd);
197 :
198 : void aom_lpf_horizontal_14_sse2(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
199 : #define aom_lpf_horizontal_14 aom_lpf_horizontal_14_sse2 // 8-bit (uint8_t sample) filters: each generic name is bound to one implementation by the preprocessor
200 :
201 : void aom_lpf_horizontal_14_dual_sse2(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
202 : #define aom_lpf_horizontal_14_dual aom_lpf_horizontal_14_dual_sse2
203 :
204 : void aom_lpf_horizontal_4_c(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
205 : void aom_lpf_horizontal_4_sse2(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
206 : #define aom_lpf_horizontal_4 aom_lpf_horizontal_4_sse2
207 :
208 : #define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_sse2 // NOTE(review): no prototype for aom_lpf_horizontal_4_dual_sse2 appears in this header; presumably it comes from EbDeblockingFilter_SSE2.h - confirm
209 :
210 : void aom_lpf_horizontal_6_c(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
211 : void aom_lpf_horizontal_6_sse2(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
212 : #define aom_lpf_horizontal_6 aom_lpf_horizontal_6_sse2
213 :
214 : void aom_lpf_horizontal_8_c(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
215 : void aom_lpf_horizontal_8_sse2(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
216 : #define aom_lpf_horizontal_8 aom_lpf_horizontal_8_sse2
217 :
218 : void aom_lpf_horizontal_8_dual_c(uint8_t *s, int32_t pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
219 : #define aom_lpf_horizontal_8_dual aom_lpf_horizontal_8_dual_c // bound to the C implementation; no SIMD prototype for this variant is declared in this header
220 :
221 : void aom_lpf_vertical_14_sse2(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
222 : #define aom_lpf_vertical_14 aom_lpf_vertical_14_sse2 // 8-bit (uint8_t sample) filters: each generic name is bound to one implementation by the preprocessor
223 :
224 : void aom_lpf_vertical_14_dual_c(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
225 : void aom_lpf_vertical_14_dual_sse2(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
226 : #define aom_lpf_vertical_14_dual aom_lpf_vertical_14_dual_c // NOTE(review): bound to the C version although an SSE2 prototype is declared on the previous line - confirm this is intentional
227 :
228 : void aom_lpf_vertical_4_c(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
229 : void aom_lpf_vertical_4_sse2(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
230 : #define aom_lpf_vertical_4 aom_lpf_vertical_4_sse2
231 :
232 : void aom_lpf_vertical_4_dual_c(uint8_t *s, int32_t pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
233 : #define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_c // bound to the C implementation; no SIMD prototype for this variant is declared in this header
234 :
235 : void aom_lpf_vertical_6_c(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
236 : void aom_lpf_vertical_6_sse2(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
237 : #define aom_lpf_vertical_6 aom_lpf_vertical_6_sse2
238 :
239 : void aom_lpf_vertical_8_c(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
240 : void aom_lpf_vertical_8_sse2(uint8_t *s, int32_t pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
241 : #define aom_lpf_vertical_8 aom_lpf_vertical_8_sse2
242 :
243 : void aom_lpf_vertical_8_dual_c(uint8_t *s, int32_t pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
244 : #define aom_lpf_vertical_8_dual aom_lpf_vertical_8_dual_c // bound to the C implementation; no SIMD prototype for this variant is declared in this header
245 :
246 : #ifdef __cplusplus
247 : }
248 : #endif
249 : #endif
|