Line data Source code
1 : /*
2 : * Copyright(c) 2019 Intel Corporation
3 : * SPDX-License-Identifier: BSD-2-Clause-Patent
4 : */
5 :
6 : #include <stdlib.h>
7 :
8 : #include "EbUtility.h"
9 : #include "EbPictureControlSet.h"
10 : #include "EbSequenceControlSet.h"
11 : #include "EbPictureDecisionResults.h"
12 : #include "EbMotionEstimationProcess.h"
13 : #include "EbMotionEstimationResults.h"
14 : #include "EbReferenceObject.h"
15 : #include "EbMotionEstimation.h"
16 : #include "EbIntraPrediction.h"
17 : #include "EbLambdaRateTables.h"
18 : #include "EbComputeSAD.h"
19 :
20 : #include "emmintrin.h"
21 :
22 : #include "EbTemporalFiltering.h"
23 : #include "EbGlobalMotionEstimation.h"
24 :
25 : /* --32x32-
26 : |00||01|
27 : |02||03|
28 : --------*/
29 : /* ------16x16-----
30 : |00||01||04||05|
31 : |02||03||06||07|
32 : |08||09||12||13|
33 : |10||11||14||15|
34 : ----------------*/
35 : /* ------8x8----------------------------
36 : |00||01||04||05| |16||17||20||21|
37 : |02||03||06||07| |18||19||22||23|
38 : |08||09||12||13| |24||25||28||29|
39 : |10||11||14||15| |26||27||30||31|
40 :
41 : |32||33||36||37| |48||49||52||53|
42 : |34||35||38||39| |50||51||54||55|
43 : |40||41||44||45| |56||57||60||61|
44 : |42||43||46||47| |58||59||62||63|
45 : -------------------------------------*/
46 : EbErrorType CheckZeroZeroCenter(
47 : PictureParentControlSet *picture_control_set_ptr,
48 : EbPictureBufferDesc *refPicPtr,
49 : MeContext *context_ptr,
50 : uint32_t sb_origin_x,
51 : uint32_t sb_origin_y,
52 : uint32_t sb_width,
53 : uint32_t sb_height,
54 : int16_t *x_search_center,
55 : int16_t *y_search_center);
56 :
57 : /************************************************
58 : * Set ME/HME Params from Config
59 : ************************************************/
60 0 : void* set_me_hme_params_from_config(
61 : SequenceControlSet *sequence_control_set_ptr,
62 : MeContext *me_context_ptr)
63 : {
64 0 : uint16_t hmeRegionIndex = 0;
65 :
66 0 : me_context_ptr->search_area_width = (uint8_t)sequence_control_set_ptr->static_config.search_area_width;
67 0 : me_context_ptr->search_area_height = (uint8_t)sequence_control_set_ptr->static_config.search_area_height;
68 :
69 0 : me_context_ptr->number_hme_search_region_in_width = (uint16_t)sequence_control_set_ptr->static_config.number_hme_search_region_in_width;
70 0 : me_context_ptr->number_hme_search_region_in_height = (uint16_t)sequence_control_set_ptr->static_config.number_hme_search_region_in_height;
71 :
72 0 : me_context_ptr->hme_level0_total_search_area_width = (uint16_t)sequence_control_set_ptr->static_config.hme_level0_total_search_area_width;
73 0 : me_context_ptr->hme_level0_total_search_area_height = (uint16_t)sequence_control_set_ptr->static_config.hme_level0_total_search_area_height;
74 :
75 0 : for (hmeRegionIndex = 0; hmeRegionIndex < me_context_ptr->number_hme_search_region_in_width; ++hmeRegionIndex) {
76 0 : me_context_ptr->hme_level0_search_area_in_width_array[hmeRegionIndex] = (uint16_t)sequence_control_set_ptr->static_config.hme_level0_search_area_in_width_array[hmeRegionIndex];
77 0 : me_context_ptr->hme_level1_search_area_in_width_array[hmeRegionIndex] = (uint16_t)sequence_control_set_ptr->static_config.hme_level1_search_area_in_width_array[hmeRegionIndex];
78 0 : me_context_ptr->hme_level2_search_area_in_width_array[hmeRegionIndex] = (uint16_t)sequence_control_set_ptr->static_config.hme_level2_search_area_in_width_array[hmeRegionIndex];
79 : }
80 :
81 0 : for (hmeRegionIndex = 0; hmeRegionIndex < me_context_ptr->number_hme_search_region_in_height; ++hmeRegionIndex) {
82 0 : me_context_ptr->hme_level0_search_area_in_height_array[hmeRegionIndex] = (uint16_t)sequence_control_set_ptr->static_config.hme_level0_search_area_in_height_array[hmeRegionIndex];
83 0 : me_context_ptr->hme_level1_search_area_in_height_array[hmeRegionIndex] = (uint16_t)sequence_control_set_ptr->static_config.hme_level1_search_area_in_height_array[hmeRegionIndex];
84 0 : me_context_ptr->hme_level2_search_area_in_height_array[hmeRegionIndex] = (uint16_t)sequence_control_set_ptr->static_config.hme_level2_search_area_in_height_array[hmeRegionIndex];
85 : }
86 :
87 0 : return EB_NULL;
88 : }
89 :
90 : /************************************************
91 : * Set ME/HME Params
92 : ************************************************/
93 7199 : void* set_me_hme_params_oq(
94 : MeContext *me_context_ptr,
95 : PictureParentControlSet *picture_control_set_ptr,
96 : SequenceControlSet *sequence_control_set_ptr,
97 : EbInputResolution input_resolution)
98 : {
99 : UNUSED(sequence_control_set_ptr);
100 : #if TWO_PASS_USE_2NDP_ME_IN_1STP
101 7199 : uint8_t hmeMeLevel = sequence_control_set_ptr->use_output_stat_file ? picture_control_set_ptr->snd_pass_enc_mode : picture_control_set_ptr->enc_mode;
102 : #else
103 : uint8_t hmeMeLevel = picture_control_set_ptr->enc_mode; // OMK to be revised after new presets
104 : #endif
105 : // HME/ME default settings
106 7199 : me_context_ptr->number_hme_search_region_in_width = 2;
107 7199 : me_context_ptr->number_hme_search_region_in_height = 2;
108 :
109 7199 : uint8_t sc_content_detected = picture_control_set_ptr->sc_content_detected;
110 :
111 : // HME Level0
112 7199 : me_context_ptr->hme_level0_total_search_area_width = hme_level0_total_search_area_width[sc_content_detected][input_resolution][hmeMeLevel];
113 7199 : me_context_ptr->hme_level0_total_search_area_height = hme_level0_total_search_area_height[sc_content_detected][input_resolution][hmeMeLevel];
114 7199 : me_context_ptr->hme_level0_search_area_in_width_array[0] = hme_level0_search_area_in_width_array_right[sc_content_detected][input_resolution][hmeMeLevel];
115 7199 : me_context_ptr->hme_level0_search_area_in_width_array[1] = hme_level0_search_area_in_width_array_left[sc_content_detected][input_resolution][hmeMeLevel];
116 7199 : me_context_ptr->hme_level0_search_area_in_height_array[0] = hme_level0_search_area_in_height_array_top[sc_content_detected][input_resolution][hmeMeLevel];
117 7199 : me_context_ptr->hme_level0_search_area_in_height_array[1] = hme_level0_search_area_in_height_array_bottom[sc_content_detected][input_resolution][hmeMeLevel];
118 : // HME Level1
119 7199 : me_context_ptr->hme_level1_search_area_in_width_array[0] = hme_level1_search_area_in_width_array_right[sc_content_detected][input_resolution][hmeMeLevel];
120 7199 : me_context_ptr->hme_level1_search_area_in_width_array[1] = hme_level1_search_area_in_width_array_left[sc_content_detected][input_resolution][hmeMeLevel];
121 7199 : me_context_ptr->hme_level1_search_area_in_height_array[0] = hme_level1_search_area_in_height_array_top[sc_content_detected][input_resolution][hmeMeLevel];
122 7199 : me_context_ptr->hme_level1_search_area_in_height_array[1] = hme_level1_search_area_in_height_array_bottom[sc_content_detected][input_resolution][hmeMeLevel];
123 : // HME Level2
124 7199 : me_context_ptr->hme_level2_search_area_in_width_array[0] = hme_level2_search_area_in_width_array_right[sc_content_detected][input_resolution][hmeMeLevel];
125 7199 : me_context_ptr->hme_level2_search_area_in_width_array[1] = hme_level2_search_area_in_width_array_left[sc_content_detected][input_resolution][hmeMeLevel];
126 7199 : me_context_ptr->hme_level2_search_area_in_height_array[0] = hme_level2_search_area_in_height_array_top[sc_content_detected][input_resolution][hmeMeLevel];
127 7199 : me_context_ptr->hme_level2_search_area_in_height_array[1] = hme_level2_search_area_in_height_array_bottom[sc_content_detected][input_resolution][hmeMeLevel];
128 :
129 : // ME
130 7199 : me_context_ptr->search_area_width = search_area_width[sc_content_detected][input_resolution][hmeMeLevel];
131 7199 : me_context_ptr->search_area_height = search_area_height[sc_content_detected][input_resolution][hmeMeLevel];
132 :
133 7199 : assert(me_context_ptr->search_area_width <= MAX_SEARCH_AREA_WIDTH && "increase MAX_SEARCH_AREA_WIDTH" );
134 7199 : assert(me_context_ptr->search_area_height <= MAX_SEARCH_AREA_HEIGHT && "increase MAX_SEARCH_AREA_HEIGHT");
135 :
136 7199 : me_context_ptr->update_hme_search_center_flag = 1;
137 :
138 7199 : if (input_resolution <= INPUT_SIZE_576p_RANGE_OR_LOWER)
139 7198 : me_context_ptr->update_hme_search_center_flag = 0;
140 :
141 7199 : return EB_NULL;
142 : };
143 :
144 : /******************************************************
145 : * Derive ME Settings for OQ
146 : Input : encoder mode and tune
147 : Output : ME Kernel signal(s)
148 : ******************************************************/
149 : #if TWO_PASS_USE_2NDP_ME_IN_1STP
150 7198 : EbErrorType signal_derivation_me_kernel_oq(
151 : SequenceControlSet *sequence_control_set_ptr,
152 : PictureParentControlSet *picture_control_set_ptr,
153 : MotionEstimationContext_t *context_ptr) {
154 7198 : EbErrorType return_error = EB_ErrorNone;
155 :
156 7198 : uint8_t enc_mode = sequence_control_set_ptr->use_output_stat_file ? picture_control_set_ptr->snd_pass_enc_mode : picture_control_set_ptr->enc_mode;
157 : // Set ME/HME search regions
158 7198 : if (sequence_control_set_ptr->static_config.use_default_me_hme)
159 7198 : set_me_hme_params_oq(
160 : context_ptr->me_context_ptr,
161 : picture_control_set_ptr,
162 : sequence_control_set_ptr,
163 7198 : sequence_control_set_ptr->input_resolution);
164 :
165 : else
166 0 : set_me_hme_params_from_config(
167 : sequence_control_set_ptr,
168 : context_ptr->me_context_ptr);
169 7197 : if (picture_control_set_ptr->sc_content_detected)
170 0 : context_ptr->me_context_ptr->fractional_search_method = SUB_SAD_SEARCH;
171 : else
172 7197 : if (enc_mode <= ENC_M6)
173 3598 : context_ptr->me_context_ptr->fractional_search_method = SSD_SEARCH;
174 : else
175 3599 : context_ptr->me_context_ptr->fractional_search_method = FULL_SAD_SEARCH;
176 7197 : if (picture_control_set_ptr->sc_content_detected)
177 0 : context_ptr->me_context_ptr->fractional_search64x64 = EB_FALSE;
178 : else
179 7197 : context_ptr->me_context_ptr->fractional_search64x64 = EB_TRUE;
180 :
181 : // Set HME flags
182 7197 : context_ptr->me_context_ptr->enable_hme_flag = picture_control_set_ptr->enable_hme_flag;
183 7197 : context_ptr->me_context_ptr->enable_hme_level0_flag = picture_control_set_ptr->enable_hme_level0_flag;
184 7197 : context_ptr->me_context_ptr->enable_hme_level1_flag = picture_control_set_ptr->enable_hme_level1_flag;
185 7197 : context_ptr->me_context_ptr->enable_hme_level2_flag = picture_control_set_ptr->enable_hme_level2_flag;
186 :
187 : // Set the default settings of subpel
188 7197 : if (picture_control_set_ptr->sc_content_detected)
189 0 : if (enc_mode <= ENC_M1)
190 0 : context_ptr->me_context_ptr->use_subpel_flag = 1;
191 : else
192 0 : context_ptr->me_context_ptr->use_subpel_flag = 0;
193 : else
194 7197 : context_ptr->me_context_ptr->use_subpel_flag = 1;
195 : if (MR_MODE) {
196 : context_ptr->me_context_ptr->half_pel_mode =
197 : EX_HP_MODE;
198 : context_ptr->me_context_ptr->quarter_pel_mode =
199 : EX_QP_MODE;
200 : }
201 7197 : else if (enc_mode == ENC_M0) {
202 3600 : context_ptr->me_context_ptr->half_pel_mode =
203 : EX_HP_MODE;
204 3600 : context_ptr->me_context_ptr->quarter_pel_mode =
205 : REFINMENT_QP_MODE;
206 : }
207 : else {
208 3597 : context_ptr->me_context_ptr->half_pel_mode =
209 : REFINMENT_HP_MODE;
210 3597 : context_ptr->me_context_ptr->quarter_pel_mode =
211 : REFINMENT_QP_MODE;
212 : }
213 :
214 : // Set fractional search model
215 : // 0: search all blocks
216 : // 1: selective based on Full-Search SAD & MV.
217 : // 2: off
218 7197 : if (context_ptr->me_context_ptr->use_subpel_flag == 1) {
219 7199 : if (enc_mode <= ENC_M6)
220 3600 : context_ptr->me_context_ptr->fractional_search_model = 0;
221 : else
222 3599 : context_ptr->me_context_ptr->fractional_search_model = 1;
223 : }
224 : else
225 0 : context_ptr->me_context_ptr->fractional_search_model = 2;
226 :
227 : // HME Search Method
228 7197 : if (picture_control_set_ptr->sc_content_detected)
229 0 : if (enc_mode <= ENC_M6)
230 0 : context_ptr->me_context_ptr->hme_search_method = FULL_SAD_SEARCH;
231 : else
232 0 : context_ptr->me_context_ptr->hme_search_method = SUB_SAD_SEARCH;
233 : else
234 7197 : context_ptr->me_context_ptr->hme_search_method = FULL_SAD_SEARCH;
235 : // ME Search Method
236 7197 : if (picture_control_set_ptr->sc_content_detected)
237 0 : if (enc_mode <= ENC_M3)
238 0 : context_ptr->me_context_ptr->me_search_method = FULL_SAD_SEARCH;
239 : else
240 0 : context_ptr->me_context_ptr->me_search_method = SUB_SAD_SEARCH;
241 : else
242 7197 : context_ptr->me_context_ptr->me_search_method = (enc_mode <= ENC_M1) ?
243 7197 : FULL_SAD_SEARCH :
244 : SUB_SAD_SEARCH;
245 :
246 7197 : if (sequence_control_set_ptr->static_config.enable_global_motion == EB_TRUE)
247 : {
248 7200 : if (enc_mode == ENC_M0)
249 3600 : context_ptr->me_context_ptr->compute_global_motion = EB_TRUE;
250 : else
251 3600 : context_ptr->me_context_ptr->compute_global_motion = EB_FALSE;
252 : }
253 : else
254 0 : context_ptr->me_context_ptr->compute_global_motion = EB_FALSE;
255 :
256 7197 : return return_error;
257 : };
258 : #else
259 : EbErrorType signal_derivation_me_kernel_oq(
260 : SequenceControlSet *sequence_control_set_ptr,
261 : PictureParentControlSet *picture_control_set_ptr,
262 : MotionEstimationContext_t *context_ptr) {
263 : EbErrorType return_error = EB_ErrorNone;
264 :
265 : // Set ME/HME search regions
266 : if (sequence_control_set_ptr->static_config.use_default_me_hme)
267 : set_me_hme_params_oq(
268 : context_ptr->me_context_ptr,
269 : picture_control_set_ptr,
270 : sequence_control_set_ptr,
271 : sequence_control_set_ptr->input_resolution);
272 :
273 : else
274 : set_me_hme_params_from_config(
275 : sequence_control_set_ptr,
276 : context_ptr->me_context_ptr);
277 : if (picture_control_set_ptr->sc_content_detected)
278 : context_ptr->me_context_ptr->fractional_search_method = SUB_SAD_SEARCH;
279 : else
280 : if (picture_control_set_ptr->enc_mode <= ENC_M6)
281 : context_ptr->me_context_ptr->fractional_search_method = SSD_SEARCH ;
282 : else
283 : context_ptr->me_context_ptr->fractional_search_method = FULL_SAD_SEARCH;
284 : if (picture_control_set_ptr->sc_content_detected)
285 : context_ptr->me_context_ptr->fractional_search64x64 = EB_FALSE;
286 : else
287 : context_ptr->me_context_ptr->fractional_search64x64 = EB_TRUE;
288 :
289 : // Set HME flags
290 : context_ptr->me_context_ptr->enable_hme_flag = picture_control_set_ptr->enable_hme_flag;
291 : context_ptr->me_context_ptr->enable_hme_level0_flag = picture_control_set_ptr->enable_hme_level0_flag;
292 : context_ptr->me_context_ptr->enable_hme_level1_flag = picture_control_set_ptr->enable_hme_level1_flag;
293 : context_ptr->me_context_ptr->enable_hme_level2_flag = picture_control_set_ptr->enable_hme_level2_flag;
294 :
295 : // Set the default settings of subpel
296 : if (picture_control_set_ptr->sc_content_detected)
297 : if (picture_control_set_ptr->enc_mode <= ENC_M1)
298 : context_ptr->me_context_ptr->use_subpel_flag = 1;
299 : else
300 : context_ptr->me_context_ptr->use_subpel_flag = 0;
301 : else
302 : context_ptr->me_context_ptr->use_subpel_flag = 1;
303 : if (MR_MODE) {
304 : context_ptr->me_context_ptr->half_pel_mode =
305 : EX_HP_MODE;
306 : context_ptr->me_context_ptr->quarter_pel_mode =
307 : EX_QP_MODE;
308 : }
309 : else if (picture_control_set_ptr->enc_mode ==
310 : ENC_M0) {
311 : context_ptr->me_context_ptr->half_pel_mode =
312 : EX_HP_MODE;
313 : context_ptr->me_context_ptr->quarter_pel_mode =
314 : REFINMENT_QP_MODE;
315 : }
316 : else {
317 : context_ptr->me_context_ptr->half_pel_mode =
318 : REFINMENT_HP_MODE;
319 : context_ptr->me_context_ptr->quarter_pel_mode =
320 : REFINMENT_QP_MODE;
321 : }
322 :
323 :
324 :
325 : // Set fractional search model
326 : // 0: search all blocks
327 : // 1: selective based on Full-Search SAD & MV.
328 : // 2: off
329 : if (context_ptr->me_context_ptr->use_subpel_flag == 1) {
330 : if (picture_control_set_ptr->enc_mode <= ENC_M6)
331 : context_ptr->me_context_ptr->fractional_search_model = 0;
332 : else
333 : context_ptr->me_context_ptr->fractional_search_model = 1;
334 : }
335 : else
336 : context_ptr->me_context_ptr->fractional_search_model = 2;
337 :
338 : // HME Search Method
339 : if (picture_control_set_ptr->sc_content_detected)
340 : if (picture_control_set_ptr->enc_mode <= ENC_M6)
341 : context_ptr->me_context_ptr->hme_search_method = FULL_SAD_SEARCH;
342 : else
343 : context_ptr->me_context_ptr->hme_search_method = SUB_SAD_SEARCH;
344 : else
345 : context_ptr->me_context_ptr->hme_search_method = FULL_SAD_SEARCH;
346 : // ME Search Method
347 : if (picture_control_set_ptr->sc_content_detected)
348 : if (picture_control_set_ptr->enc_mode <= ENC_M3)
349 : context_ptr->me_context_ptr->me_search_method = FULL_SAD_SEARCH;
350 : else
351 : context_ptr->me_context_ptr->me_search_method = SUB_SAD_SEARCH;
352 : else
353 : context_ptr->me_context_ptr->me_search_method = (picture_control_set_ptr->enc_mode <= ENC_M1) ?
354 : FULL_SAD_SEARCH :
355 : SUB_SAD_SEARCH;
356 :
357 : if (sequence_control_set_ptr->static_config.enable_global_warped_motion == EB_TRUE)
358 : {
359 : if (enc_mode == ENC_M0
360 : && sequence_control_set_ptr->encoder_bit_depth == EB_8BIT)
361 : context_ptr->me_context_ptr->compute_global_motion = EB_TRUE;
362 : else
363 : context_ptr->me_context_ptr->compute_global_motion = EB_FALSE;
364 : }
365 : else
366 : context_ptr->me_context_ptr->compute_global_motion = EB_FALSE;
367 :
368 : return return_error;
369 : };
370 : #endif
371 :
372 : /************************************************
373 : * Set ME/HME Params for Altref Temporal Filtering
374 : ************************************************/
375 478 : void* tf_set_me_hme_params_oq(
376 : MeContext *me_context_ptr,
377 : PictureParentControlSet *picture_control_set_ptr,
378 : SequenceControlSet *sequence_control_set_ptr,
379 : EbInputResolution input_resolution)
380 : {
381 : UNUSED(sequence_control_set_ptr);
382 : #if TWO_PASS_USE_2NDP_ME_IN_1STP
383 478 : uint8_t hmeMeLevel = sequence_control_set_ptr->use_output_stat_file ? picture_control_set_ptr->snd_pass_enc_mode : picture_control_set_ptr->enc_mode;
384 : #else
385 : uint8_t hmeMeLevel = picture_control_set_ptr->enc_mode; // OMK to be revised after new presets
386 : #endif
387 :
388 : // HME/ME default settings
389 478 : me_context_ptr->number_hme_search_region_in_width = 2;
390 478 : me_context_ptr->number_hme_search_region_in_height = 2;
391 :
392 478 : uint8_t sc_content_detected = picture_control_set_ptr->sc_content_detected;
393 :
394 : // HME Level0
395 478 : me_context_ptr->hme_level0_total_search_area_width = tf_hme_level0_total_search_area_width[sc_content_detected][input_resolution][hmeMeLevel];
396 478 : me_context_ptr->hme_level0_total_search_area_height = tf_hme_level0_total_search_area_height[sc_content_detected][input_resolution][hmeMeLevel];
397 478 : me_context_ptr->hme_level0_search_area_in_width_array[0] = tf_hme_level0_search_area_in_width_array_right[sc_content_detected][input_resolution][hmeMeLevel];
398 478 : me_context_ptr->hme_level0_search_area_in_width_array[1] = tf_hme_level0_search_area_in_width_array_left[sc_content_detected][input_resolution][hmeMeLevel];
399 478 : me_context_ptr->hme_level0_search_area_in_height_array[0] = tf_hme_level0_search_area_in_height_array_top[sc_content_detected][input_resolution][hmeMeLevel];
400 478 : me_context_ptr->hme_level0_search_area_in_height_array[1] = tf_hme_level0_search_area_in_height_array_bottom[sc_content_detected][input_resolution][hmeMeLevel];
401 : // HME Level1
402 478 : me_context_ptr->hme_level1_search_area_in_width_array[0] = tf_hme_level1_search_area_in_width_array_right[sc_content_detected][input_resolution][hmeMeLevel];
403 478 : me_context_ptr->hme_level1_search_area_in_width_array[1] = tf_hme_level1_search_area_in_width_array_left[sc_content_detected][input_resolution][hmeMeLevel];
404 478 : me_context_ptr->hme_level1_search_area_in_height_array[0] = tf_hme_level1_search_area_in_height_array_top[sc_content_detected][input_resolution][hmeMeLevel];
405 478 : me_context_ptr->hme_level1_search_area_in_height_array[1] = tf_hme_level1_search_area_in_height_array_bottom[sc_content_detected][input_resolution][hmeMeLevel];
406 : // HME Level2
407 478 : me_context_ptr->hme_level2_search_area_in_width_array[0] = tf_hme_level2_search_area_in_width_array_right[sc_content_detected][input_resolution][hmeMeLevel];
408 478 : me_context_ptr->hme_level2_search_area_in_width_array[1] = tf_hme_level2_search_area_in_width_array_left[sc_content_detected][input_resolution][hmeMeLevel];
409 478 : me_context_ptr->hme_level2_search_area_in_height_array[0] = tf_hme_level2_search_area_in_height_array_top[sc_content_detected][input_resolution][hmeMeLevel];
410 478 : me_context_ptr->hme_level2_search_area_in_height_array[1] = tf_hme_level2_search_area_in_height_array_bottom[sc_content_detected][input_resolution][hmeMeLevel];
411 :
412 : // ME
413 478 : me_context_ptr->search_area_width = tf_search_area_width[sc_content_detected][input_resolution][hmeMeLevel];
414 478 : me_context_ptr->search_area_height = tf_search_area_height[sc_content_detected][input_resolution][hmeMeLevel];
415 :
416 478 : assert(me_context_ptr->search_area_width <= MAX_SEARCH_AREA_WIDTH && "increase MAX_SEARCH_AREA_WIDTH");
417 478 : assert(me_context_ptr->search_area_height <= MAX_SEARCH_AREA_HEIGHT && "increase MAX_SEARCH_AREA_HEIGHT");
418 :
419 478 : me_context_ptr->update_hme_search_center_flag = 1;
420 :
421 478 : if (input_resolution <= INPUT_SIZE_576p_RANGE_OR_LOWER)
422 478 : me_context_ptr->update_hme_search_center_flag = 0;
423 :
424 478 : return EB_NULL;
425 : };
426 :
427 : /******************************************************
428 : * Derive ME Settings for OQ for Altref Temporal Filtering
429 : Input : encoder mode and tune
430 : Output : ME Kernel signal(s)
431 : ******************************************************/
432 : #if TWO_PASS_USE_2NDP_ME_IN_1STP
433 478 : EbErrorType tf_signal_derivation_me_kernel_oq(
434 : SequenceControlSet *sequence_control_set_ptr,
435 : PictureParentControlSet *picture_control_set_ptr,
436 : MotionEstimationContext_t *context_ptr) {
437 478 : EbErrorType return_error = EB_ErrorNone;
438 478 : uint8_t enc_mode = sequence_control_set_ptr->use_output_stat_file ?
439 : picture_control_set_ptr->snd_pass_enc_mode : picture_control_set_ptr->enc_mode;
440 : // Set ME/HME search regions
441 478 : tf_set_me_hme_params_oq(
442 : context_ptr->me_context_ptr,
443 : picture_control_set_ptr,
444 : sequence_control_set_ptr,
445 478 : sequence_control_set_ptr->input_resolution);
446 :
447 478 : if (picture_control_set_ptr->sc_content_detected)
448 0 : if (enc_mode <= ENC_M1)
449 0 : context_ptr->me_context_ptr->fractional_search_method = SSD_SEARCH;
450 : else
451 0 : context_ptr->me_context_ptr->fractional_search_method = SUB_SAD_SEARCH;
452 : else
453 478 : if (enc_mode <= ENC_M6)
454 239 : context_ptr->me_context_ptr->fractional_search_method = SSD_SEARCH;
455 : else
456 239 : context_ptr->me_context_ptr->fractional_search_method = FULL_SAD_SEARCH;
457 478 : if (picture_control_set_ptr->sc_content_detected)
458 0 : if (enc_mode <= ENC_M1)
459 0 : context_ptr->me_context_ptr->fractional_search64x64 = EB_TRUE;
460 : else
461 0 : context_ptr->me_context_ptr->fractional_search64x64 = EB_FALSE;
462 : else
463 478 : context_ptr->me_context_ptr->fractional_search64x64 = EB_TRUE;
464 :
465 : // Set HME flags
466 478 : context_ptr->me_context_ptr->enable_hme_flag = picture_control_set_ptr->tf_enable_hme_flag;
467 478 : context_ptr->me_context_ptr->enable_hme_level0_flag = picture_control_set_ptr->tf_enable_hme_level0_flag;
468 478 : context_ptr->me_context_ptr->enable_hme_level1_flag = picture_control_set_ptr->tf_enable_hme_level1_flag;
469 478 : context_ptr->me_context_ptr->enable_hme_level2_flag = picture_control_set_ptr->tf_enable_hme_level2_flag;
470 :
471 : // Set the default settings of subpel
472 478 : if (picture_control_set_ptr->sc_content_detected)
473 0 : if (enc_mode <= ENC_M1)
474 0 : context_ptr->me_context_ptr->use_subpel_flag = 1;
475 : else
476 0 : context_ptr->me_context_ptr->use_subpel_flag = 0;
477 : else
478 478 : context_ptr->me_context_ptr->use_subpel_flag = 1;
479 : if (MR_MODE) {
480 : context_ptr->me_context_ptr->half_pel_mode =
481 : EX_HP_MODE;
482 : context_ptr->me_context_ptr->quarter_pel_mode =
483 : EX_QP_MODE;
484 : }
485 478 : else if (enc_mode == ENC_M0) {
486 240 : context_ptr->me_context_ptr->half_pel_mode =
487 : EX_HP_MODE;
488 240 : context_ptr->me_context_ptr->quarter_pel_mode =
489 : REFINMENT_QP_MODE;
490 : }
491 : else {
492 238 : context_ptr->me_context_ptr->half_pel_mode =
493 : REFINMENT_HP_MODE;
494 238 : context_ptr->me_context_ptr->quarter_pel_mode =
495 : REFINMENT_QP_MODE;
496 : }
497 : // Set fractional search model
498 : // 0: search all blocks
499 : // 1: selective based on Full-Search SAD & MV.
500 : // 2: off
501 478 : if (context_ptr->me_context_ptr->use_subpel_flag == 1) {
502 479 : if (enc_mode <= ENC_M6)
503 240 : context_ptr->me_context_ptr->fractional_search_model = 0;
504 : else
505 239 : context_ptr->me_context_ptr->fractional_search_model = 1;
506 : }
507 : else
508 0 : context_ptr->me_context_ptr->fractional_search_model = 2;
509 :
510 : // HME Search Method
511 478 : if (picture_control_set_ptr->sc_content_detected)
512 0 : if (enc_mode <= ENC_M6)
513 0 : context_ptr->me_context_ptr->hme_search_method = FULL_SAD_SEARCH;
514 : else
515 0 : context_ptr->me_context_ptr->hme_search_method = SUB_SAD_SEARCH;
516 : else
517 478 : context_ptr->me_context_ptr->hme_search_method = FULL_SAD_SEARCH;
518 : // ME Search Method
519 478 : if (picture_control_set_ptr->sc_content_detected)
520 0 : if (enc_mode <= ENC_M3)
521 0 : context_ptr->me_context_ptr->me_search_method = FULL_SAD_SEARCH;
522 : else
523 0 : context_ptr->me_context_ptr->me_search_method = SUB_SAD_SEARCH;
524 : else
525 478 : context_ptr->me_context_ptr->me_search_method = (enc_mode <= ENC_M1) ?
526 478 : FULL_SAD_SEARCH :
527 : SUB_SAD_SEARCH;
528 478 : return return_error;
529 : };
530 : #else
531 : EbErrorType tf_signal_derivation_me_kernel_oq(
532 : SequenceControlSet *sequence_control_set_ptr,
533 : PictureParentControlSet *picture_control_set_ptr,
534 : MotionEstimationContext_t *context_ptr) {
535 : EbErrorType return_error = EB_ErrorNone;
536 :
537 : // Set ME/HME search regions
538 : tf_set_me_hme_params_oq(
539 : context_ptr->me_context_ptr,
540 : picture_control_set_ptr,
541 : sequence_control_set_ptr,
542 : sequence_control_set_ptr->input_resolution);
543 :
544 : if (picture_control_set_ptr->sc_content_detected)
545 : if (picture_control_set_ptr->enc_mode <= ENC_M1)
546 : context_ptr->me_context_ptr->fractional_search_method = SSD_SEARCH;
547 : else
548 : context_ptr->me_context_ptr->fractional_search_method = SUB_SAD_SEARCH;
549 : else
550 : if (picture_control_set_ptr->enc_mode <= ENC_M6)
551 : context_ptr->me_context_ptr->fractional_search_method = SSD_SEARCH;
552 : else
553 : context_ptr->me_context_ptr->fractional_search_method = FULL_SAD_SEARCH;
554 : if (picture_control_set_ptr->sc_content_detected)
555 : if (picture_control_set_ptr->enc_mode <= ENC_M1)
556 : context_ptr->me_context_ptr->fractional_search64x64 = EB_TRUE;
557 : else
558 : context_ptr->me_context_ptr->fractional_search64x64 = EB_FALSE;
559 : else
560 : context_ptr->me_context_ptr->fractional_search64x64 = EB_TRUE;
561 :
562 : // Set HME flags
563 : context_ptr->me_context_ptr->enable_hme_flag = picture_control_set_ptr->tf_enable_hme_flag;
564 : context_ptr->me_context_ptr->enable_hme_level0_flag = picture_control_set_ptr->tf_enable_hme_level0_flag;
565 : context_ptr->me_context_ptr->enable_hme_level1_flag = picture_control_set_ptr->tf_enable_hme_level1_flag;
566 : context_ptr->me_context_ptr->enable_hme_level2_flag = picture_control_set_ptr->tf_enable_hme_level2_flag;
567 :
568 : // Set the default settings of subpel
569 : if (picture_control_set_ptr->sc_content_detected)
570 : if (picture_control_set_ptr->enc_mode <= ENC_M1)
571 : context_ptr->me_context_ptr->use_subpel_flag = 1;
572 : else
573 : context_ptr->me_context_ptr->use_subpel_flag = 0;
574 : else
575 : context_ptr->me_context_ptr->use_subpel_flag = 1;
576 : if (MR_MODE) {
577 : context_ptr->me_context_ptr->half_pel_mode =
578 : EX_HP_MODE;
579 : context_ptr->me_context_ptr->quarter_pel_mode =
580 : EX_QP_MODE;
581 : }
582 : else if (picture_control_set_ptr->enc_mode ==
583 : ENC_M0) {
584 : context_ptr->me_context_ptr->half_pel_mode =
585 : EX_HP_MODE;
586 : context_ptr->me_context_ptr->quarter_pel_mode =
587 : REFINMENT_QP_MODE;
588 : }
589 : else {
590 : context_ptr->me_context_ptr->half_pel_mode =
591 : REFINMENT_HP_MODE;
592 : context_ptr->me_context_ptr->quarter_pel_mode =
593 : REFINMENT_QP_MODE;
594 : }
595 :
596 :
597 : // Set fractional search model
598 : // 0: search all blocks
599 : // 1: selective based on Full-Search SAD & MV.
600 : // 2: off
601 : if (context_ptr->me_context_ptr->use_subpel_flag == 1) {
602 : if (picture_control_set_ptr->enc_mode <= ENC_M6)
603 : context_ptr->me_context_ptr->fractional_search_model = 0;
604 : else
605 : context_ptr->me_context_ptr->fractional_search_model = 1;
606 : }
607 : else
608 : context_ptr->me_context_ptr->fractional_search_model = 2;
609 :
610 : // HME Search Method
611 : if (picture_control_set_ptr->sc_content_detected)
612 : if (picture_control_set_ptr->enc_mode <= ENC_M6)
613 : context_ptr->me_context_ptr->hme_search_method = FULL_SAD_SEARCH;
614 : else
615 : context_ptr->me_context_ptr->hme_search_method = SUB_SAD_SEARCH;
616 : else
617 : context_ptr->me_context_ptr->hme_search_method = FULL_SAD_SEARCH;
618 : // ME Search Method
619 : if (picture_control_set_ptr->sc_content_detected)
620 : if (picture_control_set_ptr->enc_mode <= ENC_M3)
621 : context_ptr->me_context_ptr->me_search_method = FULL_SAD_SEARCH;
622 : else
623 : context_ptr->me_context_ptr->me_search_method = SUB_SAD_SEARCH;
624 : else
625 : context_ptr->me_context_ptr->me_search_method = (picture_control_set_ptr->enc_mode <= ENC_M1) ?
626 : FULL_SAD_SEARCH :
627 : SUB_SAD_SEARCH;
628 : return return_error;
629 : };
630 : #endif
631 8 : void motion_estimation_context_dctor(EbPtr p)
632 : {
633 8 : MotionEstimationContext_t* obj = (MotionEstimationContext_t*)p;
634 8 : EB_DELETE(obj->me_context_ptr);
635 8 : }
636 :
637 : /************************************************
638 : * Motion Analysis Context Constructor
639 : ************************************************/
640 8 : EbErrorType motion_estimation_context_ctor(
641 : MotionEstimationContext_t *context_ptr,
642 : EbFifo *picture_decision_results_input_fifo_ptr,
643 : EbFifo *motion_estimation_results_output_fifo_ptr,
644 : uint16_t max_input_luma_width,
645 : uint16_t max_input_luma_height,
646 : uint8_t nsq_present,
647 : uint8_t mrp_mode) {
648 :
649 8 : context_ptr->dctor = motion_estimation_context_dctor;
650 :
651 8 : context_ptr->picture_decision_results_input_fifo_ptr = picture_decision_results_input_fifo_ptr;
652 8 : context_ptr->motion_estimation_results_output_fifo_ptr = motion_estimation_results_output_fifo_ptr;
653 8 : EB_NEW(
654 : context_ptr->me_context_ptr,
655 : me_context_ctor,
656 : max_input_luma_width,
657 : max_input_luma_height,
658 : nsq_present,
659 : mrp_mode);
660 8 : return EB_ErrorNone;
661 : }
662 :
663 : /***************************************************************************************************
664 : * ZZ Decimated SAD Computation
665 : ***************************************************************************************************/
666 7080 : EbErrorType ComputeDecimatedZzSad(
667 : MotionEstimationContext_t *context_ptr,
668 : SequenceControlSet *sequence_control_set_ptr,
669 : PictureParentControlSet *picture_control_set_ptr,
670 : EbPictureBufferDesc *sixteenth_decimated_picture_ptr,
671 : uint32_t xLcuStartIndex,
672 : uint32_t xLcuEndIndex,
673 : uint32_t yLcuStartIndex,
674 : uint32_t yLcuEndIndex) {
675 7080 : EbErrorType return_error = EB_ErrorNone;
676 :
677 7080 : PictureParentControlSet *previous_picture_control_set_wrapper_ptr = ((PictureParentControlSet*)picture_control_set_ptr->previous_picture_control_set_wrapper_ptr->object_ptr);
678 7080 : EbPictureBufferDesc *previousInputPictureFull = previous_picture_control_set_wrapper_ptr->enhanced_picture_ptr;
679 :
680 : uint32_t sb_index;
681 :
682 : uint32_t sb_width;
683 : uint32_t sb_height;
684 :
685 : uint32_t decimatedLcuWidth;
686 : uint32_t decimatedLcuHeight;
687 :
688 : uint32_t sb_origin_x;
689 : uint32_t sb_origin_y;
690 :
691 : uint32_t blkDisplacementDecimated;
692 : uint32_t blkDisplacementFull;
693 :
694 : uint32_t decimatedLcuCollocatedSad;
695 :
696 : uint32_t x_lcu_index;
697 : uint32_t y_lcu_index;
698 :
699 14160 : for (y_lcu_index = yLcuStartIndex; y_lcu_index < yLcuEndIndex; ++y_lcu_index) {
700 14160 : for (x_lcu_index = xLcuStartIndex; x_lcu_index < xLcuEndIndex; ++x_lcu_index) {
701 7080 : sb_index = x_lcu_index + y_lcu_index * sequence_control_set_ptr->picture_width_in_sb;
702 7080 : SbParams *sb_params = &sequence_control_set_ptr->sb_params_array[sb_index];
703 :
704 7080 : sb_width = sb_params->width;
705 7080 : sb_height = sb_params->height;
706 :
707 7080 : sb_origin_x = sb_params->origin_x;
708 7080 : sb_origin_y = sb_params->origin_y;
709 :
710 7080 : sb_width = sb_params->width;
711 7080 : sb_height = sb_params->height;
712 :
713 7080 : decimatedLcuWidth = sb_width >> 2;
714 7080 : decimatedLcuHeight = sb_height >> 2;
715 :
716 7080 : decimatedLcuCollocatedSad = 0;
717 :
718 7080 : if (sb_params->is_complete_sb)
719 : {
720 5900 : blkDisplacementDecimated = (sixteenth_decimated_picture_ptr->origin_y + (sb_origin_y >> 2)) * sixteenth_decimated_picture_ptr->stride_y + sixteenth_decimated_picture_ptr->origin_x + (sb_origin_x >> 2);
721 5900 : blkDisplacementFull = (previousInputPictureFull->origin_y + sb_origin_y)* previousInputPictureFull->stride_y + (previousInputPictureFull->origin_x + sb_origin_x);
722 :
723 : // 1/16 collocated SB decimation
724 5900 : decimation_2d(
725 5900 : &previousInputPictureFull->buffer_y[blkDisplacementFull],
726 5900 : previousInputPictureFull->stride_y,
727 : BLOCK_SIZE_64,
728 : BLOCK_SIZE_64,
729 5900 : context_ptr->me_context_ptr->sixteenth_sb_buffer,
730 5900 : context_ptr->me_context_ptr->sixteenth_sb_buffer_stride,
731 : 4);
732 :
733 : // ZZ SAD between 1/16 current & 1/16 collocated
734 5900 : decimatedLcuCollocatedSad = nxm_sad_kernel(
735 5900 : &(sixteenth_decimated_picture_ptr->buffer_y[blkDisplacementDecimated]),
736 5900 : sixteenth_decimated_picture_ptr->stride_y,
737 5900 : context_ptr->me_context_ptr->sixteenth_sb_buffer,
738 5900 : context_ptr->me_context_ptr->sixteenth_sb_buffer_stride,
739 : 16, 16);
740 : }
741 : else {
742 1180 : decimatedLcuCollocatedSad = (uint32_t)~0;
743 : }
744 : // Keep track of non moving LCUs for QP modulation
745 7080 : if (decimatedLcuCollocatedSad < ((decimatedLcuWidth * decimatedLcuHeight) * 2))
746 2614 : previous_picture_control_set_wrapper_ptr->non_moving_index_array[sb_index] = BEA_CLASS_0_ZZ_COST;
747 4466 : else if (decimatedLcuCollocatedSad < ((decimatedLcuWidth * decimatedLcuHeight) * 4))
748 148 : previous_picture_control_set_wrapper_ptr->non_moving_index_array[sb_index] = BEA_CLASS_1_ZZ_COST;
749 4318 : else if (decimatedLcuCollocatedSad < ((decimatedLcuWidth * decimatedLcuHeight) * 8))
750 276 : previous_picture_control_set_wrapper_ptr->non_moving_index_array[sb_index] = BEA_CLASS_2_ZZ_COST;
751 : else
752 4042 : previous_picture_control_set_wrapper_ptr->non_moving_index_array[sb_index] = BEA_CLASS_3_ZZ_COST;
753 : }
754 : }
755 :
756 7080 : return return_error;
757 : }
758 :
759 : /************************************************
760 : * Motion Analysis Kernel
761 : * The Motion Analysis performs Motion Estimation
762 : * This process has access to the current input picture as well as
763 : * the input pictures, which the current picture references according
764 : * to the prediction structure pattern. The Motion Analysis process is multithreaded,
765 : * so pictures can be processed out of order as long as all inputs are available.
766 : ************************************************/
767 7 : void* motion_estimation_kernel(void *input_ptr)
768 : {
769 7 : MotionEstimationContext_t *context_ptr = (MotionEstimationContext_t*)input_ptr;
770 :
771 : PictureParentControlSet *picture_control_set_ptr;
772 : SequenceControlSet *sequence_control_set_ptr;
773 :
774 : EbObjectWrapper *inputResultsWrapperPtr;
775 : PictureDecisionResults *inputResultsPtr;
776 :
777 : EbObjectWrapper *outputResultsWrapperPtr;
778 : MotionEstimationResults *outputResultsPtr;
779 :
780 : EbPictureBufferDesc *input_picture_ptr;
781 :
782 : EbPictureBufferDesc *input_padded_picture_ptr;
783 :
784 : uint32_t bufferIndex;
785 :
786 : uint32_t sb_index;
787 : uint32_t x_lcu_index;
788 : uint32_t y_lcu_index;
789 : uint32_t picture_width_in_sb;
790 : uint32_t picture_height_in_sb;
791 : uint32_t sb_origin_x;
792 : uint32_t sb_origin_y;
793 : uint32_t sb_width;
794 : uint32_t sb_height;
795 : uint32_t lcuRow;
796 :
797 : EbPaReferenceObject *paReferenceObject;
798 : EbPictureBufferDesc *quarter_picture_ptr;
799 : EbPictureBufferDesc *sixteenth_picture_ptr;
800 : // Segments
801 : uint32_t segment_index;
802 : uint32_t xSegmentIndex;
803 : uint32_t ySegmentIndex;
804 : uint32_t xLcuStartIndex;
805 : uint32_t xLcuEndIndex;
806 : uint32_t yLcuStartIndex;
807 : uint32_t yLcuEndIndex;
808 :
809 : uint32_t intra_sad_interval_index;
810 :
811 : for (;;) {
812 : // Get Input Full Object
813 7687 : eb_get_full_object(
814 : context_ptr->picture_decision_results_input_fifo_ptr,
815 : &inputResultsWrapperPtr);
816 :
817 7677 : inputResultsPtr = (PictureDecisionResults*)inputResultsWrapperPtr->object_ptr;
818 7677 : picture_control_set_ptr = (PictureParentControlSet*)inputResultsPtr->picture_control_set_wrapper_ptr->object_ptr;
819 7677 : sequence_control_set_ptr = (SequenceControlSet*)picture_control_set_ptr->sequence_control_set_wrapper_ptr->object_ptr;
820 :
821 7677 : paReferenceObject = (EbPaReferenceObject*)picture_control_set_ptr->pa_reference_picture_wrapper_ptr->object_ptr;
822 : // Set 1/4 and 1/16 ME input buffer(s); filtered or decimated
823 15354 : quarter_picture_ptr = (sequence_control_set_ptr->down_sampling_method_me_search == ME_FILTERED_DOWNSAMPLED) ?
824 7677 : (EbPictureBufferDesc*)paReferenceObject->quarter_filtered_picture_ptr :
825 : (EbPictureBufferDesc*)paReferenceObject->quarter_decimated_picture_ptr;
826 :
827 15354 : sixteenth_picture_ptr = (sequence_control_set_ptr->down_sampling_method_me_search == ME_FILTERED_DOWNSAMPLED) ?
828 7677 : (EbPictureBufferDesc*)paReferenceObject->sixteenth_filtered_picture_ptr :
829 : (EbPictureBufferDesc*)paReferenceObject->sixteenth_decimated_picture_ptr;
830 7677 : input_padded_picture_ptr = (EbPictureBufferDesc*)paReferenceObject->input_padded_picture_ptr;
831 :
832 7677 : input_picture_ptr = picture_control_set_ptr->enhanced_picture_ptr;
833 :
834 7677 : context_ptr->me_context_ptr->me_alt_ref = inputResultsPtr->task_type == 1 ? EB_TRUE : EB_FALSE;
835 :
836 : // Lambda Assignement
837 7677 : if (sequence_control_set_ptr->static_config.pred_structure == EB_PRED_RANDOM_ACCESS) {
838 7680 : if (picture_control_set_ptr->temporal_layer_index == 0)
839 1080 : context_ptr->me_context_ptr->lambda = lambda_mode_decision_ra_sad[picture_control_set_ptr->picture_qp];
840 6600 : else if (picture_control_set_ptr->temporal_layer_index < 3)
841 1560 : context_ptr->me_context_ptr->lambda = lambda_mode_decision_ra_sad_qp_scaling_l1[picture_control_set_ptr->picture_qp];
842 : else
843 5040 : context_ptr->me_context_ptr->lambda = lambda_mode_decision_ra_sad_qp_scaling_l3[picture_control_set_ptr->picture_qp];
844 : }
845 : else {
846 0 : if (picture_control_set_ptr->temporal_layer_index == 0)
847 0 : context_ptr->me_context_ptr->lambda = lambda_mode_decision_ld_sad[picture_control_set_ptr->picture_qp];
848 : else
849 0 : context_ptr->me_context_ptr->lambda = lambda_mode_decision_ld_sad_qp_scaling[picture_control_set_ptr->picture_qp];
850 : }
851 7677 : if (inputResultsPtr->task_type == 0)
852 : {
853 : // ME Kernel Signal(s) derivation
854 7200 : signal_derivation_me_kernel_oq(
855 : sequence_control_set_ptr,
856 : picture_control_set_ptr,
857 : context_ptr);
858 :
859 : #if GLOBAL_WARPED_MOTION
860 : // Global motion estimation
861 : // Compute only for the first fragment.
862 : // TODO: create an other kernel ?
863 7199 : if (context_ptr->me_context_ptr->compute_global_motion
864 3599 : && inputResultsPtr->segment_index == 0)
865 60 : global_motion_estimation(picture_control_set_ptr,
866 : context_ptr->me_context_ptr,
867 : input_picture_ptr);
868 : #endif
869 :
870 : // Segments
871 7199 : segment_index = inputResultsPtr->segment_index;
872 7199 : picture_width_in_sb = (sequence_control_set_ptr->seq_header.max_frame_width + sequence_control_set_ptr->sb_sz - 1) / sequence_control_set_ptr->sb_sz;
873 7199 : picture_height_in_sb = (sequence_control_set_ptr->seq_header.max_frame_height + sequence_control_set_ptr->sb_sz - 1) / sequence_control_set_ptr->sb_sz;
874 7199 : SEGMENT_CONVERT_IDX_TO_XY(segment_index, xSegmentIndex, ySegmentIndex, picture_control_set_ptr->me_segments_column_count);
875 7199 : xLcuStartIndex = SEGMENT_START_IDX(xSegmentIndex, picture_width_in_sb, picture_control_set_ptr->me_segments_column_count);
876 7199 : xLcuEndIndex = SEGMENT_END_IDX(xSegmentIndex, picture_width_in_sb, picture_control_set_ptr->me_segments_column_count);
877 7199 : yLcuStartIndex = SEGMENT_START_IDX(ySegmentIndex, picture_height_in_sb, picture_control_set_ptr->me_segments_row_count);
878 7199 : yLcuEndIndex = SEGMENT_END_IDX(ySegmentIndex, picture_height_in_sb, picture_control_set_ptr->me_segments_row_count);
879 : // *** MOTION ESTIMATION CODE ***
880 7199 : if (picture_control_set_ptr->slice_type != I_SLICE) {
881 : // SB Loop
882 13921 : for (y_lcu_index = yLcuStartIndex; y_lcu_index < yLcuEndIndex; ++y_lcu_index) {
883 13920 : for (x_lcu_index = xLcuStartIndex; x_lcu_index < xLcuEndIndex; ++x_lcu_index) {
884 6959 : sb_index = (uint16_t)(x_lcu_index + y_lcu_index * picture_width_in_sb);
885 6959 : sb_origin_x = x_lcu_index * sequence_control_set_ptr->sb_sz;
886 6959 : sb_origin_y = y_lcu_index * sequence_control_set_ptr->sb_sz;
887 :
888 6959 : sb_width = (sequence_control_set_ptr->seq_header.max_frame_width - sb_origin_x) < BLOCK_SIZE_64 ? sequence_control_set_ptr->seq_header.max_frame_width - sb_origin_x : BLOCK_SIZE_64;
889 6959 : sb_height = (sequence_control_set_ptr->seq_header.max_frame_height - sb_origin_y) < BLOCK_SIZE_64 ? sequence_control_set_ptr->seq_header.max_frame_height - sb_origin_y : BLOCK_SIZE_64;
890 :
891 : // Load the SB from the input to the intermediate SB buffer
892 6959 : bufferIndex = (input_picture_ptr->origin_y + sb_origin_y) * input_picture_ptr->stride_y + input_picture_ptr->origin_x + sb_origin_x;
893 :
894 6959 : context_ptr->me_context_ptr->hme_search_type = HME_RECTANGULAR;
895 :
896 451993 : for (lcuRow = 0; lcuRow < BLOCK_SIZE_64; lcuRow++) {
897 445044 : EB_MEMCPY((&(context_ptr->me_context_ptr->sb_buffer[lcuRow * BLOCK_SIZE_64])), (&(input_picture_ptr->buffer_y[bufferIndex + lcuRow * input_picture_ptr->stride_y])), BLOCK_SIZE_64 * sizeof(uint8_t));
898 : }
899 :
900 : {
901 6949 : uint8_t * src_ptr = &input_padded_picture_ptr->buffer_y[bufferIndex];
902 :
903 : //_MM_HINT_T0 //_MM_HINT_T1 //_MM_HINT_T2//_MM_HINT_NTA
904 : uint32_t i;
905 424427 : for (i = 0; i < sb_height; i++)
906 : {
907 417481 : char const* p = (char const*)(src_ptr + i * input_padded_picture_ptr->stride_y);
908 417481 : _mm_prefetch(p, _MM_HINT_T2);
909 : }
910 : }
911 :
912 6946 : context_ptr->me_context_ptr->sb_src_ptr = &input_padded_picture_ptr->buffer_y[bufferIndex];
913 6946 : context_ptr->me_context_ptr->sb_src_stride = input_padded_picture_ptr->stride_y;
914 : // Load the 1/4 decimated SB from the 1/4 decimated input to the 1/4 intermediate SB buffer
915 6946 : if (context_ptr->me_context_ptr->enable_hme_level1_flag) {
916 6960 : bufferIndex = (quarter_picture_ptr->origin_y + (sb_origin_y >> 1)) * quarter_picture_ptr->stride_y + quarter_picture_ptr->origin_x + (sb_origin_x >> 1);
917 :
918 215665 : for (lcuRow = 0; lcuRow < (sb_height >> 1); lcuRow++) {
919 208706 : EB_MEMCPY((&(context_ptr->me_context_ptr->quarter_sb_buffer[lcuRow * context_ptr->me_context_ptr->quarter_sb_buffer_stride])), (&(quarter_picture_ptr->buffer_y[bufferIndex + lcuRow * quarter_picture_ptr->stride_y])), (sb_width >> 1) * sizeof(uint8_t));
920 : }
921 : }
922 :
923 : // Load the 1/16 decimated SB from the 1/16 decimated input to the 1/16 intermediate SB buffer
924 6945 : if (context_ptr->me_context_ptr->enable_hme_level0_flag) {
925 6960 : bufferIndex = (sixteenth_picture_ptr->origin_y + (sb_origin_y >> 2)) * sixteenth_picture_ptr->stride_y + sixteenth_picture_ptr->origin_x + (sb_origin_x >> 2);
926 :
927 : {
928 6960 : uint8_t *framePtr = &sixteenth_picture_ptr->buffer_y[bufferIndex];
929 6960 : uint8_t *localPtr = context_ptr->me_context_ptr->sixteenth_sb_buffer;
930 6960 : if (context_ptr->me_context_ptr->hme_search_method == FULL_SAD_SEARCH) {
931 111331 : for (lcuRow = 0; lcuRow < (sb_height >> 2); lcuRow += 1) {
932 104371 : EB_MEMCPY(localPtr, framePtr, (sb_width >> 2) * sizeof(uint8_t));
933 104372 : localPtr += 16;
934 104372 : framePtr += sixteenth_picture_ptr->stride_y;
935 : }
936 : }
937 : else {
938 1 : for (lcuRow = 0; lcuRow < (sb_height >> 2); lcuRow += 2) {
939 0 : EB_MEMCPY(localPtr, framePtr, (sb_width >> 2) * sizeof(uint8_t));
940 0 : localPtr += 16;
941 0 : framePtr += sixteenth_picture_ptr->stride_y << 1;
942 : }
943 : }
944 : }
945 : }
946 6946 : context_ptr->me_context_ptr->me_alt_ref = EB_FALSE;
947 :
948 6946 : motion_estimate_lcu(
949 : picture_control_set_ptr,
950 : sb_index,
951 : sb_origin_x,
952 : sb_origin_y,
953 : context_ptr->me_context_ptr,
954 : input_picture_ptr);
955 : }
956 : }
957 : }
958 7200 : if ( picture_control_set_ptr->intra_pred_mode > 4)
959 : // *** OPEN LOOP INTRA CANDIDATE SEARCH CODE ***
960 : {
961 : // SB Loop
962 0 : for (y_lcu_index = yLcuStartIndex; y_lcu_index < yLcuEndIndex; ++y_lcu_index) {
963 0 : for (x_lcu_index = xLcuStartIndex; x_lcu_index < xLcuEndIndex; ++x_lcu_index) {
964 0 : sb_origin_x = x_lcu_index * sequence_control_set_ptr->sb_sz;
965 0 : sb_origin_y = y_lcu_index * sequence_control_set_ptr->sb_sz;
966 :
967 0 : sb_index = (uint16_t)(x_lcu_index + y_lcu_index * picture_width_in_sb);
968 :
969 0 : open_loop_intra_search_sb(
970 : picture_control_set_ptr,
971 : sb_index,
972 : context_ptr,
973 : input_picture_ptr);
974 : }
975 : }
976 : }
977 :
978 : // ZZ SADs Computation
979 : // 1 lookahead frame is needed to get valid (0,0) SAD
980 7200 : if (sequence_control_set_ptr->static_config.look_ahead_distance != 0) {
981 : // when DG is ON, the ZZ SADs are computed @ the PD process
982 : {
983 : // ZZ SADs Computation using decimated picture
984 7197 : if (picture_control_set_ptr->picture_number > 0) {
985 7080 : ComputeDecimatedZzSad(
986 : context_ptr,
987 : sequence_control_set_ptr,
988 : picture_control_set_ptr,
989 : (EbPictureBufferDesc*)paReferenceObject->sixteenth_decimated_picture_ptr, // Hsan: always use decimated for ZZ SAD derivation until studying the trade offs and regenerating the activity threshold
990 : xLcuStartIndex,
991 : xLcuEndIndex,
992 : yLcuStartIndex,
993 : yLcuEndIndex);
994 : }
995 : }
996 : }
997 :
998 : // Calculate the ME Distortion and OIS Historgrams
999 :
1000 7200 : eb_block_on_mutex(picture_control_set_ptr->rc_distortion_histogram_mutex);
1001 :
1002 7200 : if (sequence_control_set_ptr->static_config.rate_control_mode) {
1003 0 : if (picture_control_set_ptr->slice_type != I_SLICE) {
1004 : uint16_t sadIntervalIndex;
1005 0 : for (y_lcu_index = yLcuStartIndex; y_lcu_index < yLcuEndIndex; ++y_lcu_index) {
1006 0 : for (x_lcu_index = xLcuStartIndex; x_lcu_index < xLcuEndIndex; ++x_lcu_index) {
1007 0 : sb_origin_x = x_lcu_index * sequence_control_set_ptr->sb_sz;
1008 0 : sb_origin_y = y_lcu_index * sequence_control_set_ptr->sb_sz;
1009 0 : sb_width = (sequence_control_set_ptr->seq_header.max_frame_width - sb_origin_x) < BLOCK_SIZE_64 ? sequence_control_set_ptr->seq_header.max_frame_width - sb_origin_x : BLOCK_SIZE_64;
1010 0 : sb_height = (sequence_control_set_ptr->seq_header.max_frame_height - sb_origin_y) < BLOCK_SIZE_64 ? sequence_control_set_ptr->seq_header.max_frame_height - sb_origin_y : BLOCK_SIZE_64;
1011 :
1012 0 : sb_index = (uint16_t)(x_lcu_index + y_lcu_index * picture_width_in_sb);
1013 0 : picture_control_set_ptr->inter_sad_interval_index[sb_index] = 0;
1014 0 : picture_control_set_ptr->intra_sad_interval_index[sb_index] = 0;
1015 :
1016 0 : if (sb_width == BLOCK_SIZE_64 && sb_height == BLOCK_SIZE_64) {
1017 0 : sadIntervalIndex = (uint16_t)(picture_control_set_ptr->rc_me_distortion[sb_index] >> (12 - SAD_PRECISION_INTERVAL));//change 12 to 2*log2(64)
1018 :
1019 : // printf("%d\n", sadIntervalIndex);
1020 :
1021 0 : sadIntervalIndex = (uint16_t)(sadIntervalIndex >> 2);
1022 0 : if (sadIntervalIndex > (NUMBER_OF_SAD_INTERVALS >> 1) - 1) {
1023 0 : uint16_t sadIntervalIndexTemp = sadIntervalIndex - ((NUMBER_OF_SAD_INTERVALS >> 1) - 1);
1024 :
1025 0 : sadIntervalIndex = ((NUMBER_OF_SAD_INTERVALS >> 1) - 1) + (sadIntervalIndexTemp >> 3);
1026 : }
1027 0 : if (sadIntervalIndex >= NUMBER_OF_SAD_INTERVALS - 1)
1028 0 : sadIntervalIndex = NUMBER_OF_SAD_INTERVALS - 1;
1029 :
1030 0 : picture_control_set_ptr->inter_sad_interval_index[sb_index] = sadIntervalIndex;
1031 :
1032 0 : picture_control_set_ptr->me_distortion_histogram[sadIntervalIndex] ++;
1033 :
1034 0 : intra_sad_interval_index = picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_64x64] >> 4;
1035 0 : intra_sad_interval_index = (uint16_t)(intra_sad_interval_index >> 2);
1036 0 : if (intra_sad_interval_index > (NUMBER_OF_SAD_INTERVALS >> 1) - 1) {
1037 0 : uint32_t sadIntervalIndexTemp = intra_sad_interval_index - ((NUMBER_OF_SAD_INTERVALS >> 1) - 1);
1038 :
1039 0 : intra_sad_interval_index = ((NUMBER_OF_SAD_INTERVALS >> 1) - 1) + (sadIntervalIndexTemp >> 3);
1040 : }
1041 0 : if (intra_sad_interval_index >= NUMBER_OF_SAD_INTERVALS - 1)
1042 0 : intra_sad_interval_index = NUMBER_OF_SAD_INTERVALS - 1;
1043 :
1044 0 : picture_control_set_ptr->intra_sad_interval_index[sb_index] = intra_sad_interval_index;
1045 :
1046 0 : picture_control_set_ptr->ois_distortion_histogram[intra_sad_interval_index] ++;
1047 :
1048 0 : ++picture_control_set_ptr->full_sb_count;
1049 : }
1050 : }
1051 : }
1052 : }
1053 : else {
1054 0 : for (y_lcu_index = yLcuStartIndex; y_lcu_index < yLcuEndIndex; ++y_lcu_index) {
1055 0 : for (x_lcu_index = xLcuStartIndex; x_lcu_index < xLcuEndIndex; ++x_lcu_index) {
1056 0 : sb_origin_x = x_lcu_index * sequence_control_set_ptr->sb_sz;
1057 0 : sb_origin_y = y_lcu_index * sequence_control_set_ptr->sb_sz;
1058 0 : sb_width = (sequence_control_set_ptr->seq_header.max_frame_width - sb_origin_x) < BLOCK_SIZE_64 ? sequence_control_set_ptr->seq_header.max_frame_width - sb_origin_x : BLOCK_SIZE_64;
1059 0 : sb_height = (sequence_control_set_ptr->seq_header.max_frame_height - sb_origin_y) < BLOCK_SIZE_64 ? sequence_control_set_ptr->seq_header.max_frame_height - sb_origin_y : BLOCK_SIZE_64;
1060 :
1061 0 : sb_index = (uint16_t)(x_lcu_index + y_lcu_index * picture_width_in_sb);
1062 :
1063 0 : picture_control_set_ptr->inter_sad_interval_index[sb_index] = 0;
1064 0 : picture_control_set_ptr->intra_sad_interval_index[sb_index] = 0;
1065 :
1066 0 : if (sb_width == BLOCK_SIZE_64 && sb_height == BLOCK_SIZE_64) {
1067 :
1068 0 : intra_sad_interval_index = picture_control_set_ptr->variance[sb_index][ME_TIER_ZERO_PU_64x64] >> 4;
1069 0 : intra_sad_interval_index = (uint16_t)(intra_sad_interval_index >> 2);
1070 0 : if (intra_sad_interval_index > (NUMBER_OF_SAD_INTERVALS >> 1) - 1) {
1071 0 : uint32_t sadIntervalIndexTemp = intra_sad_interval_index - ((NUMBER_OF_SAD_INTERVALS >> 1) - 1);
1072 :
1073 0 : intra_sad_interval_index = ((NUMBER_OF_SAD_INTERVALS >> 1) - 1) + (sadIntervalIndexTemp >> 3);
1074 : }
1075 0 : if (intra_sad_interval_index >= NUMBER_OF_SAD_INTERVALS - 1)
1076 0 : intra_sad_interval_index = NUMBER_OF_SAD_INTERVALS - 1;
1077 :
1078 0 : picture_control_set_ptr->intra_sad_interval_index[sb_index] = intra_sad_interval_index;
1079 :
1080 0 : picture_control_set_ptr->ois_distortion_histogram[intra_sad_interval_index] ++;
1081 :
1082 0 : ++picture_control_set_ptr->full_sb_count;
1083 : }
1084 : }
1085 : }
1086 : }
1087 : }
1088 :
1089 7200 : eb_release_mutex(picture_control_set_ptr->rc_distortion_histogram_mutex);
1090 :
1091 : // Get Empty Results Object
1092 7200 : eb_get_empty_object(
1093 : context_ptr->motion_estimation_results_output_fifo_ptr,
1094 : &outputResultsWrapperPtr);
1095 :
1096 7198 : outputResultsPtr = (MotionEstimationResults*)outputResultsWrapperPtr->object_ptr;
1097 7198 : outputResultsPtr->picture_control_set_wrapper_ptr = inputResultsPtr->picture_control_set_wrapper_ptr;
1098 7198 : outputResultsPtr->segment_index = segment_index;
1099 :
1100 : // Release the Input Results
1101 7198 : eb_release_object(inputResultsWrapperPtr);
1102 :
1103 : // Post the Full Results Object
1104 7200 : eb_post_full_object(outputResultsWrapperPtr);
1105 :
1106 : }
1107 : else {
1108 :
1109 : // ME Kernel Signal(s) derivation
1110 477 : tf_signal_derivation_me_kernel_oq(
1111 : sequence_control_set_ptr,
1112 : picture_control_set_ptr,
1113 : context_ptr);
1114 :
1115 : // temporal filtering start
1116 478 : context_ptr->me_context_ptr->me_alt_ref = EB_TRUE;
1117 478 : svt_av1_init_temporal_filtering(picture_control_set_ptr->temp_filt_pcs_list, picture_control_set_ptr, context_ptr, inputResultsPtr->segment_index);
1118 :
1119 : // Release the Input Results
1120 480 : eb_release_object(inputResultsWrapperPtr);
1121 : }
1122 : }
1123 :
1124 : return EB_NULL;
1125 : }
|